qemu/tcg/optimize.c
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qemu/int128.h"
  28#include "tcg/tcg-op.h"
  29#include "tcg-internal.h"
  30
  31#define CASE_OP_32_64(x)                        \
  32        glue(glue(case INDEX_op_, x), _i32):    \
  33        glue(glue(case INDEX_op_, x), _i64)
  34
  35#define CASE_OP_32_64_VEC(x)                    \
  36        glue(glue(case INDEX_op_, x), _i32):    \
  37        glue(glue(case INDEX_op_, x), _i64):    \
  38        glue(glue(case INDEX_op_, x), _vec)
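/*
 * For example, CASE_OP_32_64(add) expands to
 *     case INDEX_op_add_i32:
 *     case INDEX_op_add_i64
 * and CASE_OP_32_64_VEC(and) additionally covers INDEX_op_and_vec.
 */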
  39
  40typedef struct TempOptInfo {
  41    bool is_const;
  42    TCGTemp *prev_copy;
  43    TCGTemp *next_copy;
  44    uint64_t val;
  45    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
  46    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
  47} TempOptInfo;
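/*
 * For example, a temp known to hold the constant 1 has val = 1,
 * z_mask = 1 (only bit 0 may be nonzero) and s_mask = ~(uint64_t)3
 * (clrsb64(1) == 62, so the top 62 bits all repeat the sign bit).
 */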
  48
  49typedef struct OptContext {
  50    TCGContext *tcg;
  51    TCGOp *prev_mb;
  52    TCGTempSet temps_used;
  53
  54    /* In-flight values from optimization. */
  55    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
  56    uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
  57    uint64_t s_mask;  /* mask of clrsb(value) bits */
  58    TCGType type;
  59} OptContext;
  60
  61/* Calculate the smask for a specific value. */
  62static uint64_t smask_from_value(uint64_t value)
  63{
  64    int rep = clrsb64(value);
  65    return ~(~0ull >> rep);
  66}
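/*
 * For example, smask_from_value(0) is the top 63 bits (clrsb64(0) == 63),
 * and smask_from_value(0x80) is the top 55 bits, i.e. bits 63..9,
 * since bit 7 is the first bit that differs from the sign bit.
 */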
  67
  68/*
  69 * Calculate the smask for a given set of known-zeros.
  70 * If there are lots of zeros on the left, we can consider the remainder
  71 * an unsigned field, and thus the corresponding signed field is one bit
  72 * larger.
  73 */
  74static uint64_t smask_from_zmask(uint64_t zmask)
  75{
  76    /*
  77     * Only the 0 bits are significant for zmask, thus the msb itself
  78     * must be zero, else we have no sign information.
  79     */
  80    int rep = clz64(zmask);
  81    if (rep == 0) {
  82        return 0;
  83    }
  84    rep -= 1;
  85    return ~(~0ull >> rep);
  86}
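/*
 * For example, a zmask of 0xff (the value is known to fit in 8 unsigned
 * bits) has clz64 == 56, so this returns the top 55 bits: at least 55
 * bits below the msb are guaranteed copies of the (zero) sign bit.
 * A zmask with the msb possibly set returns 0: no sign information.
 */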
  87
  88/*
  89 * Recreate a properly left-aligned smask after manipulation.
  90 * Some bit-shuffling, particularly shifts and rotates, may
  91 * retain sign bits on the left, but may scatter disconnected
  92 * sign bits on the right.  Retain only what remains to the left.
  93 */
  94static uint64_t smask_from_smask(int64_t smask)
  95{
  96    /* Only the 1 bits are significant for smask */
  97    return smask_from_zmask(~smask);
  98}
  99
 100static inline TempOptInfo *ts_info(TCGTemp *ts)
 101{
 102    return ts->state_ptr;
 103}
 104
 105static inline TempOptInfo *arg_info(TCGArg arg)
 106{
 107    return ts_info(arg_temp(arg));
 108}
 109
 110static inline bool ts_is_const(TCGTemp *ts)
 111{
 112    return ts_info(ts)->is_const;
 113}
 114
 115static inline bool arg_is_const(TCGArg arg)
 116{
 117    return ts_is_const(arg_temp(arg));
 118}
 119
 120static inline bool ts_is_copy(TCGTemp *ts)
 121{
 122    return ts_info(ts)->next_copy != ts;
 123}
 124
 125/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
 126static void reset_ts(TCGTemp *ts)
 127{
 128    TempOptInfo *ti = ts_info(ts);
 129    TempOptInfo *pi = ts_info(ti->prev_copy);
 130    TempOptInfo *ni = ts_info(ti->next_copy);
 131
 132    ni->prev_copy = ti->prev_copy;
 133    pi->next_copy = ti->next_copy;
 134    ti->next_copy = ts;
 135    ti->prev_copy = ts;
 136    ti->is_const = false;
 137    ti->z_mask = -1;
 138    ti->s_mask = 0;
 139}
 140
 141static void reset_temp(TCGArg arg)
 142{
 143    reset_ts(arg_temp(arg));
 144}
 145
 146/* Initialize and activate a temporary.  */
 147static void init_ts_info(OptContext *ctx, TCGTemp *ts)
 148{
 149    size_t idx = temp_idx(ts);
 150    TempOptInfo *ti;
 151
 152    if (test_bit(idx, ctx->temps_used.l)) {
 153        return;
 154    }
 155    set_bit(idx, ctx->temps_used.l);
 156
 157    ti = ts->state_ptr;
 158    if (ti == NULL) {
 159        ti = tcg_malloc(sizeof(TempOptInfo));
 160        ts->state_ptr = ti;
 161    }
 162
 163    ti->next_copy = ts;
 164    ti->prev_copy = ts;
 165    if (ts->kind == TEMP_CONST) {
 166        ti->is_const = true;
 167        ti->val = ts->val;
 168        ti->z_mask = ts->val;
 169        ti->s_mask = smask_from_value(ts->val);
 170    } else {
 171        ti->is_const = false;
 172        ti->z_mask = -1;
 173        ti->s_mask = 0;
 174    }
 175}
 176
 177static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 178{
 179    TCGTemp *i, *g, *l;
 180
 181    /* If this is already readonly, we can't do better. */
 182    if (temp_readonly(ts)) {
 183        return ts;
 184    }
 185
 186    g = l = NULL;
 187    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
 188        if (temp_readonly(i)) {
 189            return i;
 190        } else if (i->kind > ts->kind) {
 191            if (i->kind == TEMP_GLOBAL) {
 192                g = i;
 193            } else if (i->kind == TEMP_LOCAL) {
 194                l = i;
 195            }
 196        }
 197    }
 198
 199    /* If we didn't find a better representation, return the same temp. */
 200    return g ? g : l ? l : ts;
 201}
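/*
 * The search above prefers, in order: a read-only (constant) copy,
 * then a TEMP_GLOBAL copy, then a TEMP_LOCAL copy, and finally the
 * original temp itself.
 */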
 202
 203static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
 204{
 205    TCGTemp *i;
 206
 207    if (ts1 == ts2) {
 208        return true;
 209    }
 210
 211    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
 212        return false;
 213    }
 214
 215    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
 216        if (i == ts2) {
 217            return true;
 218        }
 219    }
 220
 221    return false;
 222}
 223
 224static bool args_are_copies(TCGArg arg1, TCGArg arg2)
 225{
 226    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 227}
 228
 229static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 230{
 231    TCGTemp *dst_ts = arg_temp(dst);
 232    TCGTemp *src_ts = arg_temp(src);
 233    TempOptInfo *di;
 234    TempOptInfo *si;
 235    TCGOpcode new_op;
 236
 237    if (ts_are_copies(dst_ts, src_ts)) {
 238        tcg_op_remove(ctx->tcg, op);
 239        return true;
 240    }
 241
 242    reset_ts(dst_ts);
 243    di = ts_info(dst_ts);
 244    si = ts_info(src_ts);
 245
 246    switch (ctx->type) {
 247    case TCG_TYPE_I32:
 248        new_op = INDEX_op_mov_i32;
 249        break;
 250    case TCG_TYPE_I64:
 251        new_op = INDEX_op_mov_i64;
 252        break;
 253    case TCG_TYPE_V64:
 254    case TCG_TYPE_V128:
 255    case TCG_TYPE_V256:
 256        /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
 257        new_op = INDEX_op_mov_vec;
 258        break;
 259    default:
 260        g_assert_not_reached();
 261    }
 262    op->opc = new_op;
 263    op->args[0] = dst;
 264    op->args[1] = src;
 265
 266    di->z_mask = si->z_mask;
 267    di->s_mask = si->s_mask;
 268
 269    if (src_ts->type == dst_ts->type) {
 270        TempOptInfo *ni = ts_info(si->next_copy);
 271
 272        di->next_copy = si->next_copy;
 273        di->prev_copy = src_ts;
 274        ni->prev_copy = dst_ts;
 275        si->next_copy = dst_ts;
 276        di->is_const = si->is_const;
 277        di->val = si->val;
 278    }
 279    return true;
 280}
 281
 282static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
 283                             TCGArg dst, uint64_t val)
 284{
 285    TCGTemp *tv;
 286
 287    if (ctx->type == TCG_TYPE_I32) {
 288        val = (int32_t)val;
 289    }
 290
 291    /* Convert movi to mov with constant temp. */
 292    tv = tcg_constant_internal(ctx->type, val);
 293    init_ts_info(ctx, tv);
 294    return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
 295}
 296
 297static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
 298{
 299    uint64_t l64, h64;
 300
 301    switch (op) {
 302    CASE_OP_32_64(add):
 303        return x + y;
 304
 305    CASE_OP_32_64(sub):
 306        return x - y;
 307
 308    CASE_OP_32_64(mul):
 309        return x * y;
 310
 311    CASE_OP_32_64_VEC(and):
 312        return x & y;
 313
 314    CASE_OP_32_64_VEC(or):
 315        return x | y;
 316
 317    CASE_OP_32_64_VEC(xor):
 318        return x ^ y;
 319
 320    case INDEX_op_shl_i32:
 321        return (uint32_t)x << (y & 31);
 322
 323    case INDEX_op_shl_i64:
 324        return (uint64_t)x << (y & 63);
 325
 326    case INDEX_op_shr_i32:
 327        return (uint32_t)x >> (y & 31);
 328
 329    case INDEX_op_shr_i64:
 330        return (uint64_t)x >> (y & 63);
 331
 332    case INDEX_op_sar_i32:
 333        return (int32_t)x >> (y & 31);
 334
 335    case INDEX_op_sar_i64:
 336        return (int64_t)x >> (y & 63);
 337
 338    case INDEX_op_rotr_i32:
 339        return ror32(x, y & 31);
 340
 341    case INDEX_op_rotr_i64:
 342        return ror64(x, y & 63);
 343
 344    case INDEX_op_rotl_i32:
 345        return rol32(x, y & 31);
 346
 347    case INDEX_op_rotl_i64:
 348        return rol64(x, y & 63);
 349
 350    CASE_OP_32_64_VEC(not):
 351        return ~x;
 352
 353    CASE_OP_32_64(neg):
 354        return -x;
 355
 356    CASE_OP_32_64_VEC(andc):
 357        return x & ~y;
 358
 359    CASE_OP_32_64_VEC(orc):
 360        return x | ~y;
 361
 362    CASE_OP_32_64_VEC(eqv):
 363        return ~(x ^ y);
 364
 365    CASE_OP_32_64_VEC(nand):
 366        return ~(x & y);
 367
 368    CASE_OP_32_64_VEC(nor):
 369        return ~(x | y);
 370
 371    case INDEX_op_clz_i32:
 372        return (uint32_t)x ? clz32(x) : y;
 373
 374    case INDEX_op_clz_i64:
 375        return x ? clz64(x) : y;
 376
 377    case INDEX_op_ctz_i32:
 378        return (uint32_t)x ? ctz32(x) : y;
 379
 380    case INDEX_op_ctz_i64:
 381        return x ? ctz64(x) : y;
 382
 383    case INDEX_op_ctpop_i32:
 384        return ctpop32(x);
 385
 386    case INDEX_op_ctpop_i64:
 387        return ctpop64(x);
 388
 389    CASE_OP_32_64(ext8s):
 390        return (int8_t)x;
 391
 392    CASE_OP_32_64(ext16s):
 393        return (int16_t)x;
 394
 395    CASE_OP_32_64(ext8u):
 396        return (uint8_t)x;
 397
 398    CASE_OP_32_64(ext16u):
 399        return (uint16_t)x;
 400
 401    CASE_OP_32_64(bswap16):
 402        x = bswap16(x);
 403        return y & TCG_BSWAP_OS ? (int16_t)x : x;
 404
 405    CASE_OP_32_64(bswap32):
 406        x = bswap32(x);
 407        return y & TCG_BSWAP_OS ? (int32_t)x : x;
 408
 409    case INDEX_op_bswap64_i64:
 410        return bswap64(x);
 411
 412    case INDEX_op_ext_i32_i64:
 413    case INDEX_op_ext32s_i64:
 414        return (int32_t)x;
 415
 416    case INDEX_op_extu_i32_i64:
 417    case INDEX_op_extrl_i64_i32:
 418    case INDEX_op_ext32u_i64:
 419        return (uint32_t)x;
 420
 421    case INDEX_op_extrh_i64_i32:
 422        return (uint64_t)x >> 32;
 423
 424    case INDEX_op_muluh_i32:
 425        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
 426    case INDEX_op_mulsh_i32:
 427        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
 428
 429    case INDEX_op_muluh_i64:
 430        mulu64(&l64, &h64, x, y);
 431        return h64;
 432    case INDEX_op_mulsh_i64:
 433        muls64(&l64, &h64, x, y);
 434        return h64;
 435
 436    case INDEX_op_div_i32:
 437        /* Avoid crashing on divide by zero, otherwise undefined.  */
 438        return (int32_t)x / ((int32_t)y ? : 1);
 439    case INDEX_op_divu_i32:
 440        return (uint32_t)x / ((uint32_t)y ? : 1);
 441    case INDEX_op_div_i64:
 442        return (int64_t)x / ((int64_t)y ? : 1);
 443    case INDEX_op_divu_i64:
 444        return (uint64_t)x / ((uint64_t)y ? : 1);
 445
 446    case INDEX_op_rem_i32:
 447        return (int32_t)x % ((int32_t)y ? : 1);
 448    case INDEX_op_remu_i32:
 449        return (uint32_t)x % ((uint32_t)y ? : 1);
 450    case INDEX_op_rem_i64:
 451        return (int64_t)x % ((int64_t)y ? : 1);
 452    case INDEX_op_remu_i64:
 453        return (uint64_t)x % ((uint64_t)y ? : 1);
 454
 455    default:
 456        fprintf(stderr,
 457                "Unrecognized operation %d in do_constant_folding.\n", op);
 458        tcg_abort();
 459    }
 460}
 461
 462static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
 463                                    uint64_t x, uint64_t y)
 464{
 465    uint64_t res = do_constant_folding_2(op, x, y);
 466    if (type == TCG_TYPE_I32) {
 467        res = (int32_t)res;
 468    }
 469    return res;
 470}
 471
 472static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 473{
 474    switch (c) {
 475    case TCG_COND_EQ:
 476        return x == y;
 477    case TCG_COND_NE:
 478        return x != y;
 479    case TCG_COND_LT:
 480        return (int32_t)x < (int32_t)y;
 481    case TCG_COND_GE:
 482        return (int32_t)x >= (int32_t)y;
 483    case TCG_COND_LE:
 484        return (int32_t)x <= (int32_t)y;
 485    case TCG_COND_GT:
 486        return (int32_t)x > (int32_t)y;
 487    case TCG_COND_LTU:
 488        return x < y;
 489    case TCG_COND_GEU:
 490        return x >= y;
 491    case TCG_COND_LEU:
 492        return x <= y;
 493    case TCG_COND_GTU:
 494        return x > y;
 495    default:
 496        tcg_abort();
 497    }
 498}
 499
 500static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 501{
 502    switch (c) {
 503    case TCG_COND_EQ:
 504        return x == y;
 505    case TCG_COND_NE:
 506        return x != y;
 507    case TCG_COND_LT:
 508        return (int64_t)x < (int64_t)y;
 509    case TCG_COND_GE:
 510        return (int64_t)x >= (int64_t)y;
 511    case TCG_COND_LE:
 512        return (int64_t)x <= (int64_t)y;
 513    case TCG_COND_GT:
 514        return (int64_t)x > (int64_t)y;
 515    case TCG_COND_LTU:
 516        return x < y;
 517    case TCG_COND_GEU:
 518        return x >= y;
 519    case TCG_COND_LEU:
 520        return x <= y;
 521    case TCG_COND_GTU:
 522        return x > y;
 523    default:
 524        tcg_abort();
 525    }
 526}
 527
 528static bool do_constant_folding_cond_eq(TCGCond c)
 529{
 530    switch (c) {
 531    case TCG_COND_GT:
 532    case TCG_COND_LTU:
 533    case TCG_COND_LT:
 534    case TCG_COND_GTU:
 535    case TCG_COND_NE:
 536        return 0;
 537    case TCG_COND_GE:
 538    case TCG_COND_GEU:
 539    case TCG_COND_LE:
 540    case TCG_COND_LEU:
 541    case TCG_COND_EQ:
 542        return 1;
 543    default:
 544        tcg_abort();
 545    }
 546}
 547
 548/*
 549 * Return -1 if the condition can't be simplified,
 550 * and the result of the condition (0 or 1) if it can.
 551 */
 552static int do_constant_folding_cond(TCGType type, TCGArg x,
 553                                    TCGArg y, TCGCond c)
 554{
 555    if (arg_is_const(x) && arg_is_const(y)) {
 556        uint64_t xv = arg_info(x)->val;
 557        uint64_t yv = arg_info(y)->val;
 558
 559        switch (type) {
 560        case TCG_TYPE_I32:
 561            return do_constant_folding_cond_32(xv, yv, c);
 562        case TCG_TYPE_I64:
 563            return do_constant_folding_cond_64(xv, yv, c);
 564        default:
 565            /* Only scalar comparisons are optimizable */
 566            return -1;
 567        }
 568    } else if (args_are_copies(x, y)) {
 569        return do_constant_folding_cond_eq(c);
 570    } else if (arg_is_const(y) && arg_info(y)->val == 0) {
 571        switch (c) {
 572        case TCG_COND_LTU:
 573            return 0;
 574        case TCG_COND_GEU:
 575            return 1;
 576        default:
 577            return -1;
 578        }
 579    }
 580    return -1;
 581}
 582
 583/*
 584 * Return -1 if the condition can't be simplified,
 585 * and the result of the condition (0 or 1) if it can.
 586 */
 587static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 588{
 589    TCGArg al = p1[0], ah = p1[1];
 590    TCGArg bl = p2[0], bh = p2[1];
 591
 592    if (arg_is_const(bl) && arg_is_const(bh)) {
 593        tcg_target_ulong blv = arg_info(bl)->val;
 594        tcg_target_ulong bhv = arg_info(bh)->val;
 595        uint64_t b = deposit64(blv, 32, 32, bhv);
 596
 597        if (arg_is_const(al) && arg_is_const(ah)) {
 598            tcg_target_ulong alv = arg_info(al)->val;
 599            tcg_target_ulong ahv = arg_info(ah)->val;
 600            uint64_t a = deposit64(alv, 32, 32, ahv);
 601            return do_constant_folding_cond_64(a, b, c);
 602        }
 603        if (b == 0) {
 604            switch (c) {
 605            case TCG_COND_LTU:
 606                return 0;
 607            case TCG_COND_GEU:
 608                return 1;
 609            default:
 610                break;
 611            }
 612        }
 613    }
 614    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
 615        return do_constant_folding_cond_eq(c);
 616    }
 617    return -1;
 618}
 619
 620/**
 621 * swap_commutative:
 622 * @dest: TCGArg of the destination argument, or NO_DEST.
 623 * @p1: first paired argument
 624 * @p2: second paired argument
 625 *
 626 * If *@p1 is a constant and *@p2 is not, swap.
 627 * If *@p2 matches @dest, swap.
 628 * Return true if a swap was performed.
 629 */
 630
 631#define NO_DEST  temp_arg(NULL)
 632
 633static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 634{
 635    TCGArg a1 = *p1, a2 = *p2;
 636    int sum = 0;
 637    sum += arg_is_const(a1);
 638    sum -= arg_is_const(a2);
 639
 640    /* Prefer the constant in the second argument, and then the form
 641       op a, a, b, which is better handled on non-RISC hosts. */
 642    if (sum > 0 || (sum == 0 && dest == a2)) {
 643        *p1 = a2;
 644        *p2 = a1;
 645        return true;
 646    }
 647    return false;
 648}
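/*
 * For example, "add_i32 t0, $0x10, t1" is rewritten as
 * "add_i32 t0, t1, $0x10", and "and_i32 t0, t1, t0" is rewritten as
 * "and_i32 t0, t0, t1" (the destination matches the first source).
 */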
 649
 650static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 651{
 652    int sum = 0;
 653    sum += arg_is_const(p1[0]);
 654    sum += arg_is_const(p1[1]);
 655    sum -= arg_is_const(p2[0]);
 656    sum -= arg_is_const(p2[1]);
 657    if (sum > 0) {
 658        TCGArg t;
 659        t = p1[0], p1[0] = p2[0], p2[0] = t;
 660        t = p1[1], p1[1] = p2[1], p2[1] = t;
 661        return true;
 662    }
 663    return false;
 664}
 665
 666static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
 667{
 668    for (int i = 0; i < nb_args; i++) {
 669        TCGTemp *ts = arg_temp(op->args[i]);
 670        if (ts) {
 671            init_ts_info(ctx, ts);
 672        }
 673    }
 674}
 675
 676static void copy_propagate(OptContext *ctx, TCGOp *op,
 677                           int nb_oargs, int nb_iargs)
 678{
 679    TCGContext *s = ctx->tcg;
 680
 681    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 682        TCGTemp *ts = arg_temp(op->args[i]);
 683        if (ts && ts_is_copy(ts)) {
 684            op->args[i] = temp_arg(find_better_copy(s, ts));
 685        }
 686    }
 687}
 688
 689static void finish_folding(OptContext *ctx, TCGOp *op)
 690{
 691    const TCGOpDef *def = &tcg_op_defs[op->opc];
 692    int i, nb_oargs;
 693
 694    /*
 695     * For an opcode that ends a BB, reset all temp data.
 696     * We do no cross-BB optimization.
 697     */
 698    if (def->flags & TCG_OPF_BB_END) {
 699        memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
 700        ctx->prev_mb = NULL;
 701        return;
 702    }
 703
 704    nb_oargs = def->nb_oargs;
 705    for (i = 0; i < nb_oargs; i++) {
 706        TCGTemp *ts = arg_temp(op->args[i]);
 707        reset_ts(ts);
 708        /*
 709         * Save the corresponding known-zero and sign-repetition masks
 710         * for the first output argument (only one supported so far).
 711         */
 712        if (i == 0) {
 713            ts_info(ts)->z_mask = ctx->z_mask;
 714            ts_info(ts)->s_mask = ctx->s_mask;
 715        }
 716    }
 717}
 718
 719/*
 720 * The fold_* functions return true when processing is complete,
 721 * usually by folding the operation to a constant or to a copy,
 722 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 723 * like collect information about the value produced, for use in
 724 * optimizing a subsequent operation.
 725 *
 726 * These first fold_* functions are all helpers, used by other
 727 * folders for more specific operations.
 728 */
 729
 730static bool fold_const1(OptContext *ctx, TCGOp *op)
 731{
 732    if (arg_is_const(op->args[1])) {
 733        uint64_t t;
 734
 735        t = arg_info(op->args[1])->val;
 736        t = do_constant_folding(op->opc, ctx->type, t, 0);
 737        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
 738    }
 739    return false;
 740}
 741
 742static bool fold_const2(OptContext *ctx, TCGOp *op)
 743{
 744    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
 745        uint64_t t1 = arg_info(op->args[1])->val;
 746        uint64_t t2 = arg_info(op->args[2])->val;
 747
 748        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
 749        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
 750    }
 751    return false;
 752}
 753
 754static bool fold_commutative(OptContext *ctx, TCGOp *op)
 755{
 756    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 757    return false;
 758}
 759
 760static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 761{
 762    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 763    return fold_const2(ctx, op);
 764}
 765
 766static bool fold_masks(OptContext *ctx, TCGOp *op)
 767{
 768    uint64_t a_mask = ctx->a_mask;
 769    uint64_t z_mask = ctx->z_mask;
 770    uint64_t s_mask = ctx->s_mask;
 771
 772    /*
 773     * 32-bit ops generate 32-bit results, which for the purpose of
 774     * simplifying tcg are sign-extended.  Certainly that's how we
 775     * represent our constants elsewhere.  Note that the bits will
 776     * be reset properly for a 64-bit value when encountering the
 777     * type changing opcodes.
 778     */
 779    if (ctx->type == TCG_TYPE_I32) {
 780        a_mask = (int32_t)a_mask;
 781        z_mask = (int32_t)z_mask;
 782        s_mask |= MAKE_64BIT_MASK(32, 32);
 783        ctx->z_mask = z_mask;
 784        ctx->s_mask = s_mask;
 785    }
 786
 787    if (z_mask == 0) {
 788        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
 789    }
 790    if (a_mask == 0) {
 791        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 792    }
 793    return false;
 794}
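/*
 * Example of a_mask at work: for "and_i32 t0, t1, $0xff" where t1 is
 * already known to have z_mask == 0xff, fold_and computes
 * a_mask = z1 & ~z2 == 0, so fold_masks replaces the op with
 * "mov_i32 t0, t1".
 */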
 795
 796/*
 797 * Convert @op to NOT, if NOT is supported by the host.
 798 * Return true if the conversion is successful, which will still
 799 * indicate that the processing is complete.
 800 */
 801static bool fold_not(OptContext *ctx, TCGOp *op);
 802static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
 803{
 804    TCGOpcode not_op;
 805    bool have_not;
 806
 807    switch (ctx->type) {
 808    case TCG_TYPE_I32:
 809        not_op = INDEX_op_not_i32;
 810        have_not = TCG_TARGET_HAS_not_i32;
 811        break;
 812    case TCG_TYPE_I64:
 813        not_op = INDEX_op_not_i64;
 814        have_not = TCG_TARGET_HAS_not_i64;
 815        break;
 816    case TCG_TYPE_V64:
 817    case TCG_TYPE_V128:
 818    case TCG_TYPE_V256:
 819        not_op = INDEX_op_not_vec;
 820        have_not = TCG_TARGET_HAS_not_vec;
 821        break;
 822    default:
 823        g_assert_not_reached();
 824    }
 825    if (have_not) {
 826        op->opc = not_op;
 827        op->args[1] = op->args[idx];
 828        return fold_not(ctx, op);
 829    }
 830    return false;
 831}
 832
 833/* If the binary operation has first argument @i, fold to @i. */
 834static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 835{
 836    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 837        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 838    }
 839    return false;
 840}
 841
 842/* If the binary operation has first argument @i, fold to NOT. */
 843static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 844{
 845    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 846        return fold_to_not(ctx, op, 2);
 847    }
 848    return false;
 849}
 850
 851/* If the binary operation has second argument @i, fold to @i. */
 852static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 853{
 854    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 855        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 856    }
 857    return false;
 858}
 859
 860/* If the binary operation has second argument @i, fold to identity. */
 861static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
 862{
 863    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 864        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 865    }
 866    return false;
 867}
 868
 869/* If the binary operation has second argument @i, fold to NOT. */
 870static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 871{
 872    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 873        return fold_to_not(ctx, op, 1);
 874    }
 875    return false;
 876}
 877
 878/* If the binary operation has both arguments equal, fold to @i. */
 879static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 880{
 881    if (args_are_copies(op->args[1], op->args[2])) {
 882        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 883    }
 884    return false;
 885}
 886
 887/* If the binary operation has both arguments equal, fold to identity. */
 888static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
 889{
 890    if (args_are_copies(op->args[1], op->args[2])) {
 891        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 892    }
 893    return false;
 894}
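/*
 * Typical uses of the helpers above, as invoked by the per-op folders:
 *   and t0, t1, $0    -> movi t0, $0     (fold_xi_to_i with i == 0)
 *   or  t0, t1, $0    -> mov  t0, t1     (fold_xi_to_x with i == 0)
 *   xor t0, t1, $-1   -> not  t0, t1     (fold_xi_to_not, if NOT exists)
 *   sub t0, t1, t1    -> movi t0, $0     (fold_xx_to_i with i == 0)
 *   and t0, t1, t1    -> mov  t0, t1     (fold_xx_to_x)
 */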
 895
 896/*
 897 * These outermost fold_<op> functions are sorted alphabetically.
 898 *
 899 * The ordering of the transformations should be:
 900 *   1) those that produce a constant
 901 *   2) those that produce a copy
 902 *   3) those that produce information about the result value.
 903 */
 904
 905static bool fold_add(OptContext *ctx, TCGOp *op)
 906{
 907    if (fold_const2_commutative(ctx, op) ||
 908        fold_xi_to_x(ctx, op, 0)) {
 909        return true;
 910    }
 911    return false;
 912}
 913
 914/* We cannot yet use do_constant_folding with vectors. */
 915static bool fold_add_vec(OptContext *ctx, TCGOp *op)
 916{
 917    if (fold_commutative(ctx, op) ||
 918        fold_xi_to_x(ctx, op, 0)) {
 919        return true;
 920    }
 921    return false;
 922}
 923
 924static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 925{
 926    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
 927        arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
 928        uint64_t al = arg_info(op->args[2])->val;
 929        uint64_t ah = arg_info(op->args[3])->val;
 930        uint64_t bl = arg_info(op->args[4])->val;
 931        uint64_t bh = arg_info(op->args[5])->val;
 932        TCGArg rl, rh;
 933        TCGOp *op2;
 934
 935        if (ctx->type == TCG_TYPE_I32) {
 936            uint64_t a = deposit64(al, 32, 32, ah);
 937            uint64_t b = deposit64(bl, 32, 32, bh);
 938
 939            if (add) {
 940                a += b;
 941            } else {
 942                a -= b;
 943            }
 944
 945            al = sextract64(a, 0, 32);
 946            ah = sextract64(a, 32, 32);
 947        } else {
 948            Int128 a = int128_make128(al, ah);
 949            Int128 b = int128_make128(bl, bh);
 950
 951            if (add) {
 952                a = int128_add(a, b);
 953            } else {
 954                a = int128_sub(a, b);
 955            }
 956
 957            al = int128_getlo(a);
 958            ah = int128_gethi(a);
 959        }
 960
 961        rl = op->args[0];
 962        rh = op->args[1];
 963
 964        /* The proper opcode is supplied by tcg_opt_gen_mov. */
 965        op2 = tcg_op_insert_before(ctx->tcg, op, 0);
 966
 967        tcg_opt_gen_movi(ctx, op, rl, al);
 968        tcg_opt_gen_movi(ctx, op2, rh, ah);
 969        return true;
 970    }
 971    return false;
 972}
 973
 974static bool fold_add2(OptContext *ctx, TCGOp *op)
 975{
 976    /* Note that the high and low parts may be independently swapped. */
 977    swap_commutative(op->args[0], &op->args[2], &op->args[4]);
 978    swap_commutative(op->args[1], &op->args[3], &op->args[5]);
 979
 980    return fold_addsub2(ctx, op, true);
 981}
 982
 983static bool fold_and(OptContext *ctx, TCGOp *op)
 984{
 985    uint64_t z1, z2;
 986
 987    if (fold_const2_commutative(ctx, op) ||
 988        fold_xi_to_i(ctx, op, 0) ||
 989        fold_xi_to_x(ctx, op, -1) ||
 990        fold_xx_to_x(ctx, op)) {
 991        return true;
 992    }
 993
 994    z1 = arg_info(op->args[1])->z_mask;
 995    z2 = arg_info(op->args[2])->z_mask;
 996    ctx->z_mask = z1 & z2;
 997
 998    /*
 999     * Sign repetitions are perforce all identical, whether they are 1 or 0.
1000     * Bitwise operations preserve the relative quantity of the repetitions.
1001     */
1002    ctx->s_mask = arg_info(op->args[1])->s_mask
1003                & arg_info(op->args[2])->s_mask;
1004
1005    /*
1006     * Known-zeros does not imply known-ones.  Therefore unless
1007     * arg2 is constant, we can't infer affected bits from it.
1008     */
1009    if (arg_is_const(op->args[2])) {
1010        ctx->a_mask = z1 & ~z2;
1011    }
1012
1013    return fold_masks(ctx, op);
1014}
1015
1016static bool fold_andc(OptContext *ctx, TCGOp *op)
1017{
1018    uint64_t z1;
1019
1020    if (fold_const2(ctx, op) ||
1021        fold_xx_to_i(ctx, op, 0) ||
1022        fold_xi_to_x(ctx, op, 0) ||
1023        fold_ix_to_not(ctx, op, -1)) {
1024        return true;
1025    }
1026
1027    z1 = arg_info(op->args[1])->z_mask;
1028
1029    /*
1030     * Known-zeros does not imply known-ones.  Therefore unless
1031     * arg2 is constant, we can't infer anything from it.
1032     */
1033    if (arg_is_const(op->args[2])) {
1034        uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1035        ctx->a_mask = z1 & ~z2;
1036        z1 &= z2;
1037    }
1038    ctx->z_mask = z1;
1039
1040    ctx->s_mask = arg_info(op->args[1])->s_mask
1041                & arg_info(op->args[2])->s_mask;
1042    return fold_masks(ctx, op);
1043}
1044
1045static bool fold_brcond(OptContext *ctx, TCGOp *op)
1046{
1047    TCGCond cond = op->args[2];
1048    int i;
1049
1050    if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1051        op->args[2] = cond = tcg_swap_cond(cond);
1052    }
1053
1054    i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1055    if (i == 0) {
1056        tcg_op_remove(ctx->tcg, op);
1057        return true;
1058    }
1059    if (i > 0) {
1060        op->opc = INDEX_op_br;
1061        op->args[0] = op->args[3];
1062    }
1063    return false;
1064}
1065
1066static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1067{
1068    TCGCond cond = op->args[4];
1069    TCGArg label = op->args[5];
1070    int i, inv = 0;
1071
1072    if (swap_commutative2(&op->args[0], &op->args[2])) {
1073        op->args[4] = cond = tcg_swap_cond(cond);
1074    }
1075
1076    i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1077    if (i >= 0) {
1078        goto do_brcond_const;
1079    }
1080
1081    switch (cond) {
1082    case TCG_COND_LT:
1083    case TCG_COND_GE:
1084        /*
1085         * Simplify LT/GE comparisons vs zero to a single compare
1086         * vs the high word of the input.
1087         */
1088        if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1089            arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1090            goto do_brcond_high;
1091        }
1092        break;
1093
1094    case TCG_COND_NE:
1095        inv = 1;
1096        QEMU_FALLTHROUGH;
1097    case TCG_COND_EQ:
1098        /*
1099         * Simplify EQ/NE comparisons where one of the pairs
1100         * can be simplified.
1101         */
1102        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1103                                     op->args[2], cond);
1104        switch (i ^ inv) {
1105        case 0:
1106            goto do_brcond_const;
1107        case 1:
1108            goto do_brcond_high;
1109        }
1110
1111        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1112                                     op->args[3], cond);
1113        switch (i ^ inv) {
1114        case 0:
1115            goto do_brcond_const;
1116        case 1:
1117            op->opc = INDEX_op_brcond_i32;
1118            op->args[1] = op->args[2];
1119            op->args[2] = cond;
1120            op->args[3] = label;
1121            break;
1122        }
1123        break;
1124
1125    default:
1126        break;
1127
1128    do_brcond_high:
1129        op->opc = INDEX_op_brcond_i32;
1130        op->args[0] = op->args[1];
1131        op->args[1] = op->args[3];
1132        op->args[2] = cond;
1133        op->args[3] = label;
1134        break;
1135
1136    do_brcond_const:
1137        if (i == 0) {
1138            tcg_op_remove(ctx->tcg, op);
1139            return true;
1140        }
1141        op->opc = INDEX_op_br;
1142        op->args[0] = label;
1143        break;
1144    }
1145    return false;
1146}
1147
1148static bool fold_bswap(OptContext *ctx, TCGOp *op)
1149{
1150    uint64_t z_mask, s_mask, sign;
1151
1152    if (arg_is_const(op->args[1])) {
1153        uint64_t t = arg_info(op->args[1])->val;
1154
1155        t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1156        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1157    }
1158
1159    z_mask = arg_info(op->args[1])->z_mask;
1160
1161    switch (op->opc) {
1162    case INDEX_op_bswap16_i32:
1163    case INDEX_op_bswap16_i64:
1164        z_mask = bswap16(z_mask);
1165        sign = INT16_MIN;
1166        break;
1167    case INDEX_op_bswap32_i32:
1168    case INDEX_op_bswap32_i64:
1169        z_mask = bswap32(z_mask);
1170        sign = INT32_MIN;
1171        break;
1172    case INDEX_op_bswap64_i64:
1173        z_mask = bswap64(z_mask);
1174        sign = INT64_MIN;
1175        break;
1176    default:
1177        g_assert_not_reached();
1178    }
1179    s_mask = smask_from_zmask(z_mask);
1180
1181    switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1182    case TCG_BSWAP_OZ:
1183        break;
1184    case TCG_BSWAP_OS:
1185        /* If the sign bit may be 1, force all the bits above to 1. */
1186        if (z_mask & sign) {
1187            z_mask |= sign;
1188            s_mask = sign << 1;
1189        }
1190        break;
1191    default:
1192        /* The high bits are undefined: force all bits above the sign to 1. */
1193        z_mask |= sign << 1;
1194        s_mask = 0;
1195        break;
1196    }
1197    ctx->z_mask = z_mask;
1198    ctx->s_mask = s_mask;
1199
1200    return fold_masks(ctx, op);
1201}
1202
1203static bool fold_call(OptContext *ctx, TCGOp *op)
1204{
1205    TCGContext *s = ctx->tcg;
1206    int nb_oargs = TCGOP_CALLO(op);
1207    int nb_iargs = TCGOP_CALLI(op);
1208    int flags, i;
1209
1210    init_arguments(ctx, op, nb_oargs + nb_iargs);
1211    copy_propagate(ctx, op, nb_oargs, nb_iargs);
1212
1213    /* If the function reads or writes globals, reset temp data. */
1214    flags = tcg_call_flags(op);
1215    if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1216        int nb_globals = s->nb_globals;
1217
1218        for (i = 0; i < nb_globals; i++) {
1219            if (test_bit(i, ctx->temps_used.l)) {
1220                reset_ts(&ctx->tcg->temps[i]);
1221            }
1222        }
1223    }
1224
1225    /* Reset temp data for outputs. */
1226    for (i = 0; i < nb_oargs; i++) {
1227        reset_temp(op->args[i]);
1228    }
1229
1230    /* Stop optimizing MB across calls. */
1231    ctx->prev_mb = NULL;
1232    return true;
1233}
1234
1235static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1236{
1237    uint64_t z_mask;
1238
1239    if (arg_is_const(op->args[1])) {
1240        uint64_t t = arg_info(op->args[1])->val;
1241
1242        if (t != 0) {
1243            t = do_constant_folding(op->opc, ctx->type, t, 0);
1244            return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1245        }
1246        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1247    }
1248
1249    switch (ctx->type) {
1250    case TCG_TYPE_I32:
1251        z_mask = 31;
1252        break;
1253    case TCG_TYPE_I64:
1254        z_mask = 63;
1255        break;
1256    default:
1257        g_assert_not_reached();
1258    }
1259    ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1260    ctx->s_mask = smask_from_zmask(ctx->z_mask);
1261    return false;
1262}
1263
1264static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1265{
1266    if (fold_const1(ctx, op)) {
1267        return true;
1268    }
1269
1270    switch (ctx->type) {
1271    case TCG_TYPE_I32:
1272        ctx->z_mask = 32 | 31;
1273        break;
1274    case TCG_TYPE_I64:
1275        ctx->z_mask = 64 | 63;
1276        break;
1277    default:
1278        g_assert_not_reached();
1279    }
1280    ctx->s_mask = smask_from_zmask(ctx->z_mask);
1281    return false;
1282}
1283
1284static bool fold_deposit(OptContext *ctx, TCGOp *op)
1285{
1286    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1287        uint64_t t1 = arg_info(op->args[1])->val;
1288        uint64_t t2 = arg_info(op->args[2])->val;
1289
1290        t1 = deposit64(t1, op->args[3], op->args[4], t2);
1291        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1292    }
1293
1294    ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1295                            op->args[3], op->args[4],
1296                            arg_info(op->args[2])->z_mask);
1297    return false;
1298}
1299
1300static bool fold_divide(OptContext *ctx, TCGOp *op)
1301{
1302    if (fold_const2(ctx, op) ||
1303        fold_xi_to_x(ctx, op, 1)) {
1304        return true;
1305    }
1306    return false;
1307}
1308
1309static bool fold_dup(OptContext *ctx, TCGOp *op)
1310{
1311    if (arg_is_const(op->args[1])) {
1312        uint64_t t = arg_info(op->args[1])->val;
1313        t = dup_const(TCGOP_VECE(op), t);
1314        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1315    }
1316    return false;
1317}
1318
1319static bool fold_dup2(OptContext *ctx, TCGOp *op)
1320{
1321    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1322        uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1323                               arg_info(op->args[2])->val);
1324        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1325    }
1326
1327    if (args_are_copies(op->args[1], op->args[2])) {
1328        op->opc = INDEX_op_dup_vec;
1329        TCGOP_VECE(op) = MO_32;
1330    }
1331    return false;
1332}
1333
1334static bool fold_eqv(OptContext *ctx, TCGOp *op)
1335{
1336    if (fold_const2_commutative(ctx, op) ||
1337        fold_xi_to_x(ctx, op, -1) ||
1338        fold_xi_to_not(ctx, op, 0)) {
1339        return true;
1340    }
1341
1342    ctx->s_mask = arg_info(op->args[1])->s_mask
1343                & arg_info(op->args[2])->s_mask;
1344    return false;
1345}
1346
1347static bool fold_extract(OptContext *ctx, TCGOp *op)
1348{
1349    uint64_t z_mask_old, z_mask;
1350    int pos = op->args[2];
1351    int len = op->args[3];
1352
1353    if (arg_is_const(op->args[1])) {
1354        uint64_t t;
1355
1356        t = arg_info(op->args[1])->val;
1357        t = extract64(t, pos, len);
1358        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1359    }
1360
1361    z_mask_old = arg_info(op->args[1])->z_mask;
1362    z_mask = extract64(z_mask_old, pos, len);
1363    if (pos == 0) {
1364        ctx->a_mask = z_mask_old ^ z_mask;
1365    }
1366    ctx->z_mask = z_mask;
1367    ctx->s_mask = smask_from_zmask(z_mask);
1368
1369    return fold_masks(ctx, op);
1370}
1371
1372static bool fold_extract2(OptContext *ctx, TCGOp *op)
1373{
1374    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1375        uint64_t v1 = arg_info(op->args[1])->val;
1376        uint64_t v2 = arg_info(op->args[2])->val;
1377        int shr = op->args[3];
1378
1379        if (op->opc == INDEX_op_extract2_i64) {
1380            v1 >>= shr;
1381            v2 <<= 64 - shr;
1382        } else {
1383            v1 = (uint32_t)v1 >> shr;
1384            v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1385        }
1386        return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1387    }
1388    return false;
1389}
1390
1391static bool fold_exts(OptContext *ctx, TCGOp *op)
1392{
1393    uint64_t s_mask_old, s_mask, z_mask, sign;
1394    bool type_change = false;
1395
1396    if (fold_const1(ctx, op)) {
1397        return true;
1398    }
1399
1400    z_mask = arg_info(op->args[1])->z_mask;
1401    s_mask = arg_info(op->args[1])->s_mask;
1402    s_mask_old = s_mask;
1403
1404    switch (op->opc) {
1405    CASE_OP_32_64(ext8s):
1406        sign = INT8_MIN;
1407        z_mask = (uint8_t)z_mask;
1408        break;
1409    CASE_OP_32_64(ext16s):
1410        sign = INT16_MIN;
1411        z_mask = (uint16_t)z_mask;
1412        break;
1413    case INDEX_op_ext_i32_i64:
1414        type_change = true;
1415        QEMU_FALLTHROUGH;
1416    case INDEX_op_ext32s_i64:
1417        sign = INT32_MIN;
1418        z_mask = (uint32_t)z_mask;
1419        break;
1420    default:
1421        g_assert_not_reached();
1422    }
1423
1424    if (z_mask & sign) {
1425        z_mask |= sign;
1426    }
1427    s_mask |= sign << 1;
1428
1429    ctx->z_mask = z_mask;
1430    ctx->s_mask = s_mask;
1431    if (!type_change) {
1432        ctx->a_mask = s_mask & ~s_mask_old;
1433    }
1434
1435    return fold_masks(ctx, op);
1436}
1437
1438static bool fold_extu(OptContext *ctx, TCGOp *op)
1439{
1440    uint64_t z_mask_old, z_mask;
1441    bool type_change = false;
1442
1443    if (fold_const1(ctx, op)) {
1444        return true;
1445    }
1446
1447    z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1448
1449    switch (op->opc) {
1450    CASE_OP_32_64(ext8u):
1451        z_mask = (uint8_t)z_mask;
1452        break;
1453    CASE_OP_32_64(ext16u):
1454        z_mask = (uint16_t)z_mask;
1455        break;
1456    case INDEX_op_extrl_i64_i32:
1457    case INDEX_op_extu_i32_i64:
1458        type_change = true;
1459        QEMU_FALLTHROUGH;
1460    case INDEX_op_ext32u_i64:
1461        z_mask = (uint32_t)z_mask;
1462        break;
1463    case INDEX_op_extrh_i64_i32:
1464        type_change = true;
1465        z_mask >>= 32;
1466        break;
1467    default:
1468        g_assert_not_reached();
1469    }
1470
1471    ctx->z_mask = z_mask;
1472    ctx->s_mask = smask_from_zmask(z_mask);
1473    if (!type_change) {
1474        ctx->a_mask = z_mask_old ^ z_mask;
1475    }
1476    return fold_masks(ctx, op);
1477}
1478
1479static bool fold_mb(OptContext *ctx, TCGOp *op)
1480{
1481    /* Eliminate duplicate and redundant fence instructions.  */
1482    if (ctx->prev_mb) {
1483        /*
1484         * Merge two barriers of the same type into one,
1485         * or a weaker barrier into a stronger one,
1486         * or two weaker barriers into a stronger one.
1487         *   mb X; mb Y => mb X|Y
1488         *   mb; strl => mb; st
1489         *   ldaq; mb => ld; mb
1490         *   ldaq; strl => ld; mb; st
1491         * Other combinations are also merged into a strong
1492         * barrier.  This is stricter than specified but for
1493         * the purposes of TCG is better than not optimizing.
1494         */
1495        ctx->prev_mb->args[0] |= op->args[0];
1496        tcg_op_remove(ctx->tcg, op);
1497    } else {
1498        ctx->prev_mb = op;
1499    }
1500    return true;
1501}
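/*
 * For example, "mb $(TCG_MO_LD_LD | TCG_BAR_SC)" followed by
 * "mb $(TCG_MO_ST_ST | TCG_BAR_SC)" becomes a single mb whose argument
 * is the OR of both, which is at least as strong as either barrier.
 */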
1502
1503static bool fold_mov(OptContext *ctx, TCGOp *op)
1504{
1505    return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1506}
1507
1508static bool fold_movcond(OptContext *ctx, TCGOp *op)
1509{
1510    TCGCond cond = op->args[5];
1511    int i;
1512
1513    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1514        op->args[5] = cond = tcg_swap_cond(cond);
1515    }
1516    /*
1517     * Canonicalize the "false" input reg to match the destination reg so
1518     * that the tcg backend can implement a "move if true" operation.
1519     */
1520    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1521        op->args[5] = cond = tcg_invert_cond(cond);
1522    }
1523
1524    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1525    if (i >= 0) {
1526        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1527    }
1528
1529    ctx->z_mask = arg_info(op->args[3])->z_mask
1530                | arg_info(op->args[4])->z_mask;
1531    ctx->s_mask = arg_info(op->args[3])->s_mask
1532                & arg_info(op->args[4])->s_mask;
1533
1534    if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1535        uint64_t tv = arg_info(op->args[3])->val;
1536        uint64_t fv = arg_info(op->args[4])->val;
1537        TCGOpcode opc;
1538
1539        switch (ctx->type) {
1540        case TCG_TYPE_I32:
1541            opc = INDEX_op_setcond_i32;
1542            break;
1543        case TCG_TYPE_I64:
1544            opc = INDEX_op_setcond_i64;
1545            break;
1546        default:
1547            g_assert_not_reached();
1548        }
1549
1550        if (tv == 1 && fv == 0) {
1551            op->opc = opc;
1552            op->args[3] = cond;
1553        } else if (fv == 1 && tv == 0) {
1554            op->opc = opc;
1555            op->args[3] = tcg_invert_cond(cond);
1556        }
1557    }
1558    return false;
1559}
1560
1561static bool fold_mul(OptContext *ctx, TCGOp *op)
1562{
1563    if (fold_const2(ctx, op) ||
1564        fold_xi_to_i(ctx, op, 0) ||
1565        fold_xi_to_x(ctx, op, 1)) {
1566        return true;
1567    }
1568    return false;
1569}
1570
1571static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1572{
1573    if (fold_const2_commutative(ctx, op) ||
1574        fold_xi_to_i(ctx, op, 0)) {
1575        return true;
1576    }
1577    return false;
1578}
1579
1580static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1581{
1582    swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1583
1584    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1585        uint64_t a = arg_info(op->args[2])->val;
1586        uint64_t b = arg_info(op->args[3])->val;
1587        uint64_t h, l;
1588        TCGArg rl, rh;
1589        TCGOp *op2;
1590
1591        switch (op->opc) {
1592        case INDEX_op_mulu2_i32:
1593            l = (uint64_t)(uint32_t)a * (uint32_t)b;
1594            h = (int32_t)(l >> 32);
1595            l = (int32_t)l;
1596            break;
1597        case INDEX_op_muls2_i32:
1598            l = (int64_t)(int32_t)a * (int32_t)b;
1599            h = l >> 32;
1600            l = (int32_t)l;
1601            break;
1602        case INDEX_op_mulu2_i64:
1603            mulu64(&l, &h, a, b);
1604            break;
1605        case INDEX_op_muls2_i64:
1606            muls64(&l, &h, a, b);
1607            break;
1608        default:
1609            g_assert_not_reached();
1610        }
1611
1612        rl = op->args[0];
1613        rh = op->args[1];
1614
1615        /* The proper opcode is supplied by tcg_opt_gen_mov. */
1616        op2 = tcg_op_insert_before(ctx->tcg, op, 0);
1617
1618        tcg_opt_gen_movi(ctx, op, rl, l);
1619        tcg_opt_gen_movi(ctx, op2, rh, h);
1620        return true;
1621    }
1622    return false;
1623}
1624
1625static bool fold_nand(OptContext *ctx, TCGOp *op)
1626{
1627    if (fold_const2_commutative(ctx, op) ||
1628        fold_xi_to_not(ctx, op, -1)) {
1629        return true;
1630    }
1631
1632    ctx->s_mask = arg_info(op->args[1])->s_mask
1633                & arg_info(op->args[2])->s_mask;
1634    return false;
1635}
1636
1637static bool fold_neg(OptContext *ctx, TCGOp *op)
1638{
1639    uint64_t z_mask;
1640
1641    if (fold_const1(ctx, op)) {
1642        return true;
1643    }
1644
1645    /* Set to 1 all bits at and above the rightmost bit that may be set.  */
1646    z_mask = arg_info(op->args[1])->z_mask;
1647    ctx->z_mask = -(z_mask & -z_mask);
1648
1649    /*
1650     * Because of fold_sub_to_neg, we want to always return true,
1651     * via finish_folding.
1652     */
1653    finish_folding(ctx, op);
1654    return true;
1655}
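/*
 * Example of the z_mask computation above: if the operand's z_mask is
 * 0x6 (only bits 1 and 2 may be set), the lowest possibly-set bit is
 * bit 1, so the result's z_mask is ~1: bit 0 of the negation is always
 * zero, while any higher bit may be set.
 */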
1656
1657static bool fold_nor(OptContext *ctx, TCGOp *op)
1658{
1659    if (fold_const2_commutative(ctx, op) ||
1660        fold_xi_to_not(ctx, op, 0)) {
1661        return true;
1662    }
1663
1664    ctx->s_mask = arg_info(op->args[1])->s_mask
1665                & arg_info(op->args[2])->s_mask;
1666    return false;
1667}
1668
1669static bool fold_not(OptContext *ctx, TCGOp *op)
1670{
1671    if (fold_const1(ctx, op)) {
1672        return true;
1673    }
1674
1675    ctx->s_mask = arg_info(op->args[1])->s_mask;
1676
1677    /* Because of fold_to_not, we want to always return true, via finish. */
1678    finish_folding(ctx, op);
1679    return true;
1680}
1681
1682static bool fold_or(OptContext *ctx, TCGOp *op)
1683{
1684    if (fold_const2_commutative(ctx, op) ||
1685        fold_xi_to_x(ctx, op, 0) ||
1686        fold_xx_to_x(ctx, op)) {
1687        return true;
1688    }
1689
1690    ctx->z_mask = arg_info(op->args[1])->z_mask
1691                | arg_info(op->args[2])->z_mask;
1692    ctx->s_mask = arg_info(op->args[1])->s_mask
1693                & arg_info(op->args[2])->s_mask;
1694    return fold_masks(ctx, op);
1695}
1696
1697static bool fold_orc(OptContext *ctx, TCGOp *op)
1698{
1699    if (fold_const2(ctx, op) ||
1700        fold_xx_to_i(ctx, op, -1) ||
1701        fold_xi_to_x(ctx, op, -1) ||
1702        fold_ix_to_not(ctx, op, 0)) {
1703        return true;
1704    }
1705
1706    ctx->s_mask = arg_info(op->args[1])->s_mask
1707                & arg_info(op->args[2])->s_mask;
1708    return false;
1709}
1710
1711static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1712{
1713    const TCGOpDef *def = &tcg_op_defs[op->opc];
1714    MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1715    MemOp mop = get_memop(oi);
1716    int width = 8 * memop_size(mop);
1717
1718    if (width < 64) {
1719        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1720        if (!(mop & MO_SIGN)) {
1721            ctx->z_mask = MAKE_64BIT_MASK(0, width);
1722            ctx->s_mask <<= 1;
1723        }
1724    }
1725
1726    /* Opcodes that touch guest memory stop the mb optimization.  */
1727    ctx->prev_mb = NULL;
1728    return false;
1729}
1730
1731static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1732{
1733    /* Opcodes that touch guest memory stop the mb optimization.  */
1734    ctx->prev_mb = NULL;
1735    return false;
1736}
1737
1738static bool fold_remainder(OptContext *ctx, TCGOp *op)
1739{
1740    if (fold_const2(ctx, op) ||
1741        fold_xx_to_i(ctx, op, 0)) {
1742        return true;
1743    }
1744    return false;
1745}
1746
1747static bool fold_setcond(OptContext *ctx, TCGOp *op)
1748{
1749    TCGCond cond = op->args[3];
1750    int i;
1751
1752    if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1753        op->args[3] = cond = tcg_swap_cond(cond);
1754    }
1755
1756    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1757    if (i >= 0) {
1758        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1759    }
1760
1761    ctx->z_mask = 1;
1762    ctx->s_mask = smask_from_zmask(1);
1763    return false;
1764}
1765
1766static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1767{
1768    TCGCond cond = op->args[5];
1769    int i, inv = 0;
1770
1771    if (swap_commutative2(&op->args[1], &op->args[3])) {
1772        op->args[5] = cond = tcg_swap_cond(cond);
1773    }
1774
1775    i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1776    if (i >= 0) {
1777        goto do_setcond_const;
1778    }
1779
1780    switch (cond) {
1781    case TCG_COND_LT:
1782    case TCG_COND_GE:
1783        /*
1784         * Simplify LT/GE comparisons vs zero to a single compare
1785         * vs the high word of the input.
1786         */
1787        if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1788            arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1789            goto do_setcond_high;
1790        }
1791        break;
1792
1793    case TCG_COND_NE:
1794        inv = 1;
1795        QEMU_FALLTHROUGH;
1796    case TCG_COND_EQ:
1797        /*
1798         * Simplify EQ/NE comparisons where one of the pairs
1799         * can be simplified.
1800         */
1801        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1802                                     op->args[3], cond);
1803        switch (i ^ inv) {
1804        case 0:
1805            goto do_setcond_const;
1806        case 1:
1807            goto do_setcond_high;
1808        }
1809
1810        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
1811                                     op->args[4], cond);
1812        switch (i ^ inv) {
1813        case 0:
1814            goto do_setcond_const;
1815        case 1:
1816            op->args[2] = op->args[3];
1817            op->args[3] = cond;
1818            op->opc = INDEX_op_setcond_i32;
1819            break;
1820        }
1821        break;
1822
1823    default:
1824        break;
1825
1826    do_setcond_high:
1827        op->args[1] = op->args[2];
1828        op->args[2] = op->args[4];
1829        op->args[3] = cond;
1830        op->opc = INDEX_op_setcond_i32;
1831        break;
1832    }
1833
1834    ctx->z_mask = 1;
1835    ctx->s_mask = smask_from_zmask(1);
1836    return false;
1837
1838 do_setcond_const:
1839    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1840}
1841
1842static bool fold_sextract(OptContext *ctx, TCGOp *op)
1843{
1844    uint64_t z_mask, s_mask, s_mask_old;
1845    int pos = op->args[2];
1846    int len = op->args[3];
1847
1848    if (arg_is_const(op->args[1])) {
1849        uint64_t t;
1850
1851        t = arg_info(op->args[1])->val;
1852        t = sextract64(t, pos, len);
1853        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1854    }
1855
1856    z_mask = arg_info(op->args[1])->z_mask;
1857    z_mask = sextract64(z_mask, pos, len);
1858    ctx->z_mask = z_mask;
1859
1860    s_mask_old = arg_info(op->args[1])->s_mask;
1861    s_mask = sextract64(s_mask_old, pos, len);
1862    s_mask |= MAKE_64BIT_MASK(len, 64 - len);
1863    ctx->s_mask = s_mask;
1864
1865    if (pos == 0) {
1866        ctx->a_mask = s_mask & ~s_mask_old;
1867    }
1868
1869    return fold_masks(ctx, op);
1870}
1871
1872static bool fold_shift(OptContext *ctx, TCGOp *op)
1873{
1874    uint64_t s_mask, z_mask, sign;
1875
1876    if (fold_const2(ctx, op) ||
1877        fold_ix_to_i(ctx, op, 0) ||
1878        fold_xi_to_x(ctx, op, 0)) {
1879        return true;
1880    }
1881
1882    s_mask = arg_info(op->args[1])->s_mask;
1883    z_mask = arg_info(op->args[1])->z_mask;
1884
1885    if (arg_is_const(op->args[2])) {
1886        int sh = arg_info(op->args[2])->val;
1887
1888        ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
1889
1890        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
1891        ctx->s_mask = smask_from_smask(s_mask);
1892
1893        return fold_masks(ctx, op);
1894    }
1895
1896    switch (op->opc) {
1897    CASE_OP_32_64(sar):
1898        /*
1899         * Arithmetic right shift will not reduce the number of
1900         * input sign repetitions.
1901         */
1902        ctx->s_mask = s_mask;
1903        break;
1904    CASE_OP_32_64(shr):
1905        /*
1906         * If the sign bit is known zero, then logical right shift
1907         * will not reduce the number of input sign repetitions.
1908         */
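            /*
             * "sign" is the effective sign bit: one below the lowest bit
             * of s_mask.  If it is known zero, the repeated bits above it
             * are zero as well, so the zeros shifted in from the left
             * keep the sign repetitions intact.
             */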
1909        sign = (s_mask & -s_mask) >> 1;
1910        if (!(z_mask & sign)) {
1911            ctx->s_mask = s_mask;
1912        }
1913        break;
1914    default:
1915        break;
1916    }
1917
1918    return false;
1919}
1920
1921static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
1922{
1923    TCGOpcode neg_op;
1924    bool have_neg;
1925
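        /* Transform "sub t, 0, x" into "neg t, x" when the target has neg. */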
1926    if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
1927        return false;
1928    }
1929
1930    switch (ctx->type) {
1931    case TCG_TYPE_I32:
1932        neg_op = INDEX_op_neg_i32;
1933        have_neg = TCG_TARGET_HAS_neg_i32;
1934        break;
1935    case TCG_TYPE_I64:
1936        neg_op = INDEX_op_neg_i64;
1937        have_neg = TCG_TARGET_HAS_neg_i64;
1938        break;
1939    case TCG_TYPE_V64:
1940    case TCG_TYPE_V128:
1941    case TCG_TYPE_V256:
1942        neg_op = INDEX_op_neg_vec;
1943        have_neg = (TCG_TARGET_HAS_neg_vec &&
1944                    tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
1945        break;
1946    default:
1947        g_assert_not_reached();
1948    }
1949    if (have_neg) {
1950        op->opc = neg_op;
1951        op->args[1] = op->args[2];
1952        return fold_neg(ctx, op);
1953    }
1954    return false;
1955}
1956
1957/* We cannot as yet do_constant_folding with vectors. */
1958static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
1959{
1960    if (fold_xx_to_i(ctx, op, 0) ||
1961        fold_xi_to_x(ctx, op, 0) ||
1962        fold_sub_to_neg(ctx, op)) {
1963        return true;
1964    }
1965    return false;
1966}
1967
1968static bool fold_sub(OptContext *ctx, TCGOp *op)
1969{
1970    return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
1971}
1972
1973static bool fold_sub2(OptContext *ctx, TCGOp *op)
1974{
1975    return fold_addsub2(ctx, op, false);
1976}
1977
1978static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
1979{
1980    /* We can't do any folding with a load, but we can record bits. */
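        /*
         * E.g. an 8-bit zero-extending load clears bits [8, 64), so the
         * result is non-negative with at least 55 redundant sign bits.
         */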
1981    switch (op->opc) {
1982    CASE_OP_32_64(ld8s):
1983        ctx->s_mask = MAKE_64BIT_MASK(8, 56);
1984        break;
1985    CASE_OP_32_64(ld8u):
1986        ctx->z_mask = MAKE_64BIT_MASK(0, 8);
1987        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
1988        break;
1989    CASE_OP_32_64(ld16s):
1990        ctx->s_mask = MAKE_64BIT_MASK(16, 48);
1991        break;
1992    CASE_OP_32_64(ld16u):
1993        ctx->z_mask = MAKE_64BIT_MASK(0, 16);
1994        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
1995        break;
1996    case INDEX_op_ld32s_i64:
1997        ctx->s_mask = MAKE_64BIT_MASK(32, 32);
1998        break;
1999    case INDEX_op_ld32u_i64:
2000        ctx->z_mask = MAKE_64BIT_MASK(0, 32);
2001        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
2002        break;
2003    default:
2004        g_assert_not_reached();
2005    }
2006    return false;
2007}
2008
2009static bool fold_xor(OptContext *ctx, TCGOp *op)
2010{
2011    if (fold_const2_commutative(ctx, op) ||
2012        fold_xx_to_i(ctx, op, 0) ||
2013        fold_xi_to_x(ctx, op, 0) ||
2014        fold_xi_to_not(ctx, op, -1)) {
2015        return true;
2016    }
2017
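        /*
         * A result bit may be nonzero only if it may be nonzero in either
         * input; sign repetitions common to both inputs survive the xor.
         */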
2018    ctx->z_mask = arg_info(op->args[1])->z_mask
2019                | arg_info(op->args[2])->z_mask;
2020    ctx->s_mask = arg_info(op->args[1])->s_mask
2021                & arg_info(op->args[2])->s_mask;
2022    return fold_masks(ctx, op);
2023}
2024
2025/* Propagate constants and copies, fold constant expressions. */
2026void tcg_optimize(TCGContext *s)
2027{
2028    int nb_temps, i;
2029    TCGOp *op, *op_next;
2030    OptContext ctx = { .tcg = s };
2031
2032    /* Each temp's state_ptr describes what is known about the temp.
2033       If the temp holds a constant then its value is recorded there.
2034       If the temp is a copy of other temps then the copies are
2035       available through a doubly linked circular list. */
2036
2037    nb_temps = s->nb_temps;
2038    for (i = 0; i < nb_temps; ++i) {
2039        s->temps[i].state_ptr = NULL;
2040    }
2041
2042    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2043        TCGOpcode opc = op->opc;
2044        const TCGOpDef *def;
2045        bool done = false;
2046
2047        /* Calls are special. */
2048        if (opc == INDEX_op_call) {
2049            fold_call(&ctx, op);
2050            continue;
2051        }
2052
2053        def = &tcg_op_defs[opc];
2054        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2055        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2056
2057        /* Pre-compute the type of the operation. */
2058        if (def->flags & TCG_OPF_VECTOR) {
2059            ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2060        } else if (def->flags & TCG_OPF_64BIT) {
2061            ctx.type = TCG_TYPE_I64;
2062        } else {
2063            ctx.type = TCG_TYPE_I32;
2064        }
2065
2066        /* Assume all bits affected, no bits known zero, no sign reps. */
2067        ctx.a_mask = -1;
2068        ctx.z_mask = -1;
2069        ctx.s_mask = 0;
2070
2071        /*
2072         * Process each opcode.
2073         * Sorted alphabetically by opcode as much as possible.
2074         */
2075        switch (opc) {
2076        CASE_OP_32_64(add):
2077            done = fold_add(&ctx, op);
2078            break;
2079        case INDEX_op_add_vec:
2080            done = fold_add_vec(&ctx, op);
2081            break;
2082        CASE_OP_32_64(add2):
2083            done = fold_add2(&ctx, op);
2084            break;
2085        CASE_OP_32_64_VEC(and):
2086            done = fold_and(&ctx, op);
2087            break;
2088        CASE_OP_32_64_VEC(andc):
2089            done = fold_andc(&ctx, op);
2090            break;
2091        CASE_OP_32_64(brcond):
2092            done = fold_brcond(&ctx, op);
2093            break;
2094        case INDEX_op_brcond2_i32:
2095            done = fold_brcond2(&ctx, op);
2096            break;
2097        CASE_OP_32_64(bswap16):
2098        CASE_OP_32_64(bswap32):
2099        case INDEX_op_bswap64_i64:
2100            done = fold_bswap(&ctx, op);
2101            break;
2102        CASE_OP_32_64(clz):
2103        CASE_OP_32_64(ctz):
2104            done = fold_count_zeros(&ctx, op);
2105            break;
2106        CASE_OP_32_64(ctpop):
2107            done = fold_ctpop(&ctx, op);
2108            break;
2109        CASE_OP_32_64(deposit):
2110            done = fold_deposit(&ctx, op);
2111            break;
2112        CASE_OP_32_64(div):
2113        CASE_OP_32_64(divu):
2114            done = fold_divide(&ctx, op);
2115            break;
2116        case INDEX_op_dup_vec:
2117            done = fold_dup(&ctx, op);
2118            break;
2119        case INDEX_op_dup2_vec:
2120            done = fold_dup2(&ctx, op);
2121            break;
2122        CASE_OP_32_64_VEC(eqv):
2123            done = fold_eqv(&ctx, op);
2124            break;
2125        CASE_OP_32_64(extract):
2126            done = fold_extract(&ctx, op);
2127            break;
2128        CASE_OP_32_64(extract2):
2129            done = fold_extract2(&ctx, op);
2130            break;
2131        CASE_OP_32_64(ext8s):
2132        CASE_OP_32_64(ext16s):
2133        case INDEX_op_ext32s_i64:
2134        case INDEX_op_ext_i32_i64:
2135            done = fold_exts(&ctx, op);
2136            break;
2137        CASE_OP_32_64(ext8u):
2138        CASE_OP_32_64(ext16u):
2139        case INDEX_op_ext32u_i64:
2140        case INDEX_op_extu_i32_i64:
2141        case INDEX_op_extrl_i64_i32:
2142        case INDEX_op_extrh_i64_i32:
2143            done = fold_extu(&ctx, op);
2144            break;
2145        CASE_OP_32_64(ld8s):
2146        CASE_OP_32_64(ld8u):
2147        CASE_OP_32_64(ld16s):
2148        CASE_OP_32_64(ld16u):
2149        case INDEX_op_ld32s_i64:
2150        case INDEX_op_ld32u_i64:
2151            done = fold_tcg_ld(&ctx, op);
2152            break;
2153        case INDEX_op_mb:
2154            done = fold_mb(&ctx, op);
2155            break;
2156        CASE_OP_32_64_VEC(mov):
2157            done = fold_mov(&ctx, op);
2158            break;
2159        CASE_OP_32_64(movcond):
2160            done = fold_movcond(&ctx, op);
2161            break;
2162        CASE_OP_32_64(mul):
2163            done = fold_mul(&ctx, op);
2164            break;
2165        CASE_OP_32_64(mulsh):
2166        CASE_OP_32_64(muluh):
2167            done = fold_mul_highpart(&ctx, op);
2168            break;
2169        CASE_OP_32_64(muls2):
2170        CASE_OP_32_64(mulu2):
2171            done = fold_multiply2(&ctx, op);
2172            break;
2173        CASE_OP_32_64_VEC(nand):
2174            done = fold_nand(&ctx, op);
2175            break;
2176        CASE_OP_32_64(neg):
2177            done = fold_neg(&ctx, op);
2178            break;
2179        CASE_OP_32_64_VEC(nor):
2180            done = fold_nor(&ctx, op);
2181            break;
2182        CASE_OP_32_64_VEC(not):
2183            done = fold_not(&ctx, op);
2184            break;
2185        CASE_OP_32_64_VEC(or):
2186            done = fold_or(&ctx, op);
2187            break;
2188        CASE_OP_32_64_VEC(orc):
2189            done = fold_orc(&ctx, op);
2190            break;
2191        case INDEX_op_qemu_ld_i32:
2192        case INDEX_op_qemu_ld_i64:
2193            done = fold_qemu_ld(&ctx, op);
2194            break;
2195        case INDEX_op_qemu_st_i32:
2196        case INDEX_op_qemu_st8_i32:
2197        case INDEX_op_qemu_st_i64:
2198            done = fold_qemu_st(&ctx, op);
2199            break;
2200        CASE_OP_32_64(rem):
2201        CASE_OP_32_64(remu):
2202            done = fold_remainder(&ctx, op);
2203            break;
2204        CASE_OP_32_64(rotl):
2205        CASE_OP_32_64(rotr):
2206        CASE_OP_32_64(sar):
2207        CASE_OP_32_64(shl):
2208        CASE_OP_32_64(shr):
2209            done = fold_shift(&ctx, op);
2210            break;
2211        CASE_OP_32_64(setcond):
2212            done = fold_setcond(&ctx, op);
2213            break;
2214        case INDEX_op_setcond2_i32:
2215            done = fold_setcond2(&ctx, op);
2216            break;
2217        CASE_OP_32_64(sextract):
2218            done = fold_sextract(&ctx, op);
2219            break;
2220        CASE_OP_32_64(sub):
2221            done = fold_sub(&ctx, op);
2222            break;
2223        case INDEX_op_sub_vec:
2224            done = fold_sub_vec(&ctx, op);
2225            break;
2226        CASE_OP_32_64(sub2):
2227            done = fold_sub2(&ctx, op);
2228            break;
2229        CASE_OP_32_64_VEC(xor):
2230            done = fold_xor(&ctx, op);
2231            break;
2232        default:
2233            break;
2234        }
2235
2236        if (!done) {
2237            finish_folding(&ctx, op);
2238        }
2239    }
2240}
2241