qemu/tcg/optimize.c
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qemu/int128.h"
  28#include "tcg/tcg-op.h"
  29#include "tcg-internal.h"
  30
  31#define CASE_OP_32_64(x)                        \
  32        glue(glue(case INDEX_op_, x), _i32):    \
  33        glue(glue(case INDEX_op_, x), _i64)
  34
  35#define CASE_OP_32_64_VEC(x)                    \
  36        glue(glue(case INDEX_op_, x), _i32):    \
  37        glue(glue(case INDEX_op_, x), _i64):    \
  38        glue(glue(case INDEX_op_, x), _vec)
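/*
 * For example, "CASE_OP_32_64(add):" expands to
 *     case INDEX_op_add_i32: case INDEX_op_add_i64:
 * and "CASE_OP_32_64_VEC(add):" additionally covers INDEX_op_add_vec.
 */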
  39
  40typedef struct TempOptInfo {
  41    bool is_const;
  42    TCGTemp *prev_copy;
  43    TCGTemp *next_copy;
  44    uint64_t val;
  45    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
  46    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
  47} TempOptInfo;
  48
  49typedef struct OptContext {
  50    TCGContext *tcg;
  51    TCGOp *prev_mb;
  52    TCGTempSet temps_used;
  53
  54    /* In flight values from optimization. */
  55    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
  56    uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
  57    uint64_t s_mask;  /* mask of clrsb(value) bits */
  58    TCGType type;
  59} OptContext;
  60
  61/* Calculate the smask for a specific value. */
  62static uint64_t smask_from_value(uint64_t value)
  63{
  64    int rep = clrsb64(value);
  65    return ~(~0ull >> rep);
  66}
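/*
 * Worked example: value = 0xffffffffffffff80 (-128) has 56 redundant
 * sign bits (clrsb64 == 56), so the returned smask is the top 56 bits:
 * 0xffffffffffffff00.  The same smask results for value = 0x7f.
 */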
  67
  68/*
  69 * Calculate the smask for a given set of known-zeros.
  70 * If there are lots of zeros on the left, we can consider the remainder
  71 * an unsigned field, and thus the corresponding signed field is one bit
  72 * larger.
  73 */
  74static uint64_t smask_from_zmask(uint64_t zmask)
  75{
  76    /*
  77     * Only the 0 bits are significant for zmask, thus the msb itself
  78     * must be zero, else we have no sign information.
  79     */
  80    int rep = clz64(zmask);
  81    if (rep == 0) {
  82        return 0;
  83    }
  84    rep -= 1;
  85    return ~(~0ull >> rep);
  86}
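/*
 * Worked example: zmask = 0xff says only the low 8 bits may be nonzero,
 * i.e. the value fits in an 8-bit unsigned (9-bit signed) field, so the
 * result is the top 55 bits: 0xfffffffffffffe00.  A zmask with the msb
 * set (e.g. -1) yields 0: nothing is known about the sign.
 */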
  87
  88/*
  89 * Recreate a properly left-aligned smask after manipulation.
  90 * Some bit-shuffling, particularly shifts and rotates, may
  91 * retain sign bits on the left, but may scatter disconnected
  92 * sign bits on the right.  Retain only what remains to the left.
  93 */
  94static uint64_t smask_from_smask(int64_t smask)
  95{
  96    /* Only the 1 bits are significant for smask */
  97    return smask_from_zmask(~smask);
  98}
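/*
 * Worked example: smask = 0xff00000000000001 (a run of 8 sign bits at
 * the left plus a stray bit 0) becomes 0xfe00000000000000: the stray
 * bit is dropped, and smask_from_zmask conservatively keeps one bit
 * less than the left-aligned run.
 */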
  99
 100static inline TempOptInfo *ts_info(TCGTemp *ts)
 101{
 102    return ts->state_ptr;
 103}
 104
 105static inline TempOptInfo *arg_info(TCGArg arg)
 106{
 107    return ts_info(arg_temp(arg));
 108}
 109
 110static inline bool ts_is_const(TCGTemp *ts)
 111{
 112    return ts_info(ts)->is_const;
 113}
 114
 115static inline bool arg_is_const(TCGArg arg)
 116{
 117    return ts_is_const(arg_temp(arg));
 118}
 119
 120static inline bool ts_is_copy(TCGTemp *ts)
 121{
 122    return ts_info(ts)->next_copy != ts;
 123}
 124
  125/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
 126static void reset_ts(TCGTemp *ts)
 127{
 128    TempOptInfo *ti = ts_info(ts);
 129    TempOptInfo *pi = ts_info(ti->prev_copy);
 130    TempOptInfo *ni = ts_info(ti->next_copy);
 131
 132    ni->prev_copy = ti->prev_copy;
 133    pi->next_copy = ti->next_copy;
 134    ti->next_copy = ts;
 135    ti->prev_copy = ts;
 136    ti->is_const = false;
 137    ti->z_mask = -1;
 138    ti->s_mask = 0;
 139}
 140
 141static void reset_temp(TCGArg arg)
 142{
 143    reset_ts(arg_temp(arg));
 144}
 145
 146/* Initialize and activate a temporary.  */
 147static void init_ts_info(OptContext *ctx, TCGTemp *ts)
 148{
 149    size_t idx = temp_idx(ts);
 150    TempOptInfo *ti;
 151
 152    if (test_bit(idx, ctx->temps_used.l)) {
 153        return;
 154    }
 155    set_bit(idx, ctx->temps_used.l);
 156
 157    ti = ts->state_ptr;
 158    if (ti == NULL) {
 159        ti = tcg_malloc(sizeof(TempOptInfo));
 160        ts->state_ptr = ti;
 161    }
 162
 163    ti->next_copy = ts;
 164    ti->prev_copy = ts;
 165    if (ts->kind == TEMP_CONST) {
 166        ti->is_const = true;
 167        ti->val = ts->val;
 168        ti->z_mask = ts->val;
 169        ti->s_mask = smask_from_value(ts->val);
 170    } else {
 171        ti->is_const = false;
 172        ti->z_mask = -1;
 173        ti->s_mask = 0;
 174    }
 175}
 176
 177static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 178{
 179    TCGTemp *i, *g, *l;
 180
 181    /* If this is already readonly, we can't do better. */
 182    if (temp_readonly(ts)) {
 183        return ts;
 184    }
 185
 186    g = l = NULL;
 187    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
 188        if (temp_readonly(i)) {
 189            return i;
 190        } else if (i->kind > ts->kind) {
 191            if (i->kind == TEMP_GLOBAL) {
 192                g = i;
 193            } else if (i->kind == TEMP_LOCAL) {
 194                l = i;
 195            }
 196        }
 197    }
 198
 199    /* If we didn't find a better representation, return the same temp. */
 200    return g ? g : l ? l : ts;
 201}
 202
 203static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
 204{
 205    TCGTemp *i;
 206
 207    if (ts1 == ts2) {
 208        return true;
 209    }
 210
 211    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
 212        return false;
 213    }
 214
 215    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
 216        if (i == ts2) {
 217            return true;
 218        }
 219    }
 220
 221    return false;
 222}
 223
 224static bool args_are_copies(TCGArg arg1, TCGArg arg2)
 225{
 226    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 227}
 228
 229static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 230{
 231    TCGTemp *dst_ts = arg_temp(dst);
 232    TCGTemp *src_ts = arg_temp(src);
 233    TempOptInfo *di;
 234    TempOptInfo *si;
 235    TCGOpcode new_op;
 236
 237    if (ts_are_copies(dst_ts, src_ts)) {
 238        tcg_op_remove(ctx->tcg, op);
 239        return true;
 240    }
 241
 242    reset_ts(dst_ts);
 243    di = ts_info(dst_ts);
 244    si = ts_info(src_ts);
 245
 246    switch (ctx->type) {
 247    case TCG_TYPE_I32:
 248        new_op = INDEX_op_mov_i32;
 249        break;
 250    case TCG_TYPE_I64:
 251        new_op = INDEX_op_mov_i64;
 252        break;
 253    case TCG_TYPE_V64:
 254    case TCG_TYPE_V128:
 255    case TCG_TYPE_V256:
 256        /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
 257        new_op = INDEX_op_mov_vec;
 258        break;
 259    default:
 260        g_assert_not_reached();
 261    }
 262    op->opc = new_op;
 263    op->args[0] = dst;
 264    op->args[1] = src;
 265
 266    di->z_mask = si->z_mask;
 267    di->s_mask = si->s_mask;
 268
 269    if (src_ts->type == dst_ts->type) {
 270        TempOptInfo *ni = ts_info(si->next_copy);
 271
 272        di->next_copy = si->next_copy;
 273        di->prev_copy = src_ts;
 274        ni->prev_copy = dst_ts;
 275        si->next_copy = dst_ts;
 276        di->is_const = si->is_const;
 277        di->val = si->val;
 278    }
 279    return true;
 280}
 281
 282static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
 283                             TCGArg dst, uint64_t val)
 284{
 285    TCGTemp *tv;
 286
 287    if (ctx->type == TCG_TYPE_I32) {
 288        val = (int32_t)val;
 289    }
 290
 291    /* Convert movi to mov with constant temp. */
 292    tv = tcg_constant_internal(ctx->type, val);
 293    init_ts_info(ctx, tv);
 294    return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
 295}
 296
 297static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
 298{
 299    uint64_t l64, h64;
 300
 301    switch (op) {
 302    CASE_OP_32_64(add):
 303        return x + y;
 304
 305    CASE_OP_32_64(sub):
 306        return x - y;
 307
 308    CASE_OP_32_64(mul):
 309        return x * y;
 310
 311    CASE_OP_32_64(and):
 312        return x & y;
 313
 314    CASE_OP_32_64(or):
 315        return x | y;
 316
 317    CASE_OP_32_64(xor):
 318        return x ^ y;
 319
 320    case INDEX_op_shl_i32:
 321        return (uint32_t)x << (y & 31);
 322
 323    case INDEX_op_shl_i64:
 324        return (uint64_t)x << (y & 63);
 325
 326    case INDEX_op_shr_i32:
 327        return (uint32_t)x >> (y & 31);
 328
 329    case INDEX_op_shr_i64:
 330        return (uint64_t)x >> (y & 63);
 331
 332    case INDEX_op_sar_i32:
 333        return (int32_t)x >> (y & 31);
 334
 335    case INDEX_op_sar_i64:
 336        return (int64_t)x >> (y & 63);
 337
 338    case INDEX_op_rotr_i32:
 339        return ror32(x, y & 31);
 340
 341    case INDEX_op_rotr_i64:
 342        return ror64(x, y & 63);
 343
 344    case INDEX_op_rotl_i32:
 345        return rol32(x, y & 31);
 346
 347    case INDEX_op_rotl_i64:
 348        return rol64(x, y & 63);
 349
 350    CASE_OP_32_64(not):
 351        return ~x;
 352
 353    CASE_OP_32_64(neg):
 354        return -x;
 355
 356    CASE_OP_32_64(andc):
 357        return x & ~y;
 358
 359    CASE_OP_32_64(orc):
 360        return x | ~y;
 361
 362    CASE_OP_32_64(eqv):
 363        return ~(x ^ y);
 364
 365    CASE_OP_32_64(nand):
 366        return ~(x & y);
 367
 368    CASE_OP_32_64(nor):
 369        return ~(x | y);
 370
 371    case INDEX_op_clz_i32:
 372        return (uint32_t)x ? clz32(x) : y;
 373
 374    case INDEX_op_clz_i64:
 375        return x ? clz64(x) : y;
 376
 377    case INDEX_op_ctz_i32:
 378        return (uint32_t)x ? ctz32(x) : y;
 379
 380    case INDEX_op_ctz_i64:
 381        return x ? ctz64(x) : y;
 382
 383    case INDEX_op_ctpop_i32:
 384        return ctpop32(x);
 385
 386    case INDEX_op_ctpop_i64:
 387        return ctpop64(x);
 388
 389    CASE_OP_32_64(ext8s):
 390        return (int8_t)x;
 391
 392    CASE_OP_32_64(ext16s):
 393        return (int16_t)x;
 394
 395    CASE_OP_32_64(ext8u):
 396        return (uint8_t)x;
 397
 398    CASE_OP_32_64(ext16u):
 399        return (uint16_t)x;
 400
 401    CASE_OP_32_64(bswap16):
 402        x = bswap16(x);
 403        return y & TCG_BSWAP_OS ? (int16_t)x : x;
 404
 405    CASE_OP_32_64(bswap32):
 406        x = bswap32(x);
 407        return y & TCG_BSWAP_OS ? (int32_t)x : x;
 408
 409    case INDEX_op_bswap64_i64:
 410        return bswap64(x);
 411
 412    case INDEX_op_ext_i32_i64:
 413    case INDEX_op_ext32s_i64:
 414        return (int32_t)x;
 415
 416    case INDEX_op_extu_i32_i64:
 417    case INDEX_op_extrl_i64_i32:
 418    case INDEX_op_ext32u_i64:
 419        return (uint32_t)x;
 420
 421    case INDEX_op_extrh_i64_i32:
 422        return (uint64_t)x >> 32;
 423
 424    case INDEX_op_muluh_i32:
 425        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
 426    case INDEX_op_mulsh_i32:
 427        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
 428
 429    case INDEX_op_muluh_i64:
 430        mulu64(&l64, &h64, x, y);
 431        return h64;
 432    case INDEX_op_mulsh_i64:
 433        muls64(&l64, &h64, x, y);
 434        return h64;
 435
 436    case INDEX_op_div_i32:
 437        /* Avoid crashing on divide by zero, otherwise undefined.  */
 438        return (int32_t)x / ((int32_t)y ? : 1);
 439    case INDEX_op_divu_i32:
 440        return (uint32_t)x / ((uint32_t)y ? : 1);
 441    case INDEX_op_div_i64:
 442        return (int64_t)x / ((int64_t)y ? : 1);
 443    case INDEX_op_divu_i64:
 444        return (uint64_t)x / ((uint64_t)y ? : 1);
 445
 446    case INDEX_op_rem_i32:
 447        return (int32_t)x % ((int32_t)y ? : 1);
 448    case INDEX_op_remu_i32:
 449        return (uint32_t)x % ((uint32_t)y ? : 1);
 450    case INDEX_op_rem_i64:
 451        return (int64_t)x % ((int64_t)y ? : 1);
 452    case INDEX_op_remu_i64:
 453        return (uint64_t)x % ((uint64_t)y ? : 1);
 454
 455    default:
 456        fprintf(stderr,
 457                "Unrecognized operation %d in do_constant_folding.\n", op);
 458        tcg_abort();
 459    }
 460}
 461
 462static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
 463                                    uint64_t x, uint64_t y)
 464{
 465    uint64_t res = do_constant_folding_2(op, x, y);
 466    if (type == TCG_TYPE_I32) {
 467        res = (int32_t)res;
 468    }
 469    return res;
 470}
 471
 472static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 473{
 474    switch (c) {
 475    case TCG_COND_EQ:
 476        return x == y;
 477    case TCG_COND_NE:
 478        return x != y;
 479    case TCG_COND_LT:
 480        return (int32_t)x < (int32_t)y;
 481    case TCG_COND_GE:
 482        return (int32_t)x >= (int32_t)y;
 483    case TCG_COND_LE:
 484        return (int32_t)x <= (int32_t)y;
 485    case TCG_COND_GT:
 486        return (int32_t)x > (int32_t)y;
 487    case TCG_COND_LTU:
 488        return x < y;
 489    case TCG_COND_GEU:
 490        return x >= y;
 491    case TCG_COND_LEU:
 492        return x <= y;
 493    case TCG_COND_GTU:
 494        return x > y;
 495    default:
 496        tcg_abort();
 497    }
 498}
 499
 500static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 501{
 502    switch (c) {
 503    case TCG_COND_EQ:
 504        return x == y;
 505    case TCG_COND_NE:
 506        return x != y;
 507    case TCG_COND_LT:
 508        return (int64_t)x < (int64_t)y;
 509    case TCG_COND_GE:
 510        return (int64_t)x >= (int64_t)y;
 511    case TCG_COND_LE:
 512        return (int64_t)x <= (int64_t)y;
 513    case TCG_COND_GT:
 514        return (int64_t)x > (int64_t)y;
 515    case TCG_COND_LTU:
 516        return x < y;
 517    case TCG_COND_GEU:
 518        return x >= y;
 519    case TCG_COND_LEU:
 520        return x <= y;
 521    case TCG_COND_GTU:
 522        return x > y;
 523    default:
 524        tcg_abort();
 525    }
 526}
 527
 528static bool do_constant_folding_cond_eq(TCGCond c)
 529{
 530    switch (c) {
 531    case TCG_COND_GT:
 532    case TCG_COND_LTU:
 533    case TCG_COND_LT:
 534    case TCG_COND_GTU:
 535    case TCG_COND_NE:
 536        return 0;
 537    case TCG_COND_GE:
 538    case TCG_COND_GEU:
 539    case TCG_COND_LE:
 540    case TCG_COND_LEU:
 541    case TCG_COND_EQ:
 542        return 1;
 543    default:
 544        tcg_abort();
 545    }
 546}
 547
 548/*
 549 * Return -1 if the condition can't be simplified,
 550 * and the result of the condition (0 or 1) if it can.
 551 */
 552static int do_constant_folding_cond(TCGType type, TCGArg x,
 553                                    TCGArg y, TCGCond c)
 554{
 555    uint64_t xv = arg_info(x)->val;
 556    uint64_t yv = arg_info(y)->val;
 557
 558    if (arg_is_const(x) && arg_is_const(y)) {
 559        switch (type) {
 560        case TCG_TYPE_I32:
 561            return do_constant_folding_cond_32(xv, yv, c);
 562        case TCG_TYPE_I64:
 563            return do_constant_folding_cond_64(xv, yv, c);
 564        default:
 565            /* Only scalar comparisons are optimizable */
 566            return -1;
 567        }
 568    } else if (args_are_copies(x, y)) {
 569        return do_constant_folding_cond_eq(c);
 570    } else if (arg_is_const(y) && yv == 0) {
 571        switch (c) {
 572        case TCG_COND_LTU:
 573            return 0;
 574        case TCG_COND_GEU:
 575            return 1;
 576        default:
 577            return -1;
 578        }
 579    }
 580    return -1;
 581}
 582
 583/*
 584 * Return -1 if the condition can't be simplified,
 585 * and the result of the condition (0 or 1) if it can.
 586 */
 587static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 588{
 589    TCGArg al = p1[0], ah = p1[1];
 590    TCGArg bl = p2[0], bh = p2[1];
 591
 592    if (arg_is_const(bl) && arg_is_const(bh)) {
 593        tcg_target_ulong blv = arg_info(bl)->val;
 594        tcg_target_ulong bhv = arg_info(bh)->val;
 595        uint64_t b = deposit64(blv, 32, 32, bhv);
 596
 597        if (arg_is_const(al) && arg_is_const(ah)) {
 598            tcg_target_ulong alv = arg_info(al)->val;
 599            tcg_target_ulong ahv = arg_info(ah)->val;
 600            uint64_t a = deposit64(alv, 32, 32, ahv);
 601            return do_constant_folding_cond_64(a, b, c);
 602        }
 603        if (b == 0) {
 604            switch (c) {
 605            case TCG_COND_LTU:
 606                return 0;
 607            case TCG_COND_GEU:
 608                return 1;
 609            default:
 610                break;
 611            }
 612        }
 613    }
 614    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
 615        return do_constant_folding_cond_eq(c);
 616    }
 617    return -1;
 618}
 619
 620/**
 621 * swap_commutative:
 622 * @dest: TCGArg of the destination argument, or NO_DEST.
 623 * @p1: first paired argument
 624 * @p2: second paired argument
 625 *
 626 * If *@p1 is a constant and *@p2 is not, swap.
 627 * If *@p2 matches @dest, swap.
 628 * Return true if a swap was performed.
 629 */
 630
 631#define NO_DEST  temp_arg(NULL)
 632
 633static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 634{
 635    TCGArg a1 = *p1, a2 = *p2;
 636    int sum = 0;
 637    sum += arg_is_const(a1);
 638    sum -= arg_is_const(a2);
 639
  640    /* Prefer the constant in the second argument, and then the form
 641       op a, a, b, which is better handled on non-RISC hosts. */
 642    if (sum > 0 || (sum == 0 && dest == a2)) {
 643        *p1 = a2;
 644        *p2 = a1;
 645        return true;
 646    }
 647    return false;
 648}
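/*
 * For example (pseudo-notation), "add_i32 t0, $5, t1" is rewritten as
 * "add_i32 t0, t1, $5" so that the constant ends up in the second slot,
 * and "add_i32 t0, t1, t0" becomes "add_i32 t0, t0, t1" so that the
 * destination matches the first input.
 */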
 649
 650static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 651{
 652    int sum = 0;
 653    sum += arg_is_const(p1[0]);
 654    sum += arg_is_const(p1[1]);
 655    sum -= arg_is_const(p2[0]);
 656    sum -= arg_is_const(p2[1]);
 657    if (sum > 0) {
 658        TCGArg t;
 659        t = p1[0], p1[0] = p2[0], p2[0] = t;
 660        t = p1[1], p1[1] = p2[1], p2[1] = t;
 661        return true;
 662    }
 663    return false;
 664}
 665
 666static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
 667{
 668    for (int i = 0; i < nb_args; i++) {
 669        TCGTemp *ts = arg_temp(op->args[i]);
 670        if (ts) {
 671            init_ts_info(ctx, ts);
 672        }
 673    }
 674}
 675
 676static void copy_propagate(OptContext *ctx, TCGOp *op,
 677                           int nb_oargs, int nb_iargs)
 678{
 679    TCGContext *s = ctx->tcg;
 680
 681    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 682        TCGTemp *ts = arg_temp(op->args[i]);
 683        if (ts && ts_is_copy(ts)) {
 684            op->args[i] = temp_arg(find_better_copy(s, ts));
 685        }
 686    }
 687}
 688
 689static void finish_folding(OptContext *ctx, TCGOp *op)
 690{
 691    const TCGOpDef *def = &tcg_op_defs[op->opc];
 692    int i, nb_oargs;
 693
 694    /*
 695     * For an opcode that ends a BB, reset all temp data.
 696     * We do no cross-BB optimization.
 697     */
 698    if (def->flags & TCG_OPF_BB_END) {
 699        memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
 700        ctx->prev_mb = NULL;
 701        return;
 702    }
 703
 704    nb_oargs = def->nb_oargs;
 705    for (i = 0; i < nb_oargs; i++) {
 706        TCGTemp *ts = arg_temp(op->args[i]);
 707        reset_ts(ts);
 708        /*
 709         * Save the corresponding known-zero/sign bits mask for the
 710         * first output argument (only one supported so far).
 711         */
 712        if (i == 0) {
 713            ts_info(ts)->z_mask = ctx->z_mask;
 714            ts_info(ts)->s_mask = ctx->s_mask;
 715        }
 716    }
 717}
 718
 719/*
 720 * The fold_* functions return true when processing is complete,
 721 * usually by folding the operation to a constant or to a copy,
 722 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 723 * like collect information about the value produced, for use in
 724 * optimizing a subsequent operation.
 725 *
 726 * These first fold_* functions are all helpers, used by other
 727 * folders for more specific operations.
 728 */
 729
 730static bool fold_const1(OptContext *ctx, TCGOp *op)
 731{
 732    if (arg_is_const(op->args[1])) {
 733        uint64_t t;
 734
 735        t = arg_info(op->args[1])->val;
 736        t = do_constant_folding(op->opc, ctx->type, t, 0);
 737        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
 738    }
 739    return false;
 740}
 741
 742static bool fold_const2(OptContext *ctx, TCGOp *op)
 743{
 744    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
 745        uint64_t t1 = arg_info(op->args[1])->val;
 746        uint64_t t2 = arg_info(op->args[2])->val;
 747
 748        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
 749        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
 750    }
 751    return false;
 752}
 753
 754static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 755{
 756    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 757    return fold_const2(ctx, op);
 758}
 759
 760static bool fold_masks(OptContext *ctx, TCGOp *op)
 761{
 762    uint64_t a_mask = ctx->a_mask;
 763    uint64_t z_mask = ctx->z_mask;
 764    uint64_t s_mask = ctx->s_mask;
 765
 766    /*
 767     * 32-bit ops generate 32-bit results, which for the purpose of
 768     * simplifying tcg are sign-extended.  Certainly that's how we
 769     * represent our constants elsewhere.  Note that the bits will
 770     * be reset properly for a 64-bit value when encountering the
 771     * type changing opcodes.
 772     */
 773    if (ctx->type == TCG_TYPE_I32) {
 774        a_mask = (int32_t)a_mask;
 775        z_mask = (int32_t)z_mask;
 776        s_mask |= MAKE_64BIT_MASK(32, 32);
 777        ctx->z_mask = z_mask;
 778        ctx->s_mask = s_mask;
 779    }
 780
 781    if (z_mask == 0) {
 782        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
 783    }
 784    if (a_mask == 0) {
 785        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 786    }
 787    return false;
 788}
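/*
 * For example, in fold_and with a constant second operand of 0xff,
 * a_mask becomes z1 & ~0xff.  If the first operand already has all bits
 * above bit 7 known zero, a_mask is 0 and the AND is replaced by a mov;
 * if z1 & 0xff is 0, the result is known zero and becomes a movi 0.
 */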
 789
 790/*
 791 * Convert @op to NOT, if NOT is supported by the host.
  792 * Return true if the conversion is successful, which will still
 793 * indicate that the processing is complete.
 794 */
 795static bool fold_not(OptContext *ctx, TCGOp *op);
 796static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
 797{
 798    TCGOpcode not_op;
 799    bool have_not;
 800
 801    switch (ctx->type) {
 802    case TCG_TYPE_I32:
 803        not_op = INDEX_op_not_i32;
 804        have_not = TCG_TARGET_HAS_not_i32;
 805        break;
 806    case TCG_TYPE_I64:
 807        not_op = INDEX_op_not_i64;
 808        have_not = TCG_TARGET_HAS_not_i64;
 809        break;
 810    case TCG_TYPE_V64:
 811    case TCG_TYPE_V128:
 812    case TCG_TYPE_V256:
 813        not_op = INDEX_op_not_vec;
 814        have_not = TCG_TARGET_HAS_not_vec;
 815        break;
 816    default:
 817        g_assert_not_reached();
 818    }
 819    if (have_not) {
 820        op->opc = not_op;
 821        op->args[1] = op->args[idx];
 822        return fold_not(ctx, op);
 823    }
 824    return false;
 825}
 826
 827/* If the binary operation has first argument @i, fold to @i. */
 828static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 829{
 830    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 831        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 832    }
 833    return false;
 834}
 835
 836/* If the binary operation has first argument @i, fold to NOT. */
 837static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 838{
 839    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 840        return fold_to_not(ctx, op, 2);
 841    }
 842    return false;
 843}
 844
 845/* If the binary operation has second argument @i, fold to @i. */
 846static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 847{
 848    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 849        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 850    }
 851    return false;
 852}
 853
 854/* If the binary operation has second argument @i, fold to identity. */
 855static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
 856{
 857    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 858        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 859    }
 860    return false;
 861}
 862
 863/* If the binary operation has second argument @i, fold to NOT. */
 864static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 865{
 866    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 867        return fold_to_not(ctx, op, 1);
 868    }
 869    return false;
 870}
 871
 872/* If the binary operation has both arguments equal, fold to @i. */
 873static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 874{
 875    if (args_are_copies(op->args[1], op->args[2])) {
 876        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 877    }
 878    return false;
 879}
 880
 881/* If the binary operation has both arguments equal, fold to identity. */
 882static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
 883{
 884    if (args_are_copies(op->args[1], op->args[2])) {
 885        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 886    }
 887    return false;
 888}
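/*
 * Examples of the helpers above, as used by the folders below:
 *   and  x,x  -> mov x      (fold_xx_to_x)
 *   sub  x,x  -> movi 0     (fold_xx_to_i with 0)
 *   and  x,0  -> movi 0     (fold_xi_to_i with 0)
 *   or   x,0  -> mov x      (fold_xi_to_x with 0)
 *   xor  x,-1 -> not x      (fold_xi_to_not with -1)
 *   shl  0,x  -> movi 0     (fold_ix_to_i with 0)
 *   andc -1,x -> not x      (fold_ix_to_not with -1)
 */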
 889
 890/*
 891 * These outermost fold_<op> functions are sorted alphabetically.
 892 *
 893 * The ordering of the transformations should be:
 894 *   1) those that produce a constant
 895 *   2) those that produce a copy
 896 *   3) those that produce information about the result value.
 897 */
 898
 899static bool fold_add(OptContext *ctx, TCGOp *op)
 900{
 901    if (fold_const2_commutative(ctx, op) ||
 902        fold_xi_to_x(ctx, op, 0)) {
 903        return true;
 904    }
 905    return false;
 906}
 907
 908static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 909{
 910    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
 911        arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
 912        uint64_t al = arg_info(op->args[2])->val;
 913        uint64_t ah = arg_info(op->args[3])->val;
 914        uint64_t bl = arg_info(op->args[4])->val;
 915        uint64_t bh = arg_info(op->args[5])->val;
 916        TCGArg rl, rh;
 917        TCGOp *op2;
 918
 919        if (ctx->type == TCG_TYPE_I32) {
 920            uint64_t a = deposit64(al, 32, 32, ah);
 921            uint64_t b = deposit64(bl, 32, 32, bh);
 922
 923            if (add) {
 924                a += b;
 925            } else {
 926                a -= b;
 927            }
 928
 929            al = sextract64(a, 0, 32);
 930            ah = sextract64(a, 32, 32);
 931        } else {
 932            Int128 a = int128_make128(al, ah);
 933            Int128 b = int128_make128(bl, bh);
 934
 935            if (add) {
 936                a = int128_add(a, b);
 937            } else {
 938                a = int128_sub(a, b);
 939            }
 940
 941            al = int128_getlo(a);
 942            ah = int128_gethi(a);
 943        }
 944
 945        rl = op->args[0];
 946        rh = op->args[1];
 947
 948        /* The proper opcode is supplied by tcg_opt_gen_mov. */
 949        op2 = tcg_op_insert_before(ctx->tcg, op, 0);
 950
 951        tcg_opt_gen_movi(ctx, op, rl, al);
 952        tcg_opt_gen_movi(ctx, op2, rh, ah);
 953        return true;
 954    }
 955    return false;
 956}
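/*
 * For example, an add2_i32 whose four inputs are all constant, say
 * {al,ah} = {0xffffffff,0} and {bl,bh} = {1,0}, is replaced by two movi:
 * rl = 0 and rh = 1, with the carry folded at translation time.
 */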
 957
 958static bool fold_add2(OptContext *ctx, TCGOp *op)
 959{
 960    /* Note that the high and low parts may be independently swapped. */
 961    swap_commutative(op->args[0], &op->args[2], &op->args[4]);
 962    swap_commutative(op->args[1], &op->args[3], &op->args[5]);
 963
 964    return fold_addsub2(ctx, op, true);
 965}
 966
 967static bool fold_and(OptContext *ctx, TCGOp *op)
 968{
 969    uint64_t z1, z2;
 970
 971    if (fold_const2_commutative(ctx, op) ||
 972        fold_xi_to_i(ctx, op, 0) ||
 973        fold_xi_to_x(ctx, op, -1) ||
 974        fold_xx_to_x(ctx, op)) {
 975        return true;
 976    }
 977
 978    z1 = arg_info(op->args[1])->z_mask;
 979    z2 = arg_info(op->args[2])->z_mask;
 980    ctx->z_mask = z1 & z2;
 981
 982    /*
 983     * Sign repetitions are perforce all identical, whether they are 1 or 0.
 984     * Bitwise operations preserve the relative quantity of the repetitions.
 985     */
 986    ctx->s_mask = arg_info(op->args[1])->s_mask
 987                & arg_info(op->args[2])->s_mask;
 988
 989    /*
 990     * Known-zeros does not imply known-ones.  Therefore unless
 991     * arg2 is constant, we can't infer affected bits from it.
 992     */
 993    if (arg_is_const(op->args[2])) {
 994        ctx->a_mask = z1 & ~z2;
 995    }
 996
 997    return fold_masks(ctx, op);
 998}
 999
1000static bool fold_andc(OptContext *ctx, TCGOp *op)
1001{
1002    uint64_t z1;
1003
1004    if (fold_const2(ctx, op) ||
1005        fold_xx_to_i(ctx, op, 0) ||
1006        fold_xi_to_x(ctx, op, 0) ||
1007        fold_ix_to_not(ctx, op, -1)) {
1008        return true;
1009    }
1010
1011    z1 = arg_info(op->args[1])->z_mask;
1012
1013    /*
1014     * Known-zeros does not imply known-ones.  Therefore unless
1015     * arg2 is constant, we can't infer anything from it.
1016     */
1017    if (arg_is_const(op->args[2])) {
1018        uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1019        ctx->a_mask = z1 & ~z2;
1020        z1 &= z2;
1021    }
1022    ctx->z_mask = z1;
1023
1024    ctx->s_mask = arg_info(op->args[1])->s_mask
1025                & arg_info(op->args[2])->s_mask;
1026    return fold_masks(ctx, op);
1027}
1028
1029static bool fold_brcond(OptContext *ctx, TCGOp *op)
1030{
1031    TCGCond cond = op->args[2];
1032    int i;
1033
1034    if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1035        op->args[2] = cond = tcg_swap_cond(cond);
1036    }
1037
1038    i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1039    if (i == 0) {
1040        tcg_op_remove(ctx->tcg, op);
1041        return true;
1042    }
1043    if (i > 0) {
1044        op->opc = INDEX_op_br;
1045        op->args[0] = op->args[3];
1046    }
1047    return false;
1048}
1049
1050static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1051{
1052    TCGCond cond = op->args[4];
1053    TCGArg label = op->args[5];
1054    int i, inv = 0;
1055
1056    if (swap_commutative2(&op->args[0], &op->args[2])) {
1057        op->args[4] = cond = tcg_swap_cond(cond);
1058    }
1059
1060    i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1061    if (i >= 0) {
1062        goto do_brcond_const;
1063    }
1064
1065    switch (cond) {
1066    case TCG_COND_LT:
1067    case TCG_COND_GE:
1068        /*
1069         * Simplify LT/GE comparisons vs zero to a single compare
1070         * vs the high word of the input.
1071         */
1072        if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1073            arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1074            goto do_brcond_high;
1075        }
1076        break;
1077
1078    case TCG_COND_NE:
1079        inv = 1;
1080        QEMU_FALLTHROUGH;
1081    case TCG_COND_EQ:
1082        /*
1083         * Simplify EQ/NE comparisons where one of the pairs
1084         * can be simplified.
1085         */
1086        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1087                                     op->args[2], cond);
1088        switch (i ^ inv) {
1089        case 0:
1090            goto do_brcond_const;
1091        case 1:
1092            goto do_brcond_high;
1093        }
1094
1095        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1096                                     op->args[3], cond);
1097        switch (i ^ inv) {
1098        case 0:
1099            goto do_brcond_const;
1100        case 1:
1101            op->opc = INDEX_op_brcond_i32;
1102            op->args[1] = op->args[2];
1103            op->args[2] = cond;
1104            op->args[3] = label;
1105            break;
1106        }
1107        break;
1108
1109    default:
1110        break;
1111
1112    do_brcond_high:
1113        op->opc = INDEX_op_brcond_i32;
1114        op->args[0] = op->args[1];
1115        op->args[1] = op->args[3];
1116        op->args[2] = cond;
1117        op->args[3] = label;
1118        break;
1119
1120    do_brcond_const:
1121        if (i == 0) {
1122            tcg_op_remove(ctx->tcg, op);
1123            return true;
1124        }
1125        op->opc = INDEX_op_br;
1126        op->args[0] = label;
1127        break;
1128    }
1129    return false;
1130}
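/*
 * For example (pseudo-notation), "brcond2_i32 al, ah, $0, $0, lt, L"
 * tests the sign of a 64-bit value, which lives entirely in the high
 * word, so it is rewritten as "brcond_i32 ah, $0, lt, L".
 */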
1131
1132static bool fold_bswap(OptContext *ctx, TCGOp *op)
1133{
1134    uint64_t z_mask, s_mask, sign;
1135
1136    if (arg_is_const(op->args[1])) {
1137        uint64_t t = arg_info(op->args[1])->val;
1138
1139        t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1140        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1141    }
1142
1143    z_mask = arg_info(op->args[1])->z_mask;
1144
1145    switch (op->opc) {
1146    case INDEX_op_bswap16_i32:
1147    case INDEX_op_bswap16_i64:
1148        z_mask = bswap16(z_mask);
1149        sign = INT16_MIN;
1150        break;
1151    case INDEX_op_bswap32_i32:
1152    case INDEX_op_bswap32_i64:
1153        z_mask = bswap32(z_mask);
1154        sign = INT32_MIN;
1155        break;
1156    case INDEX_op_bswap64_i64:
1157        z_mask = bswap64(z_mask);
1158        sign = INT64_MIN;
1159        break;
1160    default:
1161        g_assert_not_reached();
1162    }
1163    s_mask = smask_from_zmask(z_mask);
1164
1165    switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1166    case TCG_BSWAP_OZ:
1167        break;
1168    case TCG_BSWAP_OS:
1169        /* If the sign bit may be 1, force all the bits above to 1. */
1170        if (z_mask & sign) {
1171            z_mask |= sign;
1172            s_mask = sign << 1;
1173        }
1174        break;
1175    default:
1176        /* The high bits are undefined: force all bits above the sign to 1. */
1177        z_mask |= sign << 1;
1178        s_mask = 0;
1179        break;
1180    }
1181    ctx->z_mask = z_mask;
1182    ctx->s_mask = s_mask;
1183
1184    return fold_masks(ctx, op);
1185}
1186
1187static bool fold_call(OptContext *ctx, TCGOp *op)
1188{
1189    TCGContext *s = ctx->tcg;
1190    int nb_oargs = TCGOP_CALLO(op);
1191    int nb_iargs = TCGOP_CALLI(op);
1192    int flags, i;
1193
1194    init_arguments(ctx, op, nb_oargs + nb_iargs);
1195    copy_propagate(ctx, op, nb_oargs, nb_iargs);
1196
1197    /* If the function reads or writes globals, reset temp data. */
1198    flags = tcg_call_flags(op);
1199    if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1200        int nb_globals = s->nb_globals;
1201
1202        for (i = 0; i < nb_globals; i++) {
1203            if (test_bit(i, ctx->temps_used.l)) {
1204                reset_ts(&ctx->tcg->temps[i]);
1205            }
1206        }
1207    }
1208
1209    /* Reset temp data for outputs. */
1210    for (i = 0; i < nb_oargs; i++) {
1211        reset_temp(op->args[i]);
1212    }
1213
1214    /* Stop optimizing MB across calls. */
1215    ctx->prev_mb = NULL;
1216    return true;
1217}
1218
1219static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1220{
1221    uint64_t z_mask;
1222
1223    if (arg_is_const(op->args[1])) {
1224        uint64_t t = arg_info(op->args[1])->val;
1225
1226        if (t != 0) {
1227            t = do_constant_folding(op->opc, ctx->type, t, 0);
1228            return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1229        }
1230        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1231    }
1232
1233    switch (ctx->type) {
1234    case TCG_TYPE_I32:
1235        z_mask = 31;
1236        break;
1237    case TCG_TYPE_I64:
1238        z_mask = 63;
1239        break;
1240    default:
1241        g_assert_not_reached();
1242    }
1243    ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1244    ctx->s_mask = smask_from_zmask(ctx->z_mask);
1245    return false;
1246}
1247
1248static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1249{
1250    if (fold_const1(ctx, op)) {
1251        return true;
1252    }
1253
1254    switch (ctx->type) {
1255    case TCG_TYPE_I32:
1256        ctx->z_mask = 32 | 31;
1257        break;
1258    case TCG_TYPE_I64:
1259        ctx->z_mask = 64 | 63;
1260        break;
1261    default:
1262        g_assert_not_reached();
1263    }
1264    ctx->s_mask = smask_from_zmask(ctx->z_mask);
1265    return false;
1266}
1267
1268static bool fold_deposit(OptContext *ctx, TCGOp *op)
1269{
1270    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1271        uint64_t t1 = arg_info(op->args[1])->val;
1272        uint64_t t2 = arg_info(op->args[2])->val;
1273
1274        t1 = deposit64(t1, op->args[3], op->args[4], t2);
1275        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1276    }
1277
1278    ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1279                            op->args[3], op->args[4],
1280                            arg_info(op->args[2])->z_mask);
1281    return false;
1282}
1283
1284static bool fold_divide(OptContext *ctx, TCGOp *op)
1285{
1286    if (fold_const2(ctx, op) ||
1287        fold_xi_to_x(ctx, op, 1)) {
1288        return true;
1289    }
1290    return false;
1291}
1292
1293static bool fold_dup(OptContext *ctx, TCGOp *op)
1294{
1295    if (arg_is_const(op->args[1])) {
1296        uint64_t t = arg_info(op->args[1])->val;
1297        t = dup_const(TCGOP_VECE(op), t);
1298        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1299    }
1300    return false;
1301}
1302
1303static bool fold_dup2(OptContext *ctx, TCGOp *op)
1304{
1305    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1306        uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1307                               arg_info(op->args[2])->val);
1308        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1309    }
1310
1311    if (args_are_copies(op->args[1], op->args[2])) {
1312        op->opc = INDEX_op_dup_vec;
1313        TCGOP_VECE(op) = MO_32;
1314    }
1315    return false;
1316}
1317
1318static bool fold_eqv(OptContext *ctx, TCGOp *op)
1319{
1320    if (fold_const2_commutative(ctx, op) ||
1321        fold_xi_to_x(ctx, op, -1) ||
1322        fold_xi_to_not(ctx, op, 0)) {
1323        return true;
1324    }
1325
1326    ctx->s_mask = arg_info(op->args[1])->s_mask
1327                & arg_info(op->args[2])->s_mask;
1328    return false;
1329}
1330
1331static bool fold_extract(OptContext *ctx, TCGOp *op)
1332{
1333    uint64_t z_mask_old, z_mask;
1334    int pos = op->args[2];
1335    int len = op->args[3];
1336
1337    if (arg_is_const(op->args[1])) {
1338        uint64_t t;
1339
1340        t = arg_info(op->args[1])->val;
1341        t = extract64(t, pos, len);
1342        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1343    }
1344
1345    z_mask_old = arg_info(op->args[1])->z_mask;
1346    z_mask = extract64(z_mask_old, pos, len);
1347    if (pos == 0) {
1348        ctx->a_mask = z_mask_old ^ z_mask;
1349    }
1350    ctx->z_mask = z_mask;
1351    ctx->s_mask = smask_from_zmask(z_mask);
1352
1353    return fold_masks(ctx, op);
1354}
1355
1356static bool fold_extract2(OptContext *ctx, TCGOp *op)
1357{
1358    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1359        uint64_t v1 = arg_info(op->args[1])->val;
1360        uint64_t v2 = arg_info(op->args[2])->val;
1361        int shr = op->args[3];
1362
1363        if (op->opc == INDEX_op_extract2_i64) {
1364            v1 >>= shr;
1365            v2 <<= 64 - shr;
1366        } else {
1367            v1 = (uint32_t)v1 >> shr;
1368            v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1369        }
1370        return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1371    }
1372    return false;
1373}
1374
1375static bool fold_exts(OptContext *ctx, TCGOp *op)
1376{
1377    uint64_t s_mask_old, s_mask, z_mask, sign;
1378    bool type_change = false;
1379
1380    if (fold_const1(ctx, op)) {
1381        return true;
1382    }
1383
1384    z_mask = arg_info(op->args[1])->z_mask;
1385    s_mask = arg_info(op->args[1])->s_mask;
1386    s_mask_old = s_mask;
1387
1388    switch (op->opc) {
1389    CASE_OP_32_64(ext8s):
1390        sign = INT8_MIN;
1391        z_mask = (uint8_t)z_mask;
1392        break;
1393    CASE_OP_32_64(ext16s):
1394        sign = INT16_MIN;
1395        z_mask = (uint16_t)z_mask;
1396        break;
1397    case INDEX_op_ext_i32_i64:
1398        type_change = true;
1399        QEMU_FALLTHROUGH;
1400    case INDEX_op_ext32s_i64:
1401        sign = INT32_MIN;
1402        z_mask = (uint32_t)z_mask;
1403        break;
1404    default:
1405        g_assert_not_reached();
1406    }
1407
1408    if (z_mask & sign) {
1409        z_mask |= sign;
1410    }
1411    s_mask |= sign << 1;
1412
1413    ctx->z_mask = z_mask;
1414    ctx->s_mask = s_mask;
1415    if (!type_change) {
1416        ctx->a_mask = s_mask & ~s_mask_old;
1417    }
1418
1419    return fold_masks(ctx, op);
1420}
1421
1422static bool fold_extu(OptContext *ctx, TCGOp *op)
1423{
1424    uint64_t z_mask_old, z_mask;
1425    bool type_change = false;
1426
1427    if (fold_const1(ctx, op)) {
1428        return true;
1429    }
1430
1431    z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1432
1433    switch (op->opc) {
1434    CASE_OP_32_64(ext8u):
1435        z_mask = (uint8_t)z_mask;
1436        break;
1437    CASE_OP_32_64(ext16u):
1438        z_mask = (uint16_t)z_mask;
1439        break;
1440    case INDEX_op_extrl_i64_i32:
1441    case INDEX_op_extu_i32_i64:
1442        type_change = true;
1443        QEMU_FALLTHROUGH;
1444    case INDEX_op_ext32u_i64:
1445        z_mask = (uint32_t)z_mask;
1446        break;
1447    case INDEX_op_extrh_i64_i32:
1448        type_change = true;
1449        z_mask >>= 32;
1450        break;
1451    default:
1452        g_assert_not_reached();
1453    }
1454
1455    ctx->z_mask = z_mask;
1456    ctx->s_mask = smask_from_zmask(z_mask);
1457    if (!type_change) {
1458        ctx->a_mask = z_mask_old ^ z_mask;
1459    }
1460    return fold_masks(ctx, op);
1461}
1462
1463static bool fold_mb(OptContext *ctx, TCGOp *op)
1464{
1465    /* Eliminate duplicate and redundant fence instructions.  */
1466    if (ctx->prev_mb) {
1467        /*
1468         * Merge two barriers of the same type into one,
1469         * or a weaker barrier into a stronger one,
1470         * or two weaker barriers into a stronger one.
1471         *   mb X; mb Y => mb X|Y
1472         *   mb; strl => mb; st
1473         *   ldaq; mb => ld; mb
1474         *   ldaq; strl => ld; mb; st
1475         * Other combinations are also merged into a strong
1476         * barrier.  This is stricter than specified but for
1477         * the purposes of TCG is better than not optimizing.
1478         */
1479        ctx->prev_mb->args[0] |= op->args[0];
1480        tcg_op_remove(ctx->tcg, op);
1481    } else {
1482        ctx->prev_mb = op;
1483    }
1484    return true;
1485}
1486
1487static bool fold_mov(OptContext *ctx, TCGOp *op)
1488{
1489    return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1490}
1491
1492static bool fold_movcond(OptContext *ctx, TCGOp *op)
1493{
1494    TCGCond cond = op->args[5];
1495    int i;
1496
1497    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1498        op->args[5] = cond = tcg_swap_cond(cond);
1499    }
1500    /*
1501     * Canonicalize the "false" input reg to match the destination reg so
1502     * that the tcg backend can implement a "move if true" operation.
1503     */
1504    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1505        op->args[5] = cond = tcg_invert_cond(cond);
1506    }
1507
1508    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1509    if (i >= 0) {
1510        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1511    }
1512
1513    ctx->z_mask = arg_info(op->args[3])->z_mask
1514                | arg_info(op->args[4])->z_mask;
1515    ctx->s_mask = arg_info(op->args[3])->s_mask
1516                & arg_info(op->args[4])->s_mask;
1517
1518    if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1519        uint64_t tv = arg_info(op->args[3])->val;
1520        uint64_t fv = arg_info(op->args[4])->val;
1521        TCGOpcode opc;
1522
1523        switch (ctx->type) {
1524        case TCG_TYPE_I32:
1525            opc = INDEX_op_setcond_i32;
1526            break;
1527        case TCG_TYPE_I64:
1528            opc = INDEX_op_setcond_i64;
1529            break;
1530        default:
1531            g_assert_not_reached();
1532        }
1533
1534        if (tv == 1 && fv == 0) {
1535            op->opc = opc;
1536            op->args[3] = cond;
1537        } else if (fv == 1 && tv == 0) {
1538            op->opc = opc;
1539            op->args[3] = tcg_invert_cond(cond);
1540        }
1541    }
1542    return false;
1543}
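/*
 * For example (pseudo-notation), "movcond_i32 d, c1, c2, $1, $0, cond"
 * is rewritten as "setcond_i32 d, c1, c2, cond", and with the two
 * constants swapped the condition is inverted instead.
 */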
1544
1545static bool fold_mul(OptContext *ctx, TCGOp *op)
1546{
1547    if (fold_const2(ctx, op) ||
1548        fold_xi_to_i(ctx, op, 0) ||
1549        fold_xi_to_x(ctx, op, 1)) {
1550        return true;
1551    }
1552    return false;
1553}
1554
1555static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1556{
1557    if (fold_const2_commutative(ctx, op) ||
1558        fold_xi_to_i(ctx, op, 0)) {
1559        return true;
1560    }
1561    return false;
1562}
1563
1564static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1565{
1566    swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1567
1568    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1569        uint64_t a = arg_info(op->args[2])->val;
1570        uint64_t b = arg_info(op->args[3])->val;
1571        uint64_t h, l;
1572        TCGArg rl, rh;
1573        TCGOp *op2;
1574
1575        switch (op->opc) {
1576        case INDEX_op_mulu2_i32:
1577            l = (uint64_t)(uint32_t)a * (uint32_t)b;
1578            h = (int32_t)(l >> 32);
1579            l = (int32_t)l;
1580            break;
1581        case INDEX_op_muls2_i32:
1582            l = (int64_t)(int32_t)a * (int32_t)b;
1583            h = l >> 32;
1584            l = (int32_t)l;
1585            break;
1586        case INDEX_op_mulu2_i64:
1587            mulu64(&l, &h, a, b);
1588            break;
1589        case INDEX_op_muls2_i64:
1590            muls64(&l, &h, a, b);
1591            break;
1592        default:
1593            g_assert_not_reached();
1594        }
1595
1596        rl = op->args[0];
1597        rh = op->args[1];
1598
1599        /* The proper opcode is supplied by tcg_opt_gen_mov. */
1600        op2 = tcg_op_insert_before(ctx->tcg, op, 0);
1601
1602        tcg_opt_gen_movi(ctx, op, rl, l);
1603        tcg_opt_gen_movi(ctx, op2, rh, h);
1604        return true;
1605    }
1606    return false;
1607}
1608
1609static bool fold_nand(OptContext *ctx, TCGOp *op)
1610{
1611    if (fold_const2_commutative(ctx, op) ||
1612        fold_xi_to_not(ctx, op, -1)) {
1613        return true;
1614    }
1615
1616    ctx->s_mask = arg_info(op->args[1])->s_mask
1617                & arg_info(op->args[2])->s_mask;
1618    return false;
1619}
1620
1621static bool fold_neg(OptContext *ctx, TCGOp *op)
1622{
1623    uint64_t z_mask;
1624
1625    if (fold_const1(ctx, op)) {
1626        return true;
1627    }
1628
1629    /* Set to 1 all bits to the left of the rightmost.  */
1630    z_mask = arg_info(op->args[1])->z_mask;
1631    ctx->z_mask = -(z_mask & -z_mask);
1632
1633    /*
1634     * Because of fold_sub_to_neg, we want to always return true,
1635     * via finish_folding.
1636     */
1637    finish_folding(ctx, op);
1638    return true;
1639}
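/*
 * Worked example: if the input z_mask is 0x6 (only bits 1-2 may be set),
 * then -x is a multiple of 2 as well, so bit 0 of the result is still
 * known zero: -(0x6 & -0x6) == -2 == 0xfffffffffffffffe.
 */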
1640
1641static bool fold_nor(OptContext *ctx, TCGOp *op)
1642{
1643    if (fold_const2_commutative(ctx, op) ||
1644        fold_xi_to_not(ctx, op, 0)) {
1645        return true;
1646    }
1647
1648    ctx->s_mask = arg_info(op->args[1])->s_mask
1649                & arg_info(op->args[2])->s_mask;
1650    return false;
1651}
1652
1653static bool fold_not(OptContext *ctx, TCGOp *op)
1654{
1655    if (fold_const1(ctx, op)) {
1656        return true;
1657    }
1658
1659    ctx->s_mask = arg_info(op->args[1])->s_mask;
1660
1661    /* Because of fold_to_not, we want to always return true, via finish. */
1662    finish_folding(ctx, op);
1663    return true;
1664}
1665
1666static bool fold_or(OptContext *ctx, TCGOp *op)
1667{
1668    if (fold_const2_commutative(ctx, op) ||
1669        fold_xi_to_x(ctx, op, 0) ||
1670        fold_xx_to_x(ctx, op)) {
1671        return true;
1672    }
1673
1674    ctx->z_mask = arg_info(op->args[1])->z_mask
1675                | arg_info(op->args[2])->z_mask;
1676    ctx->s_mask = arg_info(op->args[1])->s_mask
1677                & arg_info(op->args[2])->s_mask;
1678    return fold_masks(ctx, op);
1679}
1680
1681static bool fold_orc(OptContext *ctx, TCGOp *op)
1682{
1683    if (fold_const2(ctx, op) ||
1684        fold_xx_to_i(ctx, op, -1) ||
1685        fold_xi_to_x(ctx, op, -1) ||
1686        fold_ix_to_not(ctx, op, 0)) {
1687        return true;
1688    }
1689
1690    ctx->s_mask = arg_info(op->args[1])->s_mask
1691                & arg_info(op->args[2])->s_mask;
1692    return false;
1693}
1694
1695static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1696{
1697    const TCGOpDef *def = &tcg_op_defs[op->opc];
1698    MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1699    MemOp mop = get_memop(oi);
1700    int width = 8 * memop_size(mop);
1701
1702    if (width < 64) {
1703        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1704        if (!(mop & MO_SIGN)) {
1705            ctx->z_mask = MAKE_64BIT_MASK(0, width);
1706            ctx->s_mask <<= 1;
1707        }
1708    }
1709
1710    /* Opcodes that touch guest memory stop the mb optimization.  */
1711    ctx->prev_mb = NULL;
1712    return false;
1713}
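/*
 * For example, an 8-bit unsigned load yields z_mask = 0xff and
 * s_mask = 0xfffffffffffffe00, while the signed variant leaves z_mask
 * unconstrained and sets s_mask = 0xffffffffffffff00.
 */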
1714
1715static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1716{
1717    /* Opcodes that touch guest memory stop the mb optimization.  */
1718    ctx->prev_mb = NULL;
1719    return false;
1720}
1721
1722static bool fold_remainder(OptContext *ctx, TCGOp *op)
1723{
1724    if (fold_const2(ctx, op) ||
1725        fold_xx_to_i(ctx, op, 0)) {
1726        return true;
1727    }
1728    return false;
1729}
1730
1731static bool fold_setcond(OptContext *ctx, TCGOp *op)
1732{
1733    TCGCond cond = op->args[3];
1734    int i;
1735
1736    if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1737        op->args[3] = cond = tcg_swap_cond(cond);
1738    }
1739
1740    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1741    if (i >= 0) {
1742        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1743    }
1744
1745    ctx->z_mask = 1;
1746    ctx->s_mask = smask_from_zmask(1);
1747    return false;
1748}
1749
1750static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1751{
1752    TCGCond cond = op->args[5];
1753    int i, inv = 0;
1754
1755    if (swap_commutative2(&op->args[1], &op->args[3])) {
1756        op->args[5] = cond = tcg_swap_cond(cond);
1757    }
1758
1759    i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1760    if (i >= 0) {
1761        goto do_setcond_const;
1762    }
1763
1764    switch (cond) {
1765    case TCG_COND_LT:
1766    case TCG_COND_GE:
1767        /*
1768         * Simplify LT/GE comparisons vs zero to a single compare
1769         * vs the high word of the input.
1770         */
1771        if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1772            arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1773            goto do_setcond_high;
1774        }
1775        break;
1776
1777    case TCG_COND_NE:
1778        inv = 1;
1779        QEMU_FALLTHROUGH;
1780    case TCG_COND_EQ:
1781        /*
1782         * Simplify EQ/NE comparisons where one of the pairs
1783         * can be simplified.
1784         */
1785        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1786                                     op->args[3], cond);
1787        switch (i ^ inv) {
1788        case 0:
1789            goto do_setcond_const;
1790        case 1:
1791            goto do_setcond_high;
1792        }
1793
1794        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
1795                                     op->args[4], cond);
1796        switch (i ^ inv) {
1797        case 0:
1798            goto do_setcond_const;
1799        case 1:
1800            op->args[2] = op->args[3];
1801            op->args[3] = cond;
1802            op->opc = INDEX_op_setcond_i32;
1803            break;
1804        }
1805        break;
1806
1807    default:
1808        break;
1809
1810    do_setcond_high:
1811        op->args[1] = op->args[2];
1812        op->args[2] = op->args[4];
1813        op->args[3] = cond;
1814        op->opc = INDEX_op_setcond_i32;
1815        break;
1816    }
1817
1818    ctx->z_mask = 1;
1819    ctx->s_mask = smask_from_zmask(1);
1820    return false;
1821
1822 do_setcond_const:
1823    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1824}
1825
1826static bool fold_sextract(OptContext *ctx, TCGOp *op)
1827{
1828    uint64_t z_mask, s_mask, s_mask_old;
1829    int pos = op->args[2];
1830    int len = op->args[3];
1831
1832    if (arg_is_const(op->args[1])) {
1833        uint64_t t;
1834
1835        t = arg_info(op->args[1])->val;
1836        t = sextract64(t, pos, len);
1837        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1838    }
1839
1840    z_mask = arg_info(op->args[1])->z_mask;
1841    z_mask = sextract64(z_mask, pos, len);
1842    ctx->z_mask = z_mask;
1843
1844    s_mask_old = arg_info(op->args[1])->s_mask;
1845    s_mask = sextract64(s_mask_old, pos, len);
1846    s_mask |= MAKE_64BIT_MASK(len, 64 - len);
1847    ctx->s_mask = s_mask;
1848
1849    if (pos == 0) {
1850        ctx->a_mask = s_mask & ~s_mask_old;
1851    }
1852
1853    return fold_masks(ctx, op);
1854}
1855
1856static bool fold_shift(OptContext *ctx, TCGOp *op)
1857{
1858    uint64_t s_mask, z_mask, sign;
1859
1860    if (fold_const2(ctx, op) ||
1861        fold_ix_to_i(ctx, op, 0) ||
1862        fold_xi_to_x(ctx, op, 0)) {
1863        return true;
1864    }
1865
1866    s_mask = arg_info(op->args[1])->s_mask;
1867    z_mask = arg_info(op->args[1])->z_mask;
1868
1869    if (arg_is_const(op->args[2])) {
1870        int sh = arg_info(op->args[2])->val;
1871
1872        ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
1873
1874        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
1875        ctx->s_mask = smask_from_smask(s_mask);
1876
1877        return fold_masks(ctx, op);
1878    }
1879
1880    switch (op->opc) {
1881    CASE_OP_32_64(sar):
1882        /*
1883         * Arithmetic right shift will not reduce the number of
1884         * input sign repetitions.
1885         */
1886        ctx->s_mask = s_mask;
1887        break;
1888    CASE_OP_32_64(shr):
1889        /*
1890         * If the sign bit is known zero, then logical right shift
1891         * will not reduce the number of input sign repetitions.
1892         */
1893        sign = (s_mask & -s_mask) >> 1;
1894        if (!(z_mask & sign)) {
1895            ctx->s_mask = s_mask;
1896        }
1897        break;
1898    default:
1899        break;
1900    }
1901
1902    return false;
1903}
1904
1905static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
1906{
1907    TCGOpcode neg_op;
1908    bool have_neg;
1909
1910    if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
1911        return false;
1912    }
1913
1914    switch (ctx->type) {
1915    case TCG_TYPE_I32:
1916        neg_op = INDEX_op_neg_i32;
1917        have_neg = TCG_TARGET_HAS_neg_i32;
1918        break;
1919    case TCG_TYPE_I64:
1920        neg_op = INDEX_op_neg_i64;
1921        have_neg = TCG_TARGET_HAS_neg_i64;
1922        break;
1923    case TCG_TYPE_V64:
1924    case TCG_TYPE_V128:
1925    case TCG_TYPE_V256:
1926        neg_op = INDEX_op_neg_vec;
1927        have_neg = (TCG_TARGET_HAS_neg_vec &&
1928                    tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
1929        break;
1930    default:
1931        g_assert_not_reached();
1932    }
1933    if (have_neg) {
1934        op->opc = neg_op;
1935        op->args[1] = op->args[2];
1936        return fold_neg(ctx, op);
1937    }
1938    return false;
1939}
1940
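    /* Constant-fold, and apply x - x => 0, x - 0 => x, 0 - x => neg x. */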
1941static bool fold_sub(OptContext *ctx, TCGOp *op)
1942{
1943    if (fold_const2(ctx, op) ||
1944        fold_xx_to_i(ctx, op, 0) ||
1945        fold_xi_to_x(ctx, op, 0) ||
1946        fold_sub_to_neg(ctx, op)) {
1947        return true;
1948    }
1949    return false;
1950}
1951
1952static bool fold_sub2(OptContext *ctx, TCGOp *op)
1953{
1954    return fold_addsub2(ctx, op, false);
1955}
1956
1957static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
1958{
1959    /* We can't do any folding with a load, but we can record bits. */
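        /*
         * Sign-extending loads guarantee the bits above the loaded field
         * repeat its sign; zero-extending loads guarantee those bits are
         * zero, which also gives sign information (cf. smask_from_zmask).
         */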
1960    switch (op->opc) {
1961    CASE_OP_32_64(ld8s):
1962        ctx->s_mask = MAKE_64BIT_MASK(8, 56);
1963        break;
1964    CASE_OP_32_64(ld8u):
1965        ctx->z_mask = MAKE_64BIT_MASK(0, 8);
1966        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
1967        break;
1968    CASE_OP_32_64(ld16s):
1969        ctx->s_mask = MAKE_64BIT_MASK(16, 48);
1970        break;
1971    CASE_OP_32_64(ld16u):
1972        ctx->z_mask = MAKE_64BIT_MASK(0, 16);
1973        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
1974        break;
1975    case INDEX_op_ld32s_i64:
1976        ctx->s_mask = MAKE_64BIT_MASK(32, 32);
1977        break;
1978    case INDEX_op_ld32u_i64:
1979        ctx->z_mask = MAKE_64BIT_MASK(0, 32);
1980        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
1981        break;
1982    default:
1983        g_assert_not_reached();
1984    }
1985    return false;
1986}
1987
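    /* x ^ x => 0, x ^ 0 => x, x ^ -1 => not x. */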
1988static bool fold_xor(OptContext *ctx, TCGOp *op)
1989{
1990    if (fold_const2_commutative(ctx, op) ||
1991        fold_xx_to_i(ctx, op, 0) ||
1992        fold_xi_to_x(ctx, op, 0) ||
1993        fold_xi_to_not(ctx, op, -1)) {
1994        return true;
1995    }
1996
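        /*
         * A result bit can be nonzero if it can be nonzero in either input;
         * a sign-repetition bit survives only if both inputs repeat their
         * sign at that position.
         */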
1997    ctx->z_mask = arg_info(op->args[1])->z_mask
1998                | arg_info(op->args[2])->z_mask;
1999    ctx->s_mask = arg_info(op->args[1])->s_mask
2000                & arg_info(op->args[2])->s_mask;
2001    return fold_masks(ctx, op);
2002}
2003
2004/* Propagate constants and copies, fold constant expressions. */
2005void tcg_optimize(TCGContext *s)
2006{
2007    int nb_temps, i;
2008    TCGOp *op, *op_next;
2009    OptContext ctx = { .tcg = s };
2010
2011    /* Each temp has a TempOptInfo, reached through its state_ptr.
2012       If the temp holds a constant, the info records that value.
2013       If the temp is a copy of other temps, all of the copies are
2014       linked in a doubly linked circular list. */
2015
2016    nb_temps = s->nb_temps;
2017    for (i = 0; i < nb_temps; ++i) {
2018        s->temps[i].state_ptr = NULL;
2019    }
2020
2021    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2022        TCGOpcode opc = op->opc;
2023        const TCGOpDef *def;
2024        bool done = false;
2025
2026        /* Calls are special. */
2027        if (opc == INDEX_op_call) {
2028            fold_call(&ctx, op);
2029            continue;
2030        }
2031
2032        def = &tcg_op_defs[opc];
2033        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2034        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2035
2036        /* Pre-compute the type of the operation. */
2037        if (def->flags & TCG_OPF_VECTOR) {
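                /*
                 * TCGOP_VECL is 0, 1 or 2 for 64-, 128- or 256-bit vectors,
                 * and the TCG_TYPE_V* enumerators are consecutive.
                 */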
2038            ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2039        } else if (def->flags & TCG_OPF_64BIT) {
2040            ctx.type = TCG_TYPE_I64;
2041        } else {
2042            ctx.type = TCG_TYPE_I32;
2043        }
2044
2045        /* Assume all bits affected, no bits known zero, no sign reps. */
2046        ctx.a_mask = -1;
2047        ctx.z_mask = -1;
2048        ctx.s_mask = 0;
2049
2050        /*
2051         * Process each opcode.
2052         * Sorted alphabetically by opcode as much as possible.
2053         */
2054        switch (opc) {
2055        CASE_OP_32_64_VEC(add):
2056            done = fold_add(&ctx, op);
2057            break;
2058        CASE_OP_32_64(add2):
2059            done = fold_add2(&ctx, op);
2060            break;
2061        CASE_OP_32_64_VEC(and):
2062            done = fold_and(&ctx, op);
2063            break;
2064        CASE_OP_32_64_VEC(andc):
2065            done = fold_andc(&ctx, op);
2066            break;
2067        CASE_OP_32_64(brcond):
2068            done = fold_brcond(&ctx, op);
2069            break;
2070        case INDEX_op_brcond2_i32:
2071            done = fold_brcond2(&ctx, op);
2072            break;
2073        CASE_OP_32_64(bswap16):
2074        CASE_OP_32_64(bswap32):
2075        case INDEX_op_bswap64_i64:
2076            done = fold_bswap(&ctx, op);
2077            break;
2078        CASE_OP_32_64(clz):
2079        CASE_OP_32_64(ctz):
2080            done = fold_count_zeros(&ctx, op);
2081            break;
2082        CASE_OP_32_64(ctpop):
2083            done = fold_ctpop(&ctx, op);
2084            break;
2085        CASE_OP_32_64(deposit):
2086            done = fold_deposit(&ctx, op);
2087            break;
2088        CASE_OP_32_64(div):
2089        CASE_OP_32_64(divu):
2090            done = fold_divide(&ctx, op);
2091            break;
2092        case INDEX_op_dup_vec:
2093            done = fold_dup(&ctx, op);
2094            break;
2095        case INDEX_op_dup2_vec:
2096            done = fold_dup2(&ctx, op);
2097            break;
2098        CASE_OP_32_64(eqv):
2099            done = fold_eqv(&ctx, op);
2100            break;
2101        CASE_OP_32_64(extract):
2102            done = fold_extract(&ctx, op);
2103            break;
2104        CASE_OP_32_64(extract2):
2105            done = fold_extract2(&ctx, op);
2106            break;
2107        CASE_OP_32_64(ext8s):
2108        CASE_OP_32_64(ext16s):
2109        case INDEX_op_ext32s_i64:
2110        case INDEX_op_ext_i32_i64:
2111            done = fold_exts(&ctx, op);
2112            break;
2113        CASE_OP_32_64(ext8u):
2114        CASE_OP_32_64(ext16u):
2115        case INDEX_op_ext32u_i64:
2116        case INDEX_op_extu_i32_i64:
2117        case INDEX_op_extrl_i64_i32:
2118        case INDEX_op_extrh_i64_i32:
2119            done = fold_extu(&ctx, op);
2120            break;
2121        CASE_OP_32_64(ld8s):
2122        CASE_OP_32_64(ld8u):
2123        CASE_OP_32_64(ld16s):
2124        CASE_OP_32_64(ld16u):
2125        case INDEX_op_ld32s_i64:
2126        case INDEX_op_ld32u_i64:
2127            done = fold_tcg_ld(&ctx, op);
2128            break;
2129        case INDEX_op_mb:
2130            done = fold_mb(&ctx, op);
2131            break;
2132        CASE_OP_32_64_VEC(mov):
2133            done = fold_mov(&ctx, op);
2134            break;
2135        CASE_OP_32_64(movcond):
2136            done = fold_movcond(&ctx, op);
2137            break;
2138        CASE_OP_32_64(mul):
2139            done = fold_mul(&ctx, op);
2140            break;
2141        CASE_OP_32_64(mulsh):
2142        CASE_OP_32_64(muluh):
2143            done = fold_mul_highpart(&ctx, op);
2144            break;
2145        CASE_OP_32_64(muls2):
2146        CASE_OP_32_64(mulu2):
2147            done = fold_multiply2(&ctx, op);
2148            break;
2149        CASE_OP_32_64(nand):
2150            done = fold_nand(&ctx, op);
2151            break;
2152        CASE_OP_32_64(neg):
2153            done = fold_neg(&ctx, op);
2154            break;
2155        CASE_OP_32_64(nor):
2156            done = fold_nor(&ctx, op);
2157            break;
2158        CASE_OP_32_64_VEC(not):
2159            done = fold_not(&ctx, op);
2160            break;
2161        CASE_OP_32_64_VEC(or):
2162            done = fold_or(&ctx, op);
2163            break;
2164        CASE_OP_32_64_VEC(orc):
2165            done = fold_orc(&ctx, op);
2166            break;
2167        case INDEX_op_qemu_ld_i32:
2168        case INDEX_op_qemu_ld_i64:
2169            done = fold_qemu_ld(&ctx, op);
2170            break;
2171        case INDEX_op_qemu_st_i32:
2172        case INDEX_op_qemu_st8_i32:
2173        case INDEX_op_qemu_st_i64:
2174            done = fold_qemu_st(&ctx, op);
2175            break;
2176        CASE_OP_32_64(rem):
2177        CASE_OP_32_64(remu):
2178            done = fold_remainder(&ctx, op);
2179            break;
2180        CASE_OP_32_64(rotl):
2181        CASE_OP_32_64(rotr):
2182        CASE_OP_32_64(sar):
2183        CASE_OP_32_64(shl):
2184        CASE_OP_32_64(shr):
2185            done = fold_shift(&ctx, op);
2186            break;
2187        CASE_OP_32_64(setcond):
2188            done = fold_setcond(&ctx, op);
2189            break;
2190        case INDEX_op_setcond2_i32:
2191            done = fold_setcond2(&ctx, op);
2192            break;
2193        CASE_OP_32_64(sextract):
2194            done = fold_sextract(&ctx, op);
2195            break;
2196        CASE_OP_32_64_VEC(sub):
2197            done = fold_sub(&ctx, op);
2198            break;
2199        CASE_OP_32_64(sub2):
2200            done = fold_sub2(&ctx, op);
2201            break;
2202        CASE_OP_32_64_VEC(xor):
2203            done = fold_xor(&ctx, op);
2204            break;
2205        default:
2206            break;
2207        }
2208
2209        if (!done) {
2210            finish_folding(&ctx, op);
2211        }
2212    }
2213}
2214