qemu/tcg/optimize.c
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qemu/int128.h"
  28#include "tcg/tcg-op-common.h"
  29#include "tcg-internal.h"
  30
  31#define CASE_OP_32_64(x)                        \
  32        glue(glue(case INDEX_op_, x), _i32):    \
  33        glue(glue(case INDEX_op_, x), _i64)
  34
  35#define CASE_OP_32_64_VEC(x)                    \
  36        glue(glue(case INDEX_op_, x), _i32):    \
  37        glue(glue(case INDEX_op_, x), _i64):    \
  38        glue(glue(case INDEX_op_, x), _vec)
  39
  40typedef struct TempOptInfo {
  41    bool is_const;
  42    TCGTemp *prev_copy;
  43    TCGTemp *next_copy;
  44    uint64_t val;
  45    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
  46    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
  47} TempOptInfo;
  48
  49typedef struct OptContext {
  50    TCGContext *tcg;
  51    TCGOp *prev_mb;
  52    TCGTempSet temps_used;
  53
  54    /* In flight values from optimization. */
  55    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
  56    uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
  57    uint64_t s_mask;  /* mask of clrsb(value) bits */
  58    TCGType type;
  59} OptContext;
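
/*
 * An illustrative example of the in-flight masks: when folding
 * "and_i32 r, x, $0xff", z_mask becomes z(x) & 0xff (only the low
 * byte can be nonzero) and, because the second operand is a constant,
 * a_mask becomes z(x) & ~0xff.  If x is already known to fit in the
 * low byte, a_mask is 0 and fold_masks() turns the op into a mov of x.
 */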
  60
  61/* Calculate the smask for a specific value. */
  62static uint64_t smask_from_value(uint64_t value)
  63{
  64    int rep = clrsb64(value);
  65    return ~(~0ull >> rep);
  66}
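
/*
 * For example, smask_from_value(0x00ff) sees clrsb64(0x00ff) == 55
 * (the 55 bits below the sign bit merely repeat it), and so returns
 * ~(~0ull >> 55), a mask with the top 55 bits set.
 */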
  67
  68/*
  69 * Calculate the smask for a given set of known-zeros.
  70 * If there are lots of zeros on the left, we can consider the remainder
  71 * an unsigned field, and thus the corresponding signed field is one bit
  72 * larger.
  73 */
  74static uint64_t smask_from_zmask(uint64_t zmask)
  75{
  76    /*
  77     * Only the 0 bits are significant for zmask, thus the msb itself
  78     * must be zero, else we have no sign information.
  79     */
  80    int rep = clz64(zmask);
  81    if (rep == 0) {
  82        return 0;
  83    }
  84    rep -= 1;
  85    return ~(~0ull >> rep);
  86}
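
/*
 * For example, a zmask of 0x00ff (the value fits in 8 unsigned bits)
 * has clz64 == 56, so the value also fits in a 9-bit signed field and
 * the result sets the top 55 bits: a safe lower bound on what
 * smask_from_value() would report for any such value.
 */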
  87
  88/*
  89 * Recreate a properly left-aligned smask after manipulation.
  90 * Some bit-shuffling, particularly shifts and rotates, may
  91 * retain sign bits on the left, but may scatter disconnected
  92 * sign bits on the right.  Retain only what remains to the left.
  93 */
  94static uint64_t smask_from_smask(int64_t smask)
  95{
  96    /* Only the 1 bits are significant for smask */
  97    return smask_from_zmask(~smask);
  98}
  99
 100static inline TempOptInfo *ts_info(TCGTemp *ts)
 101{
 102    return ts->state_ptr;
 103}
 104
 105static inline TempOptInfo *arg_info(TCGArg arg)
 106{
 107    return ts_info(arg_temp(arg));
 108}
 109
 110static inline bool ts_is_const(TCGTemp *ts)
 111{
 112    return ts_info(ts)->is_const;
 113}
 114
 115static inline bool arg_is_const(TCGArg arg)
 116{
 117    return ts_is_const(arg_temp(arg));
 118}
 119
 120static inline bool ts_is_copy(TCGTemp *ts)
 121{
 122    return ts_info(ts)->next_copy != ts;
 123}
 124
  125/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
 126static void reset_ts(TCGTemp *ts)
 127{
 128    TempOptInfo *ti = ts_info(ts);
 129    TempOptInfo *pi = ts_info(ti->prev_copy);
 130    TempOptInfo *ni = ts_info(ti->next_copy);
 131
 132    ni->prev_copy = ti->prev_copy;
 133    pi->next_copy = ti->next_copy;
 134    ti->next_copy = ts;
 135    ti->prev_copy = ts;
 136    ti->is_const = false;
 137    ti->z_mask = -1;
 138    ti->s_mask = 0;
 139}
 140
 141static void reset_temp(TCGArg arg)
 142{
 143    reset_ts(arg_temp(arg));
 144}
 145
 146/* Initialize and activate a temporary.  */
 147static void init_ts_info(OptContext *ctx, TCGTemp *ts)
 148{
 149    size_t idx = temp_idx(ts);
 150    TempOptInfo *ti;
 151
 152    if (test_bit(idx, ctx->temps_used.l)) {
 153        return;
 154    }
 155    set_bit(idx, ctx->temps_used.l);
 156
 157    ti = ts->state_ptr;
 158    if (ti == NULL) {
 159        ti = tcg_malloc(sizeof(TempOptInfo));
 160        ts->state_ptr = ti;
 161    }
 162
 163    ti->next_copy = ts;
 164    ti->prev_copy = ts;
 165    if (ts->kind == TEMP_CONST) {
 166        ti->is_const = true;
 167        ti->val = ts->val;
 168        ti->z_mask = ts->val;
 169        ti->s_mask = smask_from_value(ts->val);
 170    } else {
 171        ti->is_const = false;
 172        ti->z_mask = -1;
 173        ti->s_mask = 0;
 174    }
 175}
 176
 177static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 178{
 179    TCGTemp *i, *g, *l;
 180
 181    /* If this is already readonly, we can't do better. */
 182    if (temp_readonly(ts)) {
 183        return ts;
 184    }
 185
 186    g = l = NULL;
 187    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
 188        if (temp_readonly(i)) {
 189            return i;
 190        } else if (i->kind > ts->kind) {
 191            if (i->kind == TEMP_GLOBAL) {
 192                g = i;
 193            } else if (i->kind == TEMP_TB) {
 194                l = i;
 195            }
 196        }
 197    }
 198
 199    /* If we didn't find a better representation, return the same temp. */
 200    return g ? g : l ? l : ts;
 201}
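
/*
 * The preference order above is thus: a read-only temp (such as a
 * constant), then a global, then a TB-lifetime temp, and finally the
 * original input itself.
 */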
 202
 203static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
 204{
 205    TCGTemp *i;
 206
 207    if (ts1 == ts2) {
 208        return true;
 209    }
 210
 211    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
 212        return false;
 213    }
 214
 215    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
 216        if (i == ts2) {
 217            return true;
 218        }
 219    }
 220
 221    return false;
 222}
 223
 224static bool args_are_copies(TCGArg arg1, TCGArg arg2)
 225{
 226    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 227}
 228
 229static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 230{
 231    TCGTemp *dst_ts = arg_temp(dst);
 232    TCGTemp *src_ts = arg_temp(src);
 233    TempOptInfo *di;
 234    TempOptInfo *si;
 235    TCGOpcode new_op;
 236
 237    if (ts_are_copies(dst_ts, src_ts)) {
 238        tcg_op_remove(ctx->tcg, op);
 239        return true;
 240    }
 241
 242    reset_ts(dst_ts);
 243    di = ts_info(dst_ts);
 244    si = ts_info(src_ts);
 245
 246    switch (ctx->type) {
 247    case TCG_TYPE_I32:
 248        new_op = INDEX_op_mov_i32;
 249        break;
 250    case TCG_TYPE_I64:
 251        new_op = INDEX_op_mov_i64;
 252        break;
 253    case TCG_TYPE_V64:
 254    case TCG_TYPE_V128:
 255    case TCG_TYPE_V256:
 256        /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
 257        new_op = INDEX_op_mov_vec;
 258        break;
 259    default:
 260        g_assert_not_reached();
 261    }
 262    op->opc = new_op;
 263    op->args[0] = dst;
 264    op->args[1] = src;
 265
 266    di->z_mask = si->z_mask;
 267    di->s_mask = si->s_mask;
 268
 269    if (src_ts->type == dst_ts->type) {
 270        TempOptInfo *ni = ts_info(si->next_copy);
 271
 272        di->next_copy = si->next_copy;
 273        di->prev_copy = src_ts;
 274        ni->prev_copy = dst_ts;
 275        si->next_copy = dst_ts;
 276        di->is_const = si->is_const;
 277        di->val = si->val;
 278    }
 279    return true;
 280}
 281
 282static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
 283                             TCGArg dst, uint64_t val)
 284{
 285    TCGTemp *tv;
 286
 287    if (ctx->type == TCG_TYPE_I32) {
 288        val = (int32_t)val;
 289    }
 290
 291    /* Convert movi to mov with constant temp. */
 292    tv = tcg_constant_internal(ctx->type, val);
 293    init_ts_info(ctx, tv);
 294    return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
 295}
 296
 297static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
 298{
 299    uint64_t l64, h64;
 300
 301    switch (op) {
 302    CASE_OP_32_64(add):
 303        return x + y;
 304
 305    CASE_OP_32_64(sub):
 306        return x - y;
 307
 308    CASE_OP_32_64(mul):
 309        return x * y;
 310
 311    CASE_OP_32_64_VEC(and):
 312        return x & y;
 313
 314    CASE_OP_32_64_VEC(or):
 315        return x | y;
 316
 317    CASE_OP_32_64_VEC(xor):
 318        return x ^ y;
 319
 320    case INDEX_op_shl_i32:
 321        return (uint32_t)x << (y & 31);
 322
 323    case INDEX_op_shl_i64:
 324        return (uint64_t)x << (y & 63);
 325
 326    case INDEX_op_shr_i32:
 327        return (uint32_t)x >> (y & 31);
 328
 329    case INDEX_op_shr_i64:
 330        return (uint64_t)x >> (y & 63);
 331
 332    case INDEX_op_sar_i32:
 333        return (int32_t)x >> (y & 31);
 334
 335    case INDEX_op_sar_i64:
 336        return (int64_t)x >> (y & 63);
 337
 338    case INDEX_op_rotr_i32:
 339        return ror32(x, y & 31);
 340
 341    case INDEX_op_rotr_i64:
 342        return ror64(x, y & 63);
 343
 344    case INDEX_op_rotl_i32:
 345        return rol32(x, y & 31);
 346
 347    case INDEX_op_rotl_i64:
 348        return rol64(x, y & 63);
 349
 350    CASE_OP_32_64_VEC(not):
 351        return ~x;
 352
 353    CASE_OP_32_64(neg):
 354        return -x;
 355
 356    CASE_OP_32_64_VEC(andc):
 357        return x & ~y;
 358
 359    CASE_OP_32_64_VEC(orc):
 360        return x | ~y;
 361
 362    CASE_OP_32_64_VEC(eqv):
 363        return ~(x ^ y);
 364
 365    CASE_OP_32_64_VEC(nand):
 366        return ~(x & y);
 367
 368    CASE_OP_32_64_VEC(nor):
 369        return ~(x | y);
 370
 371    case INDEX_op_clz_i32:
 372        return (uint32_t)x ? clz32(x) : y;
 373
 374    case INDEX_op_clz_i64:
 375        return x ? clz64(x) : y;
 376
 377    case INDEX_op_ctz_i32:
 378        return (uint32_t)x ? ctz32(x) : y;
 379
 380    case INDEX_op_ctz_i64:
 381        return x ? ctz64(x) : y;
 382
 383    case INDEX_op_ctpop_i32:
 384        return ctpop32(x);
 385
 386    case INDEX_op_ctpop_i64:
 387        return ctpop64(x);
 388
 389    CASE_OP_32_64(ext8s):
 390        return (int8_t)x;
 391
 392    CASE_OP_32_64(ext16s):
 393        return (int16_t)x;
 394
 395    CASE_OP_32_64(ext8u):
 396        return (uint8_t)x;
 397
 398    CASE_OP_32_64(ext16u):
 399        return (uint16_t)x;
 400
 401    CASE_OP_32_64(bswap16):
 402        x = bswap16(x);
 403        return y & TCG_BSWAP_OS ? (int16_t)x : x;
 404
 405    CASE_OP_32_64(bswap32):
 406        x = bswap32(x);
 407        return y & TCG_BSWAP_OS ? (int32_t)x : x;
 408
 409    case INDEX_op_bswap64_i64:
 410        return bswap64(x);
 411
 412    case INDEX_op_ext_i32_i64:
 413    case INDEX_op_ext32s_i64:
 414        return (int32_t)x;
 415
 416    case INDEX_op_extu_i32_i64:
 417    case INDEX_op_extrl_i64_i32:
 418    case INDEX_op_ext32u_i64:
 419        return (uint32_t)x;
 420
 421    case INDEX_op_extrh_i64_i32:
 422        return (uint64_t)x >> 32;
 423
 424    case INDEX_op_muluh_i32:
 425        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
 426    case INDEX_op_mulsh_i32:
 427        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
 428
 429    case INDEX_op_muluh_i64:
 430        mulu64(&l64, &h64, x, y);
 431        return h64;
 432    case INDEX_op_mulsh_i64:
 433        muls64(&l64, &h64, x, y);
 434        return h64;
 435
 436    case INDEX_op_div_i32:
 437        /* Avoid crashing on divide by zero, otherwise undefined.  */
 438        return (int32_t)x / ((int32_t)y ? : 1);
 439    case INDEX_op_divu_i32:
 440        return (uint32_t)x / ((uint32_t)y ? : 1);
 441    case INDEX_op_div_i64:
 442        return (int64_t)x / ((int64_t)y ? : 1);
 443    case INDEX_op_divu_i64:
 444        return (uint64_t)x / ((uint64_t)y ? : 1);
 445
 446    case INDEX_op_rem_i32:
 447        return (int32_t)x % ((int32_t)y ? : 1);
 448    case INDEX_op_remu_i32:
 449        return (uint32_t)x % ((uint32_t)y ? : 1);
 450    case INDEX_op_rem_i64:
 451        return (int64_t)x % ((int64_t)y ? : 1);
 452    case INDEX_op_remu_i64:
 453        return (uint64_t)x % ((uint64_t)y ? : 1);
 454
 455    default:
 456        g_assert_not_reached();
 457    }
 458}
 459
 460static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
 461                                    uint64_t x, uint64_t y)
 462{
 463    uint64_t res = do_constant_folding_2(op, x, y);
 464    if (type == TCG_TYPE_I32) {
 465        res = (int32_t)res;
 466    }
 467    return res;
 468}
 469
 470static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 471{
 472    switch (c) {
 473    case TCG_COND_EQ:
 474        return x == y;
 475    case TCG_COND_NE:
 476        return x != y;
 477    case TCG_COND_LT:
 478        return (int32_t)x < (int32_t)y;
 479    case TCG_COND_GE:
 480        return (int32_t)x >= (int32_t)y;
 481    case TCG_COND_LE:
 482        return (int32_t)x <= (int32_t)y;
 483    case TCG_COND_GT:
 484        return (int32_t)x > (int32_t)y;
 485    case TCG_COND_LTU:
 486        return x < y;
 487    case TCG_COND_GEU:
 488        return x >= y;
 489    case TCG_COND_LEU:
 490        return x <= y;
 491    case TCG_COND_GTU:
 492        return x > y;
 493    default:
 494        g_assert_not_reached();
 495    }
 496}
 497
 498static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 499{
 500    switch (c) {
 501    case TCG_COND_EQ:
 502        return x == y;
 503    case TCG_COND_NE:
 504        return x != y;
 505    case TCG_COND_LT:
 506        return (int64_t)x < (int64_t)y;
 507    case TCG_COND_GE:
 508        return (int64_t)x >= (int64_t)y;
 509    case TCG_COND_LE:
 510        return (int64_t)x <= (int64_t)y;
 511    case TCG_COND_GT:
 512        return (int64_t)x > (int64_t)y;
 513    case TCG_COND_LTU:
 514        return x < y;
 515    case TCG_COND_GEU:
 516        return x >= y;
 517    case TCG_COND_LEU:
 518        return x <= y;
 519    case TCG_COND_GTU:
 520        return x > y;
 521    default:
 522        g_assert_not_reached();
 523    }
 524}
 525
 526static bool do_constant_folding_cond_eq(TCGCond c)
 527{
 528    switch (c) {
 529    case TCG_COND_GT:
 530    case TCG_COND_LTU:
 531    case TCG_COND_LT:
 532    case TCG_COND_GTU:
 533    case TCG_COND_NE:
 534        return 0;
 535    case TCG_COND_GE:
 536    case TCG_COND_GEU:
 537    case TCG_COND_LE:
 538    case TCG_COND_LEU:
 539    case TCG_COND_EQ:
 540        return 1;
 541    default:
 542        g_assert_not_reached();
 543    }
 544}
 545
 546/*
 547 * Return -1 if the condition can't be simplified,
 548 * and the result of the condition (0 or 1) if it can.
 549 */
 550static int do_constant_folding_cond(TCGType type, TCGArg x,
 551                                    TCGArg y, TCGCond c)
 552{
 553    if (arg_is_const(x) && arg_is_const(y)) {
 554        uint64_t xv = arg_info(x)->val;
 555        uint64_t yv = arg_info(y)->val;
 556
 557        switch (type) {
 558        case TCG_TYPE_I32:
 559            return do_constant_folding_cond_32(xv, yv, c);
 560        case TCG_TYPE_I64:
 561            return do_constant_folding_cond_64(xv, yv, c);
 562        default:
 563            /* Only scalar comparisons are optimizable */
 564            return -1;
 565        }
 566    } else if (args_are_copies(x, y)) {
 567        return do_constant_folding_cond_eq(c);
 568    } else if (arg_is_const(y) && arg_info(y)->val == 0) {
 569        switch (c) {
 570        case TCG_COND_LTU:
 571            return 0;
 572        case TCG_COND_GEU:
 573            return 1;
 574        default:
 575            return -1;
 576        }
 577    }
 578    return -1;
 579}
 580
 581/*
 582 * Return -1 if the condition can't be simplified,
 583 * and the result of the condition (0 or 1) if it can.
 584 */
 585static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 586{
 587    TCGArg al = p1[0], ah = p1[1];
 588    TCGArg bl = p2[0], bh = p2[1];
 589
 590    if (arg_is_const(bl) && arg_is_const(bh)) {
 591        tcg_target_ulong blv = arg_info(bl)->val;
 592        tcg_target_ulong bhv = arg_info(bh)->val;
 593        uint64_t b = deposit64(blv, 32, 32, bhv);
 594
 595        if (arg_is_const(al) && arg_is_const(ah)) {
 596            tcg_target_ulong alv = arg_info(al)->val;
 597            tcg_target_ulong ahv = arg_info(ah)->val;
 598            uint64_t a = deposit64(alv, 32, 32, ahv);
 599            return do_constant_folding_cond_64(a, b, c);
 600        }
 601        if (b == 0) {
 602            switch (c) {
 603            case TCG_COND_LTU:
 604                return 0;
 605            case TCG_COND_GEU:
 606                return 1;
 607            default:
 608                break;
 609            }
 610        }
 611    }
 612    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
 613        return do_constant_folding_cond_eq(c);
 614    }
 615    return -1;
 616}
 617
 618/**
 619 * swap_commutative:
 620 * @dest: TCGArg of the destination argument, or NO_DEST.
 621 * @p1: first paired argument
 622 * @p2: second paired argument
 623 *
 624 * If *@p1 is a constant and *@p2 is not, swap.
 625 * If *@p2 matches @dest, swap.
 626 * Return true if a swap was performed.
 627 */
 628
 629#define NO_DEST  temp_arg(NULL)
 630
 631static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 632{
 633    TCGArg a1 = *p1, a2 = *p2;
 634    int sum = 0;
 635    sum += arg_is_const(a1);
 636    sum -= arg_is_const(a2);
 637
  638    /* Prefer the constant in the second argument, and then the form
 639       op a, a, b, which is better handled on non-RISC hosts. */
 640    if (sum > 0 || (sum == 0 && dest == a2)) {
 641        *p1 = a2;
 642        *p2 = a1;
 643        return true;
 644    }
 645    return false;
 646}
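
/*
 * For example, "add_i32 t0, $5, t1" is canonicalized to
 * "add_i32 t0, t1, $5", and "and_i32 t0, t1, t0" becomes
 * "and_i32 t0, t0, t1", so later folding only has to look for a
 * constant or a destination match in one fixed position.
 */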
 647
 648static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 649{
 650    int sum = 0;
 651    sum += arg_is_const(p1[0]);
 652    sum += arg_is_const(p1[1]);
 653    sum -= arg_is_const(p2[0]);
 654    sum -= arg_is_const(p2[1]);
 655    if (sum > 0) {
 656        TCGArg t;
 657        t = p1[0], p1[0] = p2[0], p2[0] = t;
 658        t = p1[1], p1[1] = p2[1], p2[1] = t;
 659        return true;
 660    }
 661    return false;
 662}
 663
 664static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
 665{
 666    for (int i = 0; i < nb_args; i++) {
 667        TCGTemp *ts = arg_temp(op->args[i]);
 668        init_ts_info(ctx, ts);
 669    }
 670}
 671
 672static void copy_propagate(OptContext *ctx, TCGOp *op,
 673                           int nb_oargs, int nb_iargs)
 674{
 675    TCGContext *s = ctx->tcg;
 676
 677    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 678        TCGTemp *ts = arg_temp(op->args[i]);
 679        if (ts_is_copy(ts)) {
 680            op->args[i] = temp_arg(find_better_copy(s, ts));
 681        }
 682    }
 683}
 684
 685static void finish_folding(OptContext *ctx, TCGOp *op)
 686{
 687    const TCGOpDef *def = &tcg_op_defs[op->opc];
 688    int i, nb_oargs;
 689
 690    /*
 691     * For an opcode that ends a BB, reset all temp data.
 692     * We do no cross-BB optimization.
 693     */
 694    if (def->flags & TCG_OPF_BB_END) {
 695        memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
 696        ctx->prev_mb = NULL;
 697        return;
 698    }
 699
 700    nb_oargs = def->nb_oargs;
 701    for (i = 0; i < nb_oargs; i++) {
 702        TCGTemp *ts = arg_temp(op->args[i]);
 703        reset_ts(ts);
 704        /*
 705         * Save the corresponding known-zero/sign bits mask for the
 706         * first output argument (only one supported so far).
 707         */
 708        if (i == 0) {
 709            ts_info(ts)->z_mask = ctx->z_mask;
 710            ts_info(ts)->s_mask = ctx->s_mask;
 711        }
 712    }
 713}
 714
 715/*
 716 * The fold_* functions return true when processing is complete,
 717 * usually by folding the operation to a constant or to a copy,
 718 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 719 * like collect information about the value produced, for use in
 720 * optimizing a subsequent operation.
 721 *
 722 * These first fold_* functions are all helpers, used by other
 723 * folders for more specific operations.
 724 */
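
/*
 * A minimal folder for a hypothetical two-operand opcode, sketched
 * only to illustrate the protocol above, might read:
 *
 *     static bool fold_xyz(OptContext *ctx, TCGOp *op)
 *     {
 *         if (fold_const2(ctx, op)) {
 *             return true;        // folded to a movi, processing complete
 *         }
 *         ctx->z_mask = ...;      // describe the result for later ops
 *         return false;           // finish_folding() stores the masks
 *     }
 */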
 725
 726static bool fold_const1(OptContext *ctx, TCGOp *op)
 727{
 728    if (arg_is_const(op->args[1])) {
 729        uint64_t t;
 730
 731        t = arg_info(op->args[1])->val;
 732        t = do_constant_folding(op->opc, ctx->type, t, 0);
 733        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
 734    }
 735    return false;
 736}
 737
 738static bool fold_const2(OptContext *ctx, TCGOp *op)
 739{
 740    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
 741        uint64_t t1 = arg_info(op->args[1])->val;
 742        uint64_t t2 = arg_info(op->args[2])->val;
 743
 744        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
 745        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
 746    }
 747    return false;
 748}
 749
 750static bool fold_commutative(OptContext *ctx, TCGOp *op)
 751{
 752    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 753    return false;
 754}
 755
 756static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 757{
 758    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 759    return fold_const2(ctx, op);
 760}
 761
 762static bool fold_masks(OptContext *ctx, TCGOp *op)
 763{
 764    uint64_t a_mask = ctx->a_mask;
 765    uint64_t z_mask = ctx->z_mask;
 766    uint64_t s_mask = ctx->s_mask;
 767
 768    /*
 769     * 32-bit ops generate 32-bit results, which for the purpose of
 770     * simplifying tcg are sign-extended.  Certainly that's how we
 771     * represent our constants elsewhere.  Note that the bits will
 772     * be reset properly for a 64-bit value when encountering the
 773     * type changing opcodes.
 774     */
 775    if (ctx->type == TCG_TYPE_I32) {
 776        a_mask = (int32_t)a_mask;
 777        z_mask = (int32_t)z_mask;
 778        s_mask |= MAKE_64BIT_MASK(32, 32);
 779        ctx->z_mask = z_mask;
 780        ctx->s_mask = s_mask;
 781    }
 782
 783    if (z_mask == 0) {
 784        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
 785    }
 786    if (a_mask == 0) {
 787        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 788    }
 789    return false;
 790}
 791
 792/*
 793 * Convert @op to NOT, if NOT is supported by the host.
  794 * Return true if the conversion is successful, which will still
 795 * indicate that the processing is complete.
 796 */
 797static bool fold_not(OptContext *ctx, TCGOp *op);
 798static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
 799{
 800    TCGOpcode not_op;
 801    bool have_not;
 802
 803    switch (ctx->type) {
 804    case TCG_TYPE_I32:
 805        not_op = INDEX_op_not_i32;
 806        have_not = TCG_TARGET_HAS_not_i32;
 807        break;
 808    case TCG_TYPE_I64:
 809        not_op = INDEX_op_not_i64;
 810        have_not = TCG_TARGET_HAS_not_i64;
 811        break;
 812    case TCG_TYPE_V64:
 813    case TCG_TYPE_V128:
 814    case TCG_TYPE_V256:
 815        not_op = INDEX_op_not_vec;
 816        have_not = TCG_TARGET_HAS_not_vec;
 817        break;
 818    default:
 819        g_assert_not_reached();
 820    }
 821    if (have_not) {
 822        op->opc = not_op;
 823        op->args[1] = op->args[idx];
 824        return fold_not(ctx, op);
 825    }
 826    return false;
 827}
 828
 829/* If the binary operation has first argument @i, fold to @i. */
 830static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 831{
 832    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 833        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 834    }
 835    return false;
 836}
 837
 838/* If the binary operation has first argument @i, fold to NOT. */
 839static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 840{
 841    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 842        return fold_to_not(ctx, op, 2);
 843    }
 844    return false;
 845}
 846
 847/* If the binary operation has second argument @i, fold to @i. */
 848static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 849{
 850    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 851        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 852    }
 853    return false;
 854}
 855
 856/* If the binary operation has second argument @i, fold to identity. */
 857static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
 858{
 859    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 860        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 861    }
 862    return false;
 863}
 864
 865/* If the binary operation has second argument @i, fold to NOT. */
 866static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 867{
 868    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 869        return fold_to_not(ctx, op, 1);
 870    }
 871    return false;
 872}
 873
 874/* If the binary operation has both arguments equal, fold to @i. */
 875static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 876{
 877    if (args_are_copies(op->args[1], op->args[2])) {
 878        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 879    }
 880    return false;
 881}
 882
 883/* If the binary operation has both arguments equal, fold to identity. */
 884static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
 885{
 886    if (args_are_copies(op->args[1], op->args[2])) {
 887        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 888    }
 889    return false;
 890}
 891
 892/*
 893 * These outermost fold_<op> functions are sorted alphabetically.
 894 *
 895 * The ordering of the transformations should be:
 896 *   1) those that produce a constant
 897 *   2) those that produce a copy
 898 *   3) those that produce information about the result value.
 899 */
 900
 901static bool fold_add(OptContext *ctx, TCGOp *op)
 902{
 903    if (fold_const2_commutative(ctx, op) ||
 904        fold_xi_to_x(ctx, op, 0)) {
 905        return true;
 906    }
 907    return false;
 908}
 909
 910/* We cannot as yet do_constant_folding with vectors. */
 911static bool fold_add_vec(OptContext *ctx, TCGOp *op)
 912{
 913    if (fold_commutative(ctx, op) ||
 914        fold_xi_to_x(ctx, op, 0)) {
 915        return true;
 916    }
 917    return false;
 918}
 919
 920static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 921{
 922    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
 923        arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
 924        uint64_t al = arg_info(op->args[2])->val;
 925        uint64_t ah = arg_info(op->args[3])->val;
 926        uint64_t bl = arg_info(op->args[4])->val;
 927        uint64_t bh = arg_info(op->args[5])->val;
 928        TCGArg rl, rh;
 929        TCGOp *op2;
 930
 931        if (ctx->type == TCG_TYPE_I32) {
 932            uint64_t a = deposit64(al, 32, 32, ah);
 933            uint64_t b = deposit64(bl, 32, 32, bh);
 934
 935            if (add) {
 936                a += b;
 937            } else {
 938                a -= b;
 939            }
 940
 941            al = sextract64(a, 0, 32);
 942            ah = sextract64(a, 32, 32);
 943        } else {
 944            Int128 a = int128_make128(al, ah);
 945            Int128 b = int128_make128(bl, bh);
 946
 947            if (add) {
 948                a = int128_add(a, b);
 949            } else {
 950                a = int128_sub(a, b);
 951            }
 952
 953            al = int128_getlo(a);
 954            ah = int128_gethi(a);
 955        }
 956
 957        rl = op->args[0];
 958        rh = op->args[1];
 959
 960        /* The proper opcode is supplied by tcg_opt_gen_mov. */
 961        op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
 962
 963        tcg_opt_gen_movi(ctx, op, rl, al);
 964        tcg_opt_gen_movi(ctx, op2, rh, ah);
 965        return true;
 966    }
 967    return false;
 968}
 969
 970static bool fold_add2(OptContext *ctx, TCGOp *op)
 971{
 972    /* Note that the high and low parts may be independently swapped. */
 973    swap_commutative(op->args[0], &op->args[2], &op->args[4]);
 974    swap_commutative(op->args[1], &op->args[3], &op->args[5]);
 975
 976    return fold_addsub2(ctx, op, true);
 977}
 978
 979static bool fold_and(OptContext *ctx, TCGOp *op)
 980{
 981    uint64_t z1, z2;
 982
 983    if (fold_const2_commutative(ctx, op) ||
 984        fold_xi_to_i(ctx, op, 0) ||
 985        fold_xi_to_x(ctx, op, -1) ||
 986        fold_xx_to_x(ctx, op)) {
 987        return true;
 988    }
 989
 990    z1 = arg_info(op->args[1])->z_mask;
 991    z2 = arg_info(op->args[2])->z_mask;
 992    ctx->z_mask = z1 & z2;
 993
 994    /*
 995     * Sign repetitions are perforce all identical, whether they are 1 or 0.
 996     * Bitwise operations preserve the relative quantity of the repetitions.
 997     */
 998    ctx->s_mask = arg_info(op->args[1])->s_mask
 999                & arg_info(op->args[2])->s_mask;
1000
1001    /*
1002     * Known-zeros does not imply known-ones.  Therefore unless
1003     * arg2 is constant, we can't infer affected bits from it.
1004     */
1005    if (arg_is_const(op->args[2])) {
1006        ctx->a_mask = z1 & ~z2;
1007    }
1008
1009    return fold_masks(ctx, op);
1010}
1011
1012static bool fold_andc(OptContext *ctx, TCGOp *op)
1013{
1014    uint64_t z1;
1015
1016    if (fold_const2(ctx, op) ||
1017        fold_xx_to_i(ctx, op, 0) ||
1018        fold_xi_to_x(ctx, op, 0) ||
1019        fold_ix_to_not(ctx, op, -1)) {
1020        return true;
1021    }
1022
1023    z1 = arg_info(op->args[1])->z_mask;
1024
1025    /*
1026     * Known-zeros does not imply known-ones.  Therefore unless
1027     * arg2 is constant, we can't infer anything from it.
1028     */
1029    if (arg_is_const(op->args[2])) {
1030        uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1031        ctx->a_mask = z1 & ~z2;
1032        z1 &= z2;
1033    }
1034    ctx->z_mask = z1;
1035
1036    ctx->s_mask = arg_info(op->args[1])->s_mask
1037                & arg_info(op->args[2])->s_mask;
1038    return fold_masks(ctx, op);
1039}
1040
1041static bool fold_brcond(OptContext *ctx, TCGOp *op)
1042{
1043    TCGCond cond = op->args[2];
1044    int i;
1045
1046    if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1047        op->args[2] = cond = tcg_swap_cond(cond);
1048    }
1049
1050    i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1051    if (i == 0) {
1052        tcg_op_remove(ctx->tcg, op);
1053        return true;
1054    }
1055    if (i > 0) {
1056        op->opc = INDEX_op_br;
1057        op->args[0] = op->args[3];
1058    }
1059    return false;
1060}
1061
1062static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1063{
1064    TCGCond cond = op->args[4];
1065    TCGArg label = op->args[5];
1066    int i, inv = 0;
1067
1068    if (swap_commutative2(&op->args[0], &op->args[2])) {
1069        op->args[4] = cond = tcg_swap_cond(cond);
1070    }
1071
1072    i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1073    if (i >= 0) {
1074        goto do_brcond_const;
1075    }
1076
1077    switch (cond) {
1078    case TCG_COND_LT:
1079    case TCG_COND_GE:
1080        /*
1081         * Simplify LT/GE comparisons vs zero to a single compare
1082         * vs the high word of the input.
1083         */
1084        if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1085            arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1086            goto do_brcond_high;
1087        }
1088        break;
1089
1090    case TCG_COND_NE:
1091        inv = 1;
1092        QEMU_FALLTHROUGH;
1093    case TCG_COND_EQ:
1094        /*
1095         * Simplify EQ/NE comparisons where one of the pairs
1096         * can be simplified.
1097         */
1098        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1099                                     op->args[2], cond);
1100        switch (i ^ inv) {
1101        case 0:
1102            goto do_brcond_const;
1103        case 1:
1104            goto do_brcond_high;
1105        }
1106
1107        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1108                                     op->args[3], cond);
1109        switch (i ^ inv) {
1110        case 0:
1111            goto do_brcond_const;
1112        case 1:
1113            op->opc = INDEX_op_brcond_i32;
1114            op->args[1] = op->args[2];
1115            op->args[2] = cond;
1116            op->args[3] = label;
1117            break;
1118        }
1119        break;
1120
1121    default:
1122        break;
1123
1124    do_brcond_high:
1125        op->opc = INDEX_op_brcond_i32;
1126        op->args[0] = op->args[1];
1127        op->args[1] = op->args[3];
1128        op->args[2] = cond;
1129        op->args[3] = label;
1130        break;
1131
1132    do_brcond_const:
1133        if (i == 0) {
1134            tcg_op_remove(ctx->tcg, op);
1135            return true;
1136        }
1137        op->opc = INDEX_op_br;
1138        op->args[0] = label;
1139        break;
1140    }
1141    return false;
1142}
1143
1144static bool fold_bswap(OptContext *ctx, TCGOp *op)
1145{
1146    uint64_t z_mask, s_mask, sign;
1147
1148    if (arg_is_const(op->args[1])) {
1149        uint64_t t = arg_info(op->args[1])->val;
1150
1151        t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1152        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1153    }
1154
1155    z_mask = arg_info(op->args[1])->z_mask;
1156
1157    switch (op->opc) {
1158    case INDEX_op_bswap16_i32:
1159    case INDEX_op_bswap16_i64:
1160        z_mask = bswap16(z_mask);
1161        sign = INT16_MIN;
1162        break;
1163    case INDEX_op_bswap32_i32:
1164    case INDEX_op_bswap32_i64:
1165        z_mask = bswap32(z_mask);
1166        sign = INT32_MIN;
1167        break;
1168    case INDEX_op_bswap64_i64:
1169        z_mask = bswap64(z_mask);
1170        sign = INT64_MIN;
1171        break;
1172    default:
1173        g_assert_not_reached();
1174    }
1175    s_mask = smask_from_zmask(z_mask);
1176
1177    switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1178    case TCG_BSWAP_OZ:
1179        break;
1180    case TCG_BSWAP_OS:
1181        /* If the sign bit may be 1, force all the bits above to 1. */
1182        if (z_mask & sign) {
1183            z_mask |= sign;
1184            s_mask = sign << 1;
1185        }
1186        break;
1187    default:
1188        /* The high bits are undefined: force all bits above the sign to 1. */
1189        z_mask |= sign << 1;
1190        s_mask = 0;
1191        break;
1192    }
1193    ctx->z_mask = z_mask;
1194    ctx->s_mask = s_mask;
1195
1196    return fold_masks(ctx, op);
1197}
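
/*
 * For example, with TCG_BSWAP_OZ, a bswap16 of a value known to fit
 * in the low byte yields z_mask == 0xff00: the known-zero high byte
 * moves to the low byte.
 */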
1198
1199static bool fold_call(OptContext *ctx, TCGOp *op)
1200{
1201    TCGContext *s = ctx->tcg;
1202    int nb_oargs = TCGOP_CALLO(op);
1203    int nb_iargs = TCGOP_CALLI(op);
1204    int flags, i;
1205
1206    init_arguments(ctx, op, nb_oargs + nb_iargs);
1207    copy_propagate(ctx, op, nb_oargs, nb_iargs);
1208
1209    /* If the function reads or writes globals, reset temp data. */
1210    flags = tcg_call_flags(op);
1211    if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1212        int nb_globals = s->nb_globals;
1213
1214        for (i = 0; i < nb_globals; i++) {
1215            if (test_bit(i, ctx->temps_used.l)) {
1216                reset_ts(&ctx->tcg->temps[i]);
1217            }
1218        }
1219    }
1220
1221    /* Reset temp data for outputs. */
1222    for (i = 0; i < nb_oargs; i++) {
1223        reset_temp(op->args[i]);
1224    }
1225
1226    /* Stop optimizing MB across calls. */
1227    ctx->prev_mb = NULL;
1228    return true;
1229}
1230
1231static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1232{
1233    uint64_t z_mask;
1234
1235    if (arg_is_const(op->args[1])) {
1236        uint64_t t = arg_info(op->args[1])->val;
1237
1238        if (t != 0) {
1239            t = do_constant_folding(op->opc, ctx->type, t, 0);
1240            return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1241        }
1242        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1243    }
1244
1245    switch (ctx->type) {
1246    case TCG_TYPE_I32:
1247        z_mask = 31;
1248        break;
1249    case TCG_TYPE_I64:
1250        z_mask = 63;
1251        break;
1252    default:
1253        g_assert_not_reached();
1254    }
1255    ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1256    ctx->s_mask = smask_from_zmask(ctx->z_mask);
1257    return false;
1258}
1259
1260static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1261{
1262    if (fold_const1(ctx, op)) {
1263        return true;
1264    }
1265
1266    switch (ctx->type) {
1267    case TCG_TYPE_I32:
1268        ctx->z_mask = 32 | 31;
1269        break;
1270    case TCG_TYPE_I64:
1271        ctx->z_mask = 64 | 63;
1272        break;
1273    default:
1274        g_assert_not_reached();
1275    }
1276    ctx->s_mask = smask_from_zmask(ctx->z_mask);
1277    return false;
1278}
1279
1280static bool fold_deposit(OptContext *ctx, TCGOp *op)
1281{
1282    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1283        uint64_t t1 = arg_info(op->args[1])->val;
1284        uint64_t t2 = arg_info(op->args[2])->val;
1285
1286        t1 = deposit64(t1, op->args[3], op->args[4], t2);
1287        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1288    }
1289
1290    ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1291                            op->args[3], op->args[4],
1292                            arg_info(op->args[2])->z_mask);
1293    return false;
1294}
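
/*
 * For example, depositing a field at bits [8,16) into a value whose
 * z_mask is 0xff yields z_mask == 0xff | ((z2 & 0xff) << 8), where z2
 * is the inserted field's z_mask: the low byte keeps its previous
 * knowledge, bits 8..15 come from the inserted field, and the higher
 * bits stay known zero.
 */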
1295
1296static bool fold_divide(OptContext *ctx, TCGOp *op)
1297{
1298    if (fold_const2(ctx, op) ||
1299        fold_xi_to_x(ctx, op, 1)) {
1300        return true;
1301    }
1302    return false;
1303}
1304
1305static bool fold_dup(OptContext *ctx, TCGOp *op)
1306{
1307    if (arg_is_const(op->args[1])) {
1308        uint64_t t = arg_info(op->args[1])->val;
1309        t = dup_const(TCGOP_VECE(op), t);
1310        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1311    }
1312    return false;
1313}
1314
1315static bool fold_dup2(OptContext *ctx, TCGOp *op)
1316{
1317    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1318        uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1319                               arg_info(op->args[2])->val);
1320        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1321    }
1322
1323    if (args_are_copies(op->args[1], op->args[2])) {
1324        op->opc = INDEX_op_dup_vec;
1325        TCGOP_VECE(op) = MO_32;
1326    }
1327    return false;
1328}
1329
1330static bool fold_eqv(OptContext *ctx, TCGOp *op)
1331{
1332    if (fold_const2_commutative(ctx, op) ||
1333        fold_xi_to_x(ctx, op, -1) ||
1334        fold_xi_to_not(ctx, op, 0)) {
1335        return true;
1336    }
1337
1338    ctx->s_mask = arg_info(op->args[1])->s_mask
1339                & arg_info(op->args[2])->s_mask;
1340    return false;
1341}
1342
1343static bool fold_extract(OptContext *ctx, TCGOp *op)
1344{
1345    uint64_t z_mask_old, z_mask;
1346    int pos = op->args[2];
1347    int len = op->args[3];
1348
1349    if (arg_is_const(op->args[1])) {
1350        uint64_t t;
1351
1352        t = arg_info(op->args[1])->val;
1353        t = extract64(t, pos, len);
1354        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1355    }
1356
1357    z_mask_old = arg_info(op->args[1])->z_mask;
1358    z_mask = extract64(z_mask_old, pos, len);
1359    if (pos == 0) {
1360        ctx->a_mask = z_mask_old ^ z_mask;
1361    }
1362    ctx->z_mask = z_mask;
1363    ctx->s_mask = smask_from_zmask(z_mask);
1364
1365    return fold_masks(ctx, op);
1366}
1367
1368static bool fold_extract2(OptContext *ctx, TCGOp *op)
1369{
1370    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1371        uint64_t v1 = arg_info(op->args[1])->val;
1372        uint64_t v2 = arg_info(op->args[2])->val;
1373        int shr = op->args[3];
1374
1375        if (op->opc == INDEX_op_extract2_i64) {
1376            v1 >>= shr;
1377            v2 <<= 64 - shr;
1378        } else {
1379            v1 = (uint32_t)v1 >> shr;
1380            v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1381        }
1382        return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1383    }
1384    return false;
1385}
1386
1387static bool fold_exts(OptContext *ctx, TCGOp *op)
1388{
1389    uint64_t s_mask_old, s_mask, z_mask, sign;
1390    bool type_change = false;
1391
1392    if (fold_const1(ctx, op)) {
1393        return true;
1394    }
1395
1396    z_mask = arg_info(op->args[1])->z_mask;
1397    s_mask = arg_info(op->args[1])->s_mask;
1398    s_mask_old = s_mask;
1399
1400    switch (op->opc) {
1401    CASE_OP_32_64(ext8s):
1402        sign = INT8_MIN;
1403        z_mask = (uint8_t)z_mask;
1404        break;
1405    CASE_OP_32_64(ext16s):
1406        sign = INT16_MIN;
1407        z_mask = (uint16_t)z_mask;
1408        break;
1409    case INDEX_op_ext_i32_i64:
1410        type_change = true;
1411        QEMU_FALLTHROUGH;
1412    case INDEX_op_ext32s_i64:
1413        sign = INT32_MIN;
1414        z_mask = (uint32_t)z_mask;
1415        break;
1416    default:
1417        g_assert_not_reached();
1418    }
1419
1420    if (z_mask & sign) {
1421        z_mask |= sign;
1422    }
1423    s_mask |= sign << 1;
1424
1425    ctx->z_mask = z_mask;
1426    ctx->s_mask = s_mask;
1427    if (!type_change) {
1428        ctx->a_mask = s_mask & ~s_mask_old;
1429    }
1430
1431    return fold_masks(ctx, op);
1432}
1433
1434static bool fold_extu(OptContext *ctx, TCGOp *op)
1435{
1436    uint64_t z_mask_old, z_mask;
1437    bool type_change = false;
1438
1439    if (fold_const1(ctx, op)) {
1440        return true;
1441    }
1442
1443    z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1444
1445    switch (op->opc) {
1446    CASE_OP_32_64(ext8u):
1447        z_mask = (uint8_t)z_mask;
1448        break;
1449    CASE_OP_32_64(ext16u):
1450        z_mask = (uint16_t)z_mask;
1451        break;
1452    case INDEX_op_extrl_i64_i32:
1453    case INDEX_op_extu_i32_i64:
1454        type_change = true;
1455        QEMU_FALLTHROUGH;
1456    case INDEX_op_ext32u_i64:
1457        z_mask = (uint32_t)z_mask;
1458        break;
1459    case INDEX_op_extrh_i64_i32:
1460        type_change = true;
1461        z_mask >>= 32;
1462        break;
1463    default:
1464        g_assert_not_reached();
1465    }
1466
1467    ctx->z_mask = z_mask;
1468    ctx->s_mask = smask_from_zmask(z_mask);
1469    if (!type_change) {
1470        ctx->a_mask = z_mask_old ^ z_mask;
1471    }
1472    return fold_masks(ctx, op);
1473}
1474
1475static bool fold_mb(OptContext *ctx, TCGOp *op)
1476{
1477    /* Eliminate duplicate and redundant fence instructions.  */
1478    if (ctx->prev_mb) {
1479        /*
1480         * Merge two barriers of the same type into one,
1481         * or a weaker barrier into a stronger one,
1482         * or two weaker barriers into a stronger one.
1483         *   mb X; mb Y => mb X|Y
1484         *   mb; strl => mb; st
1485         *   ldaq; mb => ld; mb
1486         *   ldaq; strl => ld; mb; st
1487         * Other combinations are also merged into a strong
1488         * barrier.  This is stricter than specified but for
1489         * the purposes of TCG is better than not optimizing.
1490         */
1491        ctx->prev_mb->args[0] |= op->args[0];
1492        tcg_op_remove(ctx->tcg, op);
1493    } else {
1494        ctx->prev_mb = op;
1495    }
1496    return true;
1497}
1498
1499static bool fold_mov(OptContext *ctx, TCGOp *op)
1500{
1501    return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1502}
1503
1504static bool fold_movcond(OptContext *ctx, TCGOp *op)
1505{
1506    TCGCond cond = op->args[5];
1507    int i;
1508
1509    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1510        op->args[5] = cond = tcg_swap_cond(cond);
1511    }
1512    /*
1513     * Canonicalize the "false" input reg to match the destination reg so
1514     * that the tcg backend can implement a "move if true" operation.
1515     */
1516    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1517        op->args[5] = cond = tcg_invert_cond(cond);
1518    }
1519
1520    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1521    if (i >= 0) {
1522        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1523    }
1524
1525    ctx->z_mask = arg_info(op->args[3])->z_mask
1526                | arg_info(op->args[4])->z_mask;
1527    ctx->s_mask = arg_info(op->args[3])->s_mask
1528                & arg_info(op->args[4])->s_mask;
1529
1530    if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1531        uint64_t tv = arg_info(op->args[3])->val;
1532        uint64_t fv = arg_info(op->args[4])->val;
1533        TCGOpcode opc;
1534
1535        switch (ctx->type) {
1536        case TCG_TYPE_I32:
1537            opc = INDEX_op_setcond_i32;
1538            break;
1539        case TCG_TYPE_I64:
1540            opc = INDEX_op_setcond_i64;
1541            break;
1542        default:
1543            g_assert_not_reached();
1544        }
1545
1546        if (tv == 1 && fv == 0) {
1547            op->opc = opc;
1548            op->args[3] = cond;
1549        } else if (fv == 1 && tv == 0) {
1550            op->opc = opc;
1551            op->args[3] = tcg_invert_cond(cond);
1552        }
1553    }
1554    return false;
1555}
1556
1557static bool fold_mul(OptContext *ctx, TCGOp *op)
1558{
1559    if (fold_const2(ctx, op) ||
1560        fold_xi_to_i(ctx, op, 0) ||
1561        fold_xi_to_x(ctx, op, 1)) {
1562        return true;
1563    }
1564    return false;
1565}
1566
1567static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1568{
1569    if (fold_const2_commutative(ctx, op) ||
1570        fold_xi_to_i(ctx, op, 0)) {
1571        return true;
1572    }
1573    return false;
1574}
1575
1576static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1577{
1578    swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1579
1580    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1581        uint64_t a = arg_info(op->args[2])->val;
1582        uint64_t b = arg_info(op->args[3])->val;
1583        uint64_t h, l;
1584        TCGArg rl, rh;
1585        TCGOp *op2;
1586
1587        switch (op->opc) {
1588        case INDEX_op_mulu2_i32:
1589            l = (uint64_t)(uint32_t)a * (uint32_t)b;
1590            h = (int32_t)(l >> 32);
1591            l = (int32_t)l;
1592            break;
1593        case INDEX_op_muls2_i32:
1594            l = (int64_t)(int32_t)a * (int32_t)b;
1595            h = l >> 32;
1596            l = (int32_t)l;
1597            break;
1598        case INDEX_op_mulu2_i64:
1599            mulu64(&l, &h, a, b);
1600            break;
1601        case INDEX_op_muls2_i64:
1602            muls64(&l, &h, a, b);
1603            break;
1604        default:
1605            g_assert_not_reached();
1606        }
1607
1608        rl = op->args[0];
1609        rh = op->args[1];
1610
1611        /* The proper opcode is supplied by tcg_opt_gen_mov. */
1612        op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1613
1614        tcg_opt_gen_movi(ctx, op, rl, l);
1615        tcg_opt_gen_movi(ctx, op2, rh, h);
1616        return true;
1617    }
1618    return false;
1619}
1620
1621static bool fold_nand(OptContext *ctx, TCGOp *op)
1622{
1623    if (fold_const2_commutative(ctx, op) ||
1624        fold_xi_to_not(ctx, op, -1)) {
1625        return true;
1626    }
1627
1628    ctx->s_mask = arg_info(op->args[1])->s_mask
1629                & arg_info(op->args[2])->s_mask;
1630    return false;
1631}
1632
1633static bool fold_neg(OptContext *ctx, TCGOp *op)
1634{
1635    uint64_t z_mask;
1636
1637    if (fold_const1(ctx, op)) {
1638        return true;
1639    }
1640
1641    /* Set to 1 all bits to the left of the rightmost.  */
1642    z_mask = arg_info(op->args[1])->z_mask;
1643    ctx->z_mask = -(z_mask & -z_mask);
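    /*
     * For example, if the low two bits of the input are known zero
     * and bit 2 is the lowest possibly-set bit, then
     * z_mask & -z_mask == 0b100 and the result has all bits set except
     * the low two: negation preserves trailing known-zero bits.
     */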
1644
1645    /*
1646     * Because of fold_sub_to_neg, we want to always return true,
1647     * via finish_folding.
1648     */
1649    finish_folding(ctx, op);
1650    return true;
1651}
1652
1653static bool fold_nor(OptContext *ctx, TCGOp *op)
1654{
1655    if (fold_const2_commutative(ctx, op) ||
1656        fold_xi_to_not(ctx, op, 0)) {
1657        return true;
1658    }
1659
1660    ctx->s_mask = arg_info(op->args[1])->s_mask
1661                & arg_info(op->args[2])->s_mask;
1662    return false;
1663}
1664
1665static bool fold_not(OptContext *ctx, TCGOp *op)
1666{
1667    if (fold_const1(ctx, op)) {
1668        return true;
1669    }
1670
1671    ctx->s_mask = arg_info(op->args[1])->s_mask;
1672
1673    /* Because of fold_to_not, we want to always return true, via finish. */
1674    finish_folding(ctx, op);
1675    return true;
1676}
1677
1678static bool fold_or(OptContext *ctx, TCGOp *op)
1679{
1680    if (fold_const2_commutative(ctx, op) ||
1681        fold_xi_to_x(ctx, op, 0) ||
1682        fold_xx_to_x(ctx, op)) {
1683        return true;
1684    }
1685
1686    ctx->z_mask = arg_info(op->args[1])->z_mask
1687                | arg_info(op->args[2])->z_mask;
1688    ctx->s_mask = arg_info(op->args[1])->s_mask
1689                & arg_info(op->args[2])->s_mask;
1690    return fold_masks(ctx, op);
1691}
1692
1693static bool fold_orc(OptContext *ctx, TCGOp *op)
1694{
1695    if (fold_const2(ctx, op) ||
1696        fold_xx_to_i(ctx, op, -1) ||
1697        fold_xi_to_x(ctx, op, -1) ||
1698        fold_ix_to_not(ctx, op, 0)) {
1699        return true;
1700    }
1701
1702    ctx->s_mask = arg_info(op->args[1])->s_mask
1703                & arg_info(op->args[2])->s_mask;
1704    return false;
1705}
1706
1707static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1708{
1709    const TCGOpDef *def = &tcg_op_defs[op->opc];
1710    MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1711    MemOp mop = get_memop(oi);
1712    int width = 8 * memop_size(mop);
1713
1714    if (width < 64) {
1715        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1716        if (!(mop & MO_SIGN)) {
1717            ctx->z_mask = MAKE_64BIT_MASK(0, width);
1718            ctx->s_mask <<= 1;
1719        }
1720    }
1721
1722    /* Opcodes that touch guest memory stop the mb optimization.  */
1723    ctx->prev_mb = NULL;
1724    return false;
1725}
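
/*
 * For example, an unsigned 8-bit guest load (MO_UB) yields
 * z_mask == 0xff and an s_mask covering bits 9..63, exactly what
 * smask_from_zmask(0xff) would report; a signed 8-bit load leaves
 * z_mask unconstrained but still guarantees bits 8..63 repeat bit 7.
 */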
1726
1727static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1728{
1729    /* Opcodes that touch guest memory stop the mb optimization.  */
1730    ctx->prev_mb = NULL;
1731    return false;
1732}
1733
1734static bool fold_remainder(OptContext *ctx, TCGOp *op)
1735{
1736    if (fold_const2(ctx, op) ||
1737        fold_xx_to_i(ctx, op, 0)) {
1738        return true;
1739    }
1740    return false;
1741}
1742
1743static bool fold_setcond(OptContext *ctx, TCGOp *op)
1744{
1745    TCGCond cond = op->args[3];
1746    int i;
1747
1748    if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1749        op->args[3] = cond = tcg_swap_cond(cond);
1750    }
1751
1752    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1753    if (i >= 0) {
1754        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1755    }
1756
1757    ctx->z_mask = 1;
1758    ctx->s_mask = smask_from_zmask(1);
1759    return false;
1760}
1761
1762static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1763{
1764    TCGCond cond = op->args[5];
1765    int i, inv = 0;
1766
1767    if (swap_commutative2(&op->args[1], &op->args[3])) {
1768        op->args[5] = cond = tcg_swap_cond(cond);
1769    }
1770
1771    i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1772    if (i >= 0) {
1773        goto do_setcond_const;
1774    }
1775
1776    switch (cond) {
1777    case TCG_COND_LT:
1778    case TCG_COND_GE:
1779        /*
1780         * Simplify LT/GE comparisons vs zero to a single compare
1781         * vs the high word of the input.
1782         */
1783        if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1784            arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1785            goto do_setcond_high;
1786        }
1787        break;
1788
1789    case TCG_COND_NE:
1790        inv = 1;
1791        QEMU_FALLTHROUGH;
1792    case TCG_COND_EQ:
1793        /*
1794         * Simplify EQ/NE comparisons where one of the pairs
1795         * can be simplified.
1796         */
1797        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1798                                     op->args[3], cond);
1799        switch (i ^ inv) {
1800        case 0:
1801            goto do_setcond_const;
1802        case 1:
1803            goto do_setcond_high;
1804        }
1805
1806        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
1807                                     op->args[4], cond);
1808        switch (i ^ inv) {
1809        case 0:
1810            goto do_setcond_const;
1811        case 1:
1812            op->args[2] = op->args[3];
1813            op->args[3] = cond;
1814            op->opc = INDEX_op_setcond_i32;
1815            break;
1816        }
1817        break;
1818
1819    default:
1820        break;
1821
1822    do_setcond_high:
1823        op->args[1] = op->args[2];
1824        op->args[2] = op->args[4];
1825        op->args[3] = cond;
1826        op->opc = INDEX_op_setcond_i32;
1827        break;
1828    }
1829
1830    ctx->z_mask = 1;
1831    ctx->s_mask = smask_from_zmask(1);
1832    return false;
1833
1834 do_setcond_const:
1835    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1836}
1837
1838static bool fold_sextract(OptContext *ctx, TCGOp *op)
1839{
1840    uint64_t z_mask, s_mask, s_mask_old;
1841    int pos = op->args[2];
1842    int len = op->args[3];
1843
1844    if (arg_is_const(op->args[1])) {
1845        uint64_t t;
1846
1847        t = arg_info(op->args[1])->val;
1848        t = sextract64(t, pos, len);
1849        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1850    }
1851
1852    z_mask = arg_info(op->args[1])->z_mask;
1853    z_mask = sextract64(z_mask, pos, len);
1854    ctx->z_mask = z_mask;
1855
1856    s_mask_old = arg_info(op->args[1])->s_mask;
1857    s_mask = sextract64(s_mask_old, pos, len);
1858    s_mask |= MAKE_64BIT_MASK(len, 64 - len);
1859    ctx->s_mask = s_mask;
1860
1861    if (pos == 0) {
1862        ctx->a_mask = s_mask & ~s_mask_old;
1863    }
1864
1865    return fold_masks(ctx, op);
1866}
1867
1868static bool fold_shift(OptContext *ctx, TCGOp *op)
1869{
1870    uint64_t s_mask, z_mask, sign;
1871
1872    if (fold_const2(ctx, op) ||
1873        fold_ix_to_i(ctx, op, 0) ||
1874        fold_xi_to_x(ctx, op, 0)) {
1875        return true;
1876    }
1877
1878    s_mask = arg_info(op->args[1])->s_mask;
1879    z_mask = arg_info(op->args[1])->z_mask;
1880
1881    if (arg_is_const(op->args[2])) {
1882        int sh = arg_info(op->args[2])->val;
1883
1884        ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
1885
1886        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
1887        ctx->s_mask = smask_from_smask(s_mask);
1888
1889        return fold_masks(ctx, op);
1890    }
1891
1892    switch (op->opc) {
1893    CASE_OP_32_64(sar):
1894        /*
1895         * Arithmetic right shift will not reduce the number of
1896         * input sign repetitions.
1897         */
1898        ctx->s_mask = s_mask;
1899        break;
1900    CASE_OP_32_64(shr):
1901        /*
1902         * If the sign bit is known zero, then logical right shift
1903         * will not reduce the number of input sign repetitions.
1904         */
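            /* The lowest set bit of s_mask is the first sign repetition;
               one position below it is the value's effective sign bit. */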
1905        sign = (s_mask & -s_mask) >> 1;
1906        if (!(z_mask & sign)) {
1907            ctx->s_mask = s_mask;
1908        }
1909        break;
1910    default:
1911        break;
1912    }
1913
1914    return false;
1915}
1916
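    /* Canonicalize "sub t0, 0, t1" to "neg t0, t1" when the target has neg. */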
1917static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
1918{
1919    TCGOpcode neg_op;
1920    bool have_neg;
1921
1922    if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
1923        return false;
1924    }
1925
1926    switch (ctx->type) {
1927    case TCG_TYPE_I32:
1928        neg_op = INDEX_op_neg_i32;
1929        have_neg = TCG_TARGET_HAS_neg_i32;
1930        break;
1931    case TCG_TYPE_I64:
1932        neg_op = INDEX_op_neg_i64;
1933        have_neg = TCG_TARGET_HAS_neg_i64;
1934        break;
1935    case TCG_TYPE_V64:
1936    case TCG_TYPE_V128:
1937    case TCG_TYPE_V256:
1938        neg_op = INDEX_op_neg_vec;
1939        have_neg = (TCG_TARGET_HAS_neg_vec &&
1940                    tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
1941        break;
1942    default:
1943        g_assert_not_reached();
1944    }
1945    if (have_neg) {
1946        op->opc = neg_op;
1947        op->args[1] = op->args[2];
1948        return fold_neg(ctx, op);
1949    }
1950    return false;
1951}
1952
1953/* We cannot as yet do_constant_folding with vectors. */
1954static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
1955{
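        /* x - x = 0, x - 0 = x, and 0 - x = -x. */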
1956    if (fold_xx_to_i(ctx, op, 0) ||
1957        fold_xi_to_x(ctx, op, 0) ||
1958        fold_sub_to_neg(ctx, op)) {
1959        return true;
1960    }
1961    return false;
1962}
1963
1964static bool fold_sub(OptContext *ctx, TCGOp *op)
1965{
1966    return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
1967}
1968
1969static bool fold_sub2(OptContext *ctx, TCGOp *op)
1970{
1971    return fold_addsub2(ctx, op, false);
1972}
1973
1974static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
1975{
1976    /* We can't do any folding with a load, but we can record bits. */
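        /* E.g. ld8u: only the low 8 bits can be set, so bits 8..63 are
           known zero and at least the top 55 bits repeat the sign bit. */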
1977    switch (op->opc) {
1978    CASE_OP_32_64(ld8s):
1979        ctx->s_mask = MAKE_64BIT_MASK(8, 56);
1980        break;
1981    CASE_OP_32_64(ld8u):
1982        ctx->z_mask = MAKE_64BIT_MASK(0, 8);
1983        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
1984        break;
1985    CASE_OP_32_64(ld16s):
1986        ctx->s_mask = MAKE_64BIT_MASK(16, 48);
1987        break;
1988    CASE_OP_32_64(ld16u):
1989        ctx->z_mask = MAKE_64BIT_MASK(0, 16);
1990        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
1991        break;
1992    case INDEX_op_ld32s_i64:
1993        ctx->s_mask = MAKE_64BIT_MASK(32, 32);
1994        break;
1995    case INDEX_op_ld32u_i64:
1996        ctx->z_mask = MAKE_64BIT_MASK(0, 32);
1997        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
1998        break;
1999    default:
2000        g_assert_not_reached();
2001    }
2002    return false;
2003}
2004
2005static bool fold_xor(OptContext *ctx, TCGOp *op)
2006{
2007    if (fold_const2_commutative(ctx, op) ||
2008        fold_xx_to_i(ctx, op, 0) ||
2009        fold_xi_to_x(ctx, op, 0) ||
2010        fold_xi_to_not(ctx, op, -1)) {
2011        return true;
2012    }
2013
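        /*
         * A result bit may be set only where either input may be set;
         * sign repetitions survive only where both inputs share them.
         */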
2014    ctx->z_mask = arg_info(op->args[1])->z_mask
2015                | arg_info(op->args[2])->z_mask;
2016    ctx->s_mask = arg_info(op->args[1])->s_mask
2017                & arg_info(op->args[2])->s_mask;
2018    return fold_masks(ctx, op);
2019}
2020
2021/* Propagate constants and copies, fold constant expressions. */
2022void tcg_optimize(TCGContext *s)
2023{
2024    int nb_temps, i;
2025    TCGOp *op, *op_next;
2026    OptContext ctx = { .tcg = s };
2027
2028    /* Each temp's TempOptInfo, reached via state_ptr, tracks its state.
2029       If the temp holds a constant then its value is kept there.
2030       If the temp is a copy of other temps then the other copies are
2031       available through the doubly linked circular copy list. */
2032
2033    nb_temps = s->nb_temps;
2034    for (i = 0; i < nb_temps; ++i) {
2035        s->temps[i].state_ptr = NULL;
2036    }
2037
2038    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2039        TCGOpcode opc = op->opc;
2040        const TCGOpDef *def;
2041        bool done = false;
2042
2043        /* Calls are special. */
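            /* fold_call (defined earlier) handles its own argument copy
               propagation and invalidates whatever value tracking the
               call may clobber. */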
2044        if (opc == INDEX_op_call) {
2045            fold_call(&ctx, op);
2046            continue;
2047        }
2048
2049        def = &tcg_op_defs[opc];
2050        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2051        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2052
2053        /* Pre-compute the type of the operation. */
2054        if (def->flags & TCG_OPF_VECTOR) {
2055            ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2056        } else if (def->flags & TCG_OPF_64BIT) {
2057            ctx.type = TCG_TYPE_I64;
2058        } else {
2059            ctx.type = TCG_TYPE_I32;
2060        }
2061
2062        /* Assume all bits affected, no bits known zero, no sign reps. */
2063        ctx.a_mask = -1;
2064        ctx.z_mask = -1;
2065        ctx.s_mask = 0;
2066
2067        /*
2068         * Process each opcode.
2069         * Sorted alphabetically by opcode as much as possible.
2070         */
2071        switch (opc) {
2072        CASE_OP_32_64(add):
2073            done = fold_add(&ctx, op);
2074            break;
2075        case INDEX_op_add_vec:
2076            done = fold_add_vec(&ctx, op);
2077            break;
2078        CASE_OP_32_64(add2):
2079            done = fold_add2(&ctx, op);
2080            break;
2081        CASE_OP_32_64_VEC(and):
2082            done = fold_and(&ctx, op);
2083            break;
2084        CASE_OP_32_64_VEC(andc):
2085            done = fold_andc(&ctx, op);
2086            break;
2087        CASE_OP_32_64(brcond):
2088            done = fold_brcond(&ctx, op);
2089            break;
2090        case INDEX_op_brcond2_i32:
2091            done = fold_brcond2(&ctx, op);
2092            break;
2093        CASE_OP_32_64(bswap16):
2094        CASE_OP_32_64(bswap32):
2095        case INDEX_op_bswap64_i64:
2096            done = fold_bswap(&ctx, op);
2097            break;
2098        CASE_OP_32_64(clz):
2099        CASE_OP_32_64(ctz):
2100            done = fold_count_zeros(&ctx, op);
2101            break;
2102        CASE_OP_32_64(ctpop):
2103            done = fold_ctpop(&ctx, op);
2104            break;
2105        CASE_OP_32_64(deposit):
2106            done = fold_deposit(&ctx, op);
2107            break;
2108        CASE_OP_32_64(div):
2109        CASE_OP_32_64(divu):
2110            done = fold_divide(&ctx, op);
2111            break;
2112        case INDEX_op_dup_vec:
2113            done = fold_dup(&ctx, op);
2114            break;
2115        case INDEX_op_dup2_vec:
2116            done = fold_dup2(&ctx, op);
2117            break;
2118        CASE_OP_32_64_VEC(eqv):
2119            done = fold_eqv(&ctx, op);
2120            break;
2121        CASE_OP_32_64(extract):
2122            done = fold_extract(&ctx, op);
2123            break;
2124        CASE_OP_32_64(extract2):
2125            done = fold_extract2(&ctx, op);
2126            break;
2127        CASE_OP_32_64(ext8s):
2128        CASE_OP_32_64(ext16s):
2129        case INDEX_op_ext32s_i64:
2130        case INDEX_op_ext_i32_i64:
2131            done = fold_exts(&ctx, op);
2132            break;
2133        CASE_OP_32_64(ext8u):
2134        CASE_OP_32_64(ext16u):
2135        case INDEX_op_ext32u_i64:
2136        case INDEX_op_extu_i32_i64:
2137        case INDEX_op_extrl_i64_i32:
2138        case INDEX_op_extrh_i64_i32:
2139            done = fold_extu(&ctx, op);
2140            break;
2141        CASE_OP_32_64(ld8s):
2142        CASE_OP_32_64(ld8u):
2143        CASE_OP_32_64(ld16s):
2144        CASE_OP_32_64(ld16u):
2145        case INDEX_op_ld32s_i64:
2146        case INDEX_op_ld32u_i64:
2147            done = fold_tcg_ld(&ctx, op);
2148            break;
2149        case INDEX_op_mb:
2150            done = fold_mb(&ctx, op);
2151            break;
2152        CASE_OP_32_64_VEC(mov):
2153            done = fold_mov(&ctx, op);
2154            break;
2155        CASE_OP_32_64(movcond):
2156            done = fold_movcond(&ctx, op);
2157            break;
2158        CASE_OP_32_64(mul):
2159            done = fold_mul(&ctx, op);
2160            break;
2161        CASE_OP_32_64(mulsh):
2162        CASE_OP_32_64(muluh):
2163            done = fold_mul_highpart(&ctx, op);
2164            break;
2165        CASE_OP_32_64(muls2):
2166        CASE_OP_32_64(mulu2):
2167            done = fold_multiply2(&ctx, op);
2168            break;
2169        CASE_OP_32_64_VEC(nand):
2170            done = fold_nand(&ctx, op);
2171            break;
2172        CASE_OP_32_64(neg):
2173            done = fold_neg(&ctx, op);
2174            break;
2175        CASE_OP_32_64_VEC(nor):
2176            done = fold_nor(&ctx, op);
2177            break;
2178        CASE_OP_32_64_VEC(not):
2179            done = fold_not(&ctx, op);
2180            break;
2181        CASE_OP_32_64_VEC(or):
2182            done = fold_or(&ctx, op);
2183            break;
2184        CASE_OP_32_64_VEC(orc):
2185            done = fold_orc(&ctx, op);
2186            break;
2187        case INDEX_op_qemu_ld_a32_i32:
2188        case INDEX_op_qemu_ld_a64_i32:
2189        case INDEX_op_qemu_ld_a32_i64:
2190        case INDEX_op_qemu_ld_a64_i64:
2191        case INDEX_op_qemu_ld_a32_i128:
2192        case INDEX_op_qemu_ld_a64_i128:
2193            done = fold_qemu_ld(&ctx, op);
2194            break;
2195        case INDEX_op_qemu_st8_a32_i32:
2196        case INDEX_op_qemu_st8_a64_i32:
2197        case INDEX_op_qemu_st_a32_i32:
2198        case INDEX_op_qemu_st_a64_i32:
2199        case INDEX_op_qemu_st_a32_i64:
2200        case INDEX_op_qemu_st_a64_i64:
2201        case INDEX_op_qemu_st_a32_i128:
2202        case INDEX_op_qemu_st_a64_i128:
2203            done = fold_qemu_st(&ctx, op);
2204            break;
2205        CASE_OP_32_64(rem):
2206        CASE_OP_32_64(remu):
2207            done = fold_remainder(&ctx, op);
2208            break;
2209        CASE_OP_32_64(rotl):
2210        CASE_OP_32_64(rotr):
2211        CASE_OP_32_64(sar):
2212        CASE_OP_32_64(shl):
2213        CASE_OP_32_64(shr):
2214            done = fold_shift(&ctx, op);
2215            break;
2216        CASE_OP_32_64(setcond):
2217            done = fold_setcond(&ctx, op);
2218            break;
2219        case INDEX_op_setcond2_i32:
2220            done = fold_setcond2(&ctx, op);
2221            break;
2222        CASE_OP_32_64(sextract):
2223            done = fold_sextract(&ctx, op);
2224            break;
2225        CASE_OP_32_64(sub):
2226            done = fold_sub(&ctx, op);
2227            break;
2228        case INDEX_op_sub_vec:
2229            done = fold_sub_vec(&ctx, op);
2230            break;
2231        CASE_OP_32_64(sub2):
2232            done = fold_sub2(&ctx, op);
2233            break;
2234        CASE_OP_32_64_VEC(xor):
2235            done = fold_xor(&ctx, op);
2236            break;
2237        default:
2238            break;
2239        }
2240
2241        if (!done) {
2242            finish_folding(&ctx, op);
2243        }
2244    }
2245}
2246