qemu/tcg/optimize.c
/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/int128.h"
#include "qemu/interval-tree.h"
#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"
#include "tcg-has.h"


typedef struct MemCopyInfo {
    IntervalTreeNode itree;
    QSIMPLEQ_ENTRY (MemCopyInfo) next;
    TCGTemp *ts;
    TCGType type;
} MemCopyInfo;

typedef struct TempOptInfo {
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
    uint64_t o_mask;  /* mask bit is 1 if and only if value bit is 1 */
    uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
} TempOptInfo;

typedef struct OptContext {
    TCGContext *tcg;
    TCGOp *prev_mb;
    TCGTempSet temps_used;

    IntervalTreeRoot mem_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

    /* In flight values from optimization. */
    TCGType type;
    int carry_state;  /* -1 = non-constant, {0,1} = constant carry-in */
} OptContext;

static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ti_is_const(TempOptInfo *ti)
{
    /* If all bits that are not known zeros are known ones, it's constant. */
    return ti->z_mask == ti->o_mask;
}

static inline uint64_t ti_const_val(TempOptInfo *ti)
{
    /* If constant, both z_mask and o_mask contain the value. */
    return ti->z_mask;
}

static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
{
    return ti_is_const(ti) && ti_const_val(ti) == val;
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ti_is_const(ts_info(ts));
}

static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
{
    return ti_is_const_val(ts_info(ts), val);
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline uint64_t arg_const_val(TCGArg arg)
{
    return ti_const_val(arg_info(arg));
}

static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
    return ts_is_const_val(arg_temp(arg), val);
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

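/* Of two copies of the same value, prefer the longer-lived temp kind. */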
static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
{
    return a->kind < b->kind ? b : a;
}

/* Initialize and activate a temporary.  */
static void init_ts_info(OptContext *ctx, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, ctx->temps_used.l)) {
        return;
    }
    set_bit(idx, ctx->temps_used.l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    QSIMPLEQ_INIT(&ti->mem_copy);
    if (ts->kind == TEMP_CONST) {
        ti->z_mask = ts->val;
        ti->o_mask = ts->val;
        ti->s_mask = INT64_MIN >> clrsb64(ts->val);
    } else {
        ti->z_mask = -1;
        ti->o_mask = 0;
        ti->s_mask = 0;
    }
}

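/* Find the first/next recorded memory copy overlapping the range [s, l]. */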
static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
{
    TCGTemp *ts = mc->ts;
    TempOptInfo *ti = ts_info(ts);

    interval_tree_remove(&mc->itree, &ctx->mem_copy);
    QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
    QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
}

static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
{
    while (true) {
        MemCopyInfo *mc = mem_copy_first(ctx, s, l);
        if (!mc) {
            break;
        }
        remove_mem_copy(ctx, mc);
    }
}

static void remove_mem_copy_all(OptContext *ctx)
{
    remove_mem_copy_in(ctx, 0, -1);
    tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
}

static TCGTemp *find_better_copy(TCGTemp *ts)
{
    TCGTemp *i, *ret;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    ret = ts;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        ret = cmp_better_copy(ret, i);
    }
    return ret;
}

static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
{
    TempOptInfo *si = ts_info(src_ts);
    TempOptInfo *di = ts_info(dst_ts);
    MemCopyInfo *mc;

    QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
        tcg_debug_assert(mc->ts == src_ts);
        mc->ts = dst_ts;
    }
    QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(OptContext *ctx, TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TCGTemp *pts = ti->prev_copy;
    TCGTemp *nts = ti->next_copy;
    TempOptInfo *pi = ts_info(pts);
    TempOptInfo *ni = ts_info(nts);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->z_mask = -1;
    ti->o_mask = 0;
    ti->s_mask = 0;

    if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
        if (ts == nts) {
            /* Last temp copy being removed, the mem copies die. */
            MemCopyInfo *mc;
            QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
                interval_tree_remove(&mc->itree, &ctx->mem_copy);
            }
            QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
        } else {
            move_mem_copies(find_better_copy(nts), ts);
        }
    }
}

static void reset_temp(OptContext *ctx, TCGArg arg)
{
    reset_ts(ctx, arg_temp(arg));
}

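/* Record that the memory range [start, last] contains a copy of TS. */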
static void record_mem_copy(OptContext *ctx, TCGType type,
                            TCGTemp *ts, intptr_t start, intptr_t last)
{
    MemCopyInfo *mc;
    TempOptInfo *ti;

    mc = QSIMPLEQ_FIRST(&ctx->mem_free);
    if (mc) {
        QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
    } else {
        mc = tcg_malloc(sizeof(*mc));
    }

    memset(mc, 0, sizeof(*mc));
    mc->itree.start = start;
    mc->itree.last = last;
    mc->type = type;
    interval_tree_insert(&mc->itree, &ctx->mem_copy);

    ts = find_better_copy(ts);
    ti = ts_info(ts);
    mc->ts = ts;
    QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
{
    MemCopyInfo *mc;

    for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
        if (mc->itree.start == s && mc->type == type) {
            return find_better_copy(mc->ts);
        }
    }
    return NULL;
}

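/*
 * Return a constant argument of the current fold type; 32-bit
 * constants are canonicalized as sign-extended 64-bit values.
 */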
static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
{
    TCGType type = ctx->type;
    TCGTemp *ts;

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    ts = tcg_constant_internal(type, val);
    init_ts_info(ctx, ts);

    return temp_arg(ts);
}

static TCGArg arg_new_temp(OptContext *ctx)
{
    TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
    init_ts_info(ctx, ts);
    return temp_arg(ts);
}

static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
                               TCGOpcode opc, unsigned narg)
{
    return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
}

static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
                                TCGOpcode opc, unsigned narg)
{
    return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
}

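/*
 * Rewrite @op into a mov from SRC to DST, propagating the known-bits
 * masks and, when the types match, linking DST into SRC's copy list.
 */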
static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    TempOptInfo *di;
    TempOptInfo *si;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }

    reset_ts(ctx, dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        new_op = INDEX_op_mov;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /* TCGOP_TYPE and TCGOP_VECE remain unchanged.  */
        new_op = INDEX_op_mov_vec;
        break;
    default:
        g_assert_not_reached();
    }
    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    di->z_mask = si->z_mask;
    di->o_mask = si->o_mask;
    di->s_mask = si->s_mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;

        if (!QSIMPLEQ_EMPTY(&si->mem_copy)
            && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
            move_mem_copies(dst_ts, src_ts);
        }
    } else if (dst_ts->type == TCG_TYPE_I32) {
        di->z_mask = (int32_t)di->z_mask;
        di->o_mask = (int32_t)di->o_mask;
        di->s_mask |= INT32_MIN;
    } else {
        di->z_mask |= MAKE_64BIT_MASK(32, 32);
        di->o_mask = (uint32_t)di->o_mask;
        di->s_mask = INT64_MIN;
    }
    return true;
}

static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
{
    /* Convert movi to mov with constant temp. */
    return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
}

static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
                                      uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    case INDEX_op_add:
        return x + y;

    case INDEX_op_sub:
        return x - y;

    case INDEX_op_mul:
        return x * y;

    case INDEX_op_and:
    case INDEX_op_and_vec:
        return x & y;

    case INDEX_op_or:
    case INDEX_op_or_vec:
        return x | y;

    case INDEX_op_xor:
    case INDEX_op_xor_vec:
        return x ^ y;

    case INDEX_op_shl:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x << (y & 31);
        }
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x >> (y & 31);
        }
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar:
        if (type == TCG_TYPE_I32) {
            return (int32_t)x >> (y & 31);
        }
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr:
        if (type == TCG_TYPE_I32) {
            return ror32(x, y & 31);
        }
        return ror64(x, y & 63);

    case INDEX_op_rotl:
        if (type == TCG_TYPE_I32) {
            return rol32(x, y & 31);
        }
        return rol64(x, y & 63);

    case INDEX_op_not:
    case INDEX_op_not_vec:
        return ~x;

    case INDEX_op_neg:
        return -x;

    case INDEX_op_andc:
    case INDEX_op_andc_vec:
        return x & ~y;

    case INDEX_op_orc:
    case INDEX_op_orc_vec:
        return x | ~y;

    case INDEX_op_eqv:
    case INDEX_op_eqv_vec:
        return ~(x ^ y);

    case INDEX_op_nand:
    case INDEX_op_nand_vec:
        return ~(x & y);

    case INDEX_op_nor:
    case INDEX_op_nor_vec:
        return ~(x | y);

    case INDEX_op_clz:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x ? clz32(x) : y;
        }
        return x ? clz64(x) : y;

    case INDEX_op_ctz:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x ? ctz32(x) : y;
        }
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop:
        return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);

    case INDEX_op_bswap16:
        x = bswap16(x);
        return y & TCG_BSWAP_OS ? (int16_t)x : x;

    case INDEX_op_bswap32:
        x = bswap32(x);
        return y & TCG_BSWAP_OS ? (int32_t)x : x;

    case INDEX_op_bswap64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh:
        if (type == TCG_TYPE_I32) {
            return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
        }
        mulu64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_mulsh:
        if (type == TCG_TYPE_I32) {
            return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
        }
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_divs:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        if (type == TCG_TYPE_I32) {
            return (int32_t)x / ((int32_t)y ? : 1);
        }
        return (int64_t)x / ((int64_t)y ? : 1);

    case INDEX_op_divu:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x / ((uint32_t)y ? : 1);
        }
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rems:
        if (type == TCG_TYPE_I32) {
            return (int32_t)x % ((int32_t)y ? : 1);
        }
        return (int64_t)x % ((int64_t)y ? : 1);

    case INDEX_op_remu:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x % ((uint32_t)y ? : 1);
        }
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        g_assert_not_reached();
    }
}

static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
                                    uint64_t x, uint64_t y)
{
    uint64_t res = do_constant_folding_2(op, type, x, y);
    if (type == TCG_TYPE_I32) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    case TCG_COND_TSTEQ:
        return (x & y) == 0;
    case TCG_COND_TSTNE:
        return (x & y) != 0;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    case TCG_COND_TSTEQ:
        return (x & y) == 0;
    case TCG_COND_TSTNE:
        return (x & y) != 0;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

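/*
 * Evaluate a comparison whose two operands are known to be equal.
 * The result of a TST condition still depends on the value itself,
 * so return -1 for those.
 */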
static int do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        return -1;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

/*
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static int do_constant_folding_cond(TCGType type, TCGArg x,
                                    TCGArg y, TCGCond c)
{
    if (arg_is_const(x) && arg_is_const(y)) {
        uint64_t xv = arg_const_val(x);
        uint64_t yv = arg_const_val(y);

        switch (type) {
        case TCG_TYPE_I32:
            return do_constant_folding_cond_32(xv, yv, c);
        case TCG_TYPE_I64:
            return do_constant_folding_cond_64(xv, yv, c);
        default:
            /* Only scalar comparisons are optimizable */
            return -1;
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const_val(y, 0)) {
        switch (c) {
        case TCG_COND_LTU:
        case TCG_COND_TSTNE:
            return 0;
        case TCG_COND_GEU:
        case TCG_COND_TSTEQ:
            return 1;
        default:
            return -1;
        }
    }
    return -1;
}

/**
 * swap_commutative:
 * @dest: TCGArg of the destination argument, or NO_DEST.
 * @p1: first paired argument
 * @p2: second paired argument
 *
 * If *@p1 is a constant and *@p2 is not, swap.
 * If *@p2 matches @dest, swap.
 * Return true if a swap was performed.
 */

#define NO_DEST  temp_arg(NULL)

static int pref_commutative(TempOptInfo *ti)
{
    /* Slight preference for non-zero constants second. */
    return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += pref_commutative(arg_info(a1));
    sum -= pref_commutative(arg_info(a2));

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += pref_commutative(arg_info(p1[0]));
    sum += pref_commutative(arg_info(p1[1]));
    sum -= pref_commutative(arg_info(p2[0]));
    sum -= pref_commutative(arg_info(p2[1]));
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/*
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static bool fold_and(OptContext *ctx, TCGOp *op);
static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
                                     TCGArg *p1, TCGArg *p2, TCGArg *pcond)
{
    TCGCond cond;
    TempOptInfo *i1;
    bool swap;
    int r;

    swap = swap_commutative(dest, p1, p2);
    cond = *pcond;
    if (swap) {
        *pcond = cond = tcg_swap_cond(cond);
    }

    r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
    if (r >= 0) {
        return r;
    }
    if (!is_tst_cond(cond)) {
        return -1;
    }

    i1 = arg_info(*p1);

    /*
     * TSTNE x,x -> NE x,0
     * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
     */
    if (args_are_copies(*p1, *p2) ||
        (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
        return -1;
    }

    /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
    if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_ltge_cond(cond);
        return -1;
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst) {
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg tmp = arg_new_temp(ctx);

        op2->args[0] = tmp;
        op2->args[1] = *p1;
        op2->args[2] = *p2;
        fold_and(ctx, op2);

        *p1 = tmp;
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
    }
    return -1;
}

static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
{
    TCGArg al, ah, bl, bh;
    TCGCond c;
    bool swap;
    int r;

    swap = swap_commutative2(args, args + 2);
    c = args[4];
    if (swap) {
        args[4] = c = tcg_swap_cond(c);
    }

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_const_val(bl);
        tcg_target_ulong bhv = arg_const_val(bh);
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_const_val(al);
            tcg_target_ulong ahv = arg_const_val(ah);
            uint64_t a = deposit64(alv, 32, 32, ahv);

            r = do_constant_folding_cond_64(a, b, c);
            if (r >= 0) {
                return r;
            }
        }

        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
            case TCG_COND_TSTNE:
                return 0;
            case TCG_COND_GEU:
            case TCG_COND_TSTEQ:
                return 1;
            default:
                break;
            }
        }

        /* TSTNE x,-1 -> NE x,0 */
        if (b == -1 && is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }

        /* TSTNE x,sign -> LT x,0 */
        if (b == INT64_MIN && is_tst_cond(c)) {
            /* bl must be 0, so copy that to bh */
            args[3] = bl;
            args[4] = tcg_tst_ltge_cond(c);
            return -1;
        }
    }

    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        r = do_constant_folding_cond_eq(c);
        if (r >= 0) {
            return r;
        }

        /* TSTNE x,x -> NE x,0 */
        if (is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
        TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg t1 = arg_new_temp(ctx);
        TCGArg t2 = arg_new_temp(ctx);

        op1->args[0] = t1;
        op1->args[1] = al;
        op1->args[2] = bl;
        fold_and(ctx, op1);

        op2->args[0] = t2;
        op2->args[1] = ah;
        op2->args[2] = bh;
        fold_and(ctx, op2);

        args[0] = t1;
        args[1] = t2;
        args[3] = args[2] = arg_new_constant(ctx, 0);
        args[4] = tcg_tst_eqne_cond(c);
    }
    return -1;
}

static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    for (int i = 0; i < nb_args; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        init_ts_info(ctx, ts);
    }
}

static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (ts_is_copy(ts)) {
            op->args[i] = temp_arg(find_better_copy(ts));
        }
    }
}

static void finish_bb(OptContext *ctx)
{
    /* We only optimize memory barriers across basic blocks. */
    ctx->prev_mb = NULL;
}

static void finish_ebb(OptContext *ctx)
{
    finish_bb(ctx);
    /* We only optimize across extended basic blocks. */
    memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
    remove_mem_copy_all(ctx);
}

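/*
 * Reset the known-value state for each output of @op.
 * Returning true marks the opcode as fully processed.
 */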
static bool finish_folding(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    int i, nb_oargs;

    nb_oargs = def->nb_oargs;
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        reset_ts(ctx, ts);
    }
    return true;
}

/*
 * The fold_* functions return true when processing is complete,
 * usually by folding the operation to a constant or to a copy,
 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 * like collect information about the value produced, for use in
 * optimizing a subsequent operation.
 *
 * These first fold_* functions are all helpers, used by other
 * folders for more specific operations.
 */

static bool fold_const1(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1])) {
        uint64_t t = arg_const_val(op->args[1]);

        t = do_constant_folding(op->opc, ctx->type, t, 0);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }
    return false;
}

static bool fold_const2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t1 = arg_const_val(op->args[1]);
        uint64_t t2 = arg_const_val(op->args[2]);

        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    }
    return false;
}

static bool fold_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return false;
}

static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return fold_const2(ctx, op);
}

/*
 * Record "zero", "one" and "sign" masks for the single output of @op.
 * See TempOptInfo definition of z_mask, o_mask and s_mask.
 * If the masks describe a constant, fold the output to it.
 * The passed s_mask may be augmented by z_mask.
 */
static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
                                uint64_t z_mask, uint64_t o_mask,
                                int64_t s_mask, uint64_t a_mask)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGTemp *ts;
    TempOptInfo *ti;
    int rep;

    /* Only single-output opcodes are supported here. */
    tcg_debug_assert(def->nb_oargs == 1);

    /*
     * 32-bit ops generate 32-bit results, which for the purpose of
     * simplifying tcg are sign-extended.  Certainly that's how we
     * represent our constants elsewhere.  Note that the bits will
     * be reset properly for a 64-bit value when encountering the
     * type changing opcodes.
     */
    if (ctx->type == TCG_TYPE_I32) {
        z_mask = (int32_t)z_mask;
        o_mask = (int32_t)o_mask;
        s_mask |= INT32_MIN;
        a_mask = (uint32_t)a_mask;
    }

    /* Bits that are known 1 and bits that are known 0 must not overlap. */
    tcg_debug_assert((o_mask & ~z_mask) == 0);

    /* If all bits that are not known zero are known one, it's a constant. */
    if (z_mask == o_mask) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
    }

    /* If no bits are affected, the operation devolves to a copy. */
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }

    ts = arg_temp(op->args[0]);
    reset_ts(ctx, ts);

    ti = ts_info(ts);
    ti->z_mask = z_mask;
    ti->o_mask = o_mask;

    /* Canonicalize s_mask and incorporate data from z_mask. */
    rep = clz64(~s_mask);
    rep = MAX(rep, clz64(z_mask));
    rep = MAX(rep, clz64(~o_mask));
    rep = MAX(rep - 1, 0);
    ti->s_mask = INT64_MIN >> rep;

    return false;
}

static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
                            uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
{
    fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
    return true;
}

static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
}

static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t o_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
}

static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
}

static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
}

static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        not_op = INDEX_op_not;
        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to identity. */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to NOT. */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return fold_to_not(ctx, op, 1);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to @i. */
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to identity. */
static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * These outermost fold_<op> functions are sorted alphabetically.
 *
 * The ordering of the transformations should be:
 *   1) those that produce a constant
 *   2) those that produce a copy
 *   3) those that produce information about the result value.
 */

static bool fold_addco(OptContext *ctx, TCGOp *op);
static bool fold_or(OptContext *ctx, TCGOp *op);
static bool fold_orc(OptContext *ctx, TCGOp *op);
static bool fold_subbo(OptContext *ctx, TCGOp *op);
static bool fold_xor(OptContext *ctx, TCGOp *op);

static bool fold_add(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}

/* We cannot as yet do_constant_folding with vectors. */
static bool fold_add_vec(OptContext *ctx, TCGOp *op)
{
    if (fold_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}

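/*
 * The carry-out of the opcode preceding @op is no longer needed;
 * rewrite that opcode to an equivalent form that does not produce it.
 */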
static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t2;

    op = QTAILQ_PREV(op, link);
    switch (op->opc) {
    case INDEX_op_addco:
        op->opc = INDEX_op_add;
        fold_add(ctx, op);
        break;
    case INDEX_op_addcio:
        op->opc = INDEX_op_addci;
        break;
    case INDEX_op_addc1o:
        op->opc = INDEX_op_add;
        t2 = arg_info(op->args[2]);
        if (ti_is_const(t2)) {
            op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
            /* Perform other constant folding, if needed. */
            fold_add(ctx, op);
        } else {
            TCGArg ret = op->args[0];
            op = opt_insert_after(ctx, op, INDEX_op_add, 3);
            op->args[0] = ret;
            op->args[1] = ret;
            op->args[2] = arg_new_constant(ctx, 1);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static bool fold_addci(OptContext *ctx, TCGOp *op)
{
    fold_commutative(ctx, op);

    if (ctx->carry_state < 0) {
        return finish_folding(ctx, op);
    }

    squash_prev_carryout(ctx, op);
    op->opc = INDEX_op_add;

    if (ctx->carry_state > 0) {
        TempOptInfo *t2 = arg_info(op->args[2]);

        /*
         * Propagate the known carry-in into a constant, if possible.
         * Otherwise emit a second add +1.
         */
        if (ti_is_const(t2)) {
            op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
        } else {
            TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);

            op2->args[0] = op->args[0];
            op2->args[1] = op->args[1];
            op2->args[2] = op->args[2];
            fold_add(ctx, op2);

            op->args[1] = op->args[0];
            op->args[2] = arg_new_constant(ctx, 1);
        }
    }

    ctx->carry_state = -1;
    return fold_add(ctx, op);
}

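/*
 * Fold add with both carry-in and carry-out.  When the carry-in is
 * known, fold it into a constant operand and reduce to addco, or
 * switch to addc1o to remember a known carry-in of 1.
 */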
static bool fold_addcio(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1, *t2;
    int carry_out = -1;
    uint64_t sum, max;

    fold_commutative(ctx, op);
    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    /*
     * The z_mask value is >= the maximum value that can be represented
     * with the known zero bits.  So adding the z_mask values will not
     * overflow if and only if the true values cannot overflow.
     */
    if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
        !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
        carry_out = 0;
    }

    if (ctx->carry_state < 0) {
        ctx->carry_state = carry_out;
        return finish_folding(ctx, op);
    }

    squash_prev_carryout(ctx, op);
    if (ctx->carry_state == 0) {
        goto do_addco;
    }

    /* Propagate the known carry-in into a constant, if possible. */
    max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
    if (ti_is_const(t2)) {
        uint64_t v = ti_const_val(t2) & max;
        if (v < max) {
            op->args[2] = arg_new_constant(ctx, v + 1);
            goto do_addco;
        }
        /* max + known carry in produces known carry out. */
        carry_out = 1;
    }
    if (ti_is_const(t1)) {
        uint64_t v = ti_const_val(t1) & max;
        if (v < max) {
            op->args[1] = arg_new_constant(ctx, v + 1);
            goto do_addco;
        }
        carry_out = 1;
    }

    /* Adjust the opcode to remember the known carry-in. */
    op->opc = INDEX_op_addc1o;
    ctx->carry_state = carry_out;
    return finish_folding(ctx, op);

 do_addco:
    op->opc = INDEX_op_addco;
    return fold_addco(ctx, op);
}

static bool fold_addco(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1, *t2;
    int carry_out = -1;
    uint64_t ign;

    fold_commutative(ctx, op);
    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    if (ti_is_const(t2)) {
        uint64_t v2 = ti_const_val(t2);

        if (ti_is_const(t1)) {
            uint64_t v1 = ti_const_val(t1);
            /* Given sign-extension of z_mask for I32, we need not truncate. */
            carry_out = uadd64_overflow(v1, v2, &ign);
        } else if (v2 == 0) {
            carry_out = 0;
        }
    } else {
        /*
         * The z_mask value is >= the maximum value that can be represented
         * with the known zero bits.  So adding the z_mask values will not
         * overflow if and only if the true values cannot overflow.
         */
        if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
            carry_out = 0;
        }
    }
    ctx->carry_state = carry_out;
    return finish_folding(ctx, op);
}

static bool fold_and(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, o_mask, s_mask, a_mask;
    TempOptInfo *t1, *t2;

    if (fold_const2_commutative(ctx, op)) {
        return true;
    }

    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    z_mask = t1->z_mask & t2->z_mask;
    o_mask = t1->o_mask & t2->o_mask;

    /*
     * Sign repetitions are perforce all identical, whether they are 1 or 0.
     * Bitwise operations preserve the relative quantity of the repetitions.
     */
    s_mask = t1->s_mask & t2->s_mask;

    /* Affected bits are those not known zero, masked by those known one. */
    a_mask = t1->z_mask & ~t2->o_mask;

    if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) {
        if (op->opc == INDEX_op_and && ti_is_const(t2)) {
            /*
             * Canonicalize on extract, if valid.  This aids x86 with its
             * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
             * which does not require matching operands.  Other backends can
             * trivially expand the extract to AND during code generation.
             */
            uint64_t val = ti_const_val(t2);
            if (!(val & (val + 1))) {
                unsigned len = ctz64(~val);
                if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
                    op->opc = INDEX_op_extract;
                    op->args[2] = 0;
                    op->args[3] = len;
                }
            }
        } else {
            fold_xx_to_x(ctx, op);
        }
    }
    return true;
}

static bool fold_andc(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, o_mask, s_mask, a_mask;
    TempOptInfo *t1, *t2;

    if (fold_const2(ctx, op)) {
        return true;
    }

    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    if (ti_is_const(t2)) {
        /* Fold andc r,x,i to and r,x,~i. */
        switch (ctx->type) {
        case TCG_TYPE_I32:
        case TCG_TYPE_I64:
            op->opc = INDEX_op_and;
            break;
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            op->opc = INDEX_op_and_vec;
            break;
        default:
            g_assert_not_reached();
        }
        op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
        return fold_and(ctx, op);
    }
    if (fold_xx_to_i(ctx, op, 0) ||
        fold_ix_to_not(ctx, op, -1)) {
        return true;
    }

    z_mask = t1->z_mask & ~t2->o_mask;
    o_mask = t1->o_mask & ~t2->z_mask;
    s_mask = t1->s_mask & t2->s_mask;

    /* Affected bits are those not known zero, masked by those known zero. */
    a_mask = t1->z_mask & t2->z_mask;

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}

static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
{
    /* If true and false values are the same, eliminate the cmp. */
    if (args_are_copies(op->args[2], op->args[3])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
    }

    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
        uint64_t tv = arg_const_val(op->args[2]);
        uint64_t fv = arg_const_val(op->args[3]);

        if (tv == -1 && fv == 0) {
            return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
        }
        if (tv == 0 && fv == -1) {
            if (TCG_TARGET_HAS_not_vec) {
                op->opc = INDEX_op_not_vec;
                return fold_not(ctx, op);
            } else {
                op->opc = INDEX_op_xor_vec;
                op->args[2] = arg_new_constant(ctx, -1);
                return fold_xor(ctx, op);
            }
        }
    }
    if (arg_is_const(op->args[2])) {
        uint64_t tv = arg_const_val(op->args[2]);
        if (tv == -1) {
            op->opc = INDEX_op_or_vec;
            op->args[2] = op->args[3];
            return fold_or(ctx, op);
        }
        if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
            op->opc = INDEX_op_andc_vec;
            op->args[2] = op->args[1];
            op->args[1] = op->args[3];
            return fold_andc(ctx, op);
        }
    }
    if (arg_is_const(op->args[3])) {
        uint64_t fv = arg_const_val(op->args[3]);
        if (fv == 0) {
            op->opc = INDEX_op_and_vec;
            return fold_and(ctx, op);
        }
        if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
            op->opc = INDEX_op_orc_vec;
            op->args[2] = op->args[1];
            op->args[1] = op->args[3];
            return fold_orc(ctx, op);
        }
    }
    return finish_folding(ctx, op);
}

static bool fold_brcond(OptContext *ctx, TCGOp *op)
{
    int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
                                      &op->args[1], &op->args[2]);
    if (i == 0) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }
    if (i > 0) {
        op->opc = INDEX_op_br;
        op->args[0] = op->args[3];
        finish_ebb(ctx);
    } else {
        finish_bb(ctx);
    }
    return true;
}

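/*
 * Fold a double-word comparison and branch, reducing to a single-word
 * brcond on the high or low half whenever one half can be resolved.
 */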
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond;
    TCGArg label;
    int i, inv = 0;

    i = do_constant_folding_cond2(ctx, op, &op->args[0]);
    cond = op->args[4];
    label = op->args[5];
    if (i >= 0) {
        goto do_brcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const_val(op->args[2], 0) &&
            arg_is_const_val(op->args[3], 0)) {
            goto do_brcond_high;
        }
        break;

    case TCG_COND_NE:
        inv = 1;
        QEMU_FALLTHROUGH;
    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified.
         */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
                                     op->args[2], cond);
        switch (i ^ inv) {
        case 0:
            goto do_brcond_const;
        case 1:
            goto do_brcond_high;
        }

        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        switch (i ^ inv) {
        case 0:
            goto do_brcond_const;
        case 1:
            goto do_brcond_low;
        }
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        if (arg_is_const_val(op->args[2], 0)) {
            goto do_brcond_high;
        }
        if (arg_is_const_val(op->args[3], 0)) {
            goto do_brcond_low;
        }
        break;

    default:
        break;

    do_brcond_low:
        op->opc = INDEX_op_brcond;
        op->args[1] = op->args[2];
        op->args[2] = cond;
        op->args[3] = label;
        return fold_brcond(ctx, op);

    do_brcond_high:
        op->opc = INDEX_op_brcond;
        op->args[0] = op->args[1];
        op->args[1] = op->args[3];
        op->args[2] = cond;
        op->args[3] = label;
        return fold_brcond(ctx, op);

    do_brcond_const:
        if (i == 0) {
            tcg_op_remove(ctx->tcg, op);
            return true;
        }
        op->opc = INDEX_op_br;
        op->args[0] = label;
        finish_ebb(ctx);
        return true;
    }

    finish_bb(ctx);
    return true;
}

static bool fold_bswap(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, o_mask, s_mask;
    TempOptInfo *t1 = arg_info(op->args[1]);
    int flags = op->args[2];

    if (ti_is_const(t1)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0],
                                do_constant_folding(op->opc, ctx->type,
                                                    ti_const_val(t1), flags));
    }

    z_mask = t1->z_mask;
    o_mask = t1->o_mask;
    s_mask = 0;

    switch (op->opc) {
    case INDEX_op_bswap16:
        z_mask = bswap16(z_mask);
        o_mask = bswap16(o_mask);
        if (flags & TCG_BSWAP_OS) {
            z_mask = (int16_t)z_mask;
            o_mask = (int16_t)o_mask;
            s_mask = INT16_MIN;
        } else if (!(flags & TCG_BSWAP_OZ)) {
            z_mask |= MAKE_64BIT_MASK(16, 48);
        }
        break;
    case INDEX_op_bswap32:
        z_mask = bswap32(z_mask);
        o_mask = bswap32(o_mask);
        if (flags & TCG_BSWAP_OS) {
            z_mask = (int32_t)z_mask;
            o_mask = (int32_t)o_mask;
            s_mask = INT32_MIN;
        } else if (!(flags & TCG_BSWAP_OZ)) {
            z_mask |= MAKE_64BIT_MASK(32, 32);
        }
        break;
    case INDEX_op_bswap64:
        z_mask = bswap64(z_mask);
        o_mask = bswap64(o_mask);
        break;
    default:
        g_assert_not_reached();
    }

    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}

static bool fold_call(OptContext *ctx, TCGOp *op)
{
    TCGContext *s = ctx->tcg;
    int nb_oargs = TCGOP_CALLO(op);
    int nb_iargs = TCGOP_CALLI(op);
    int flags, i;

    init_arguments(ctx, op, nb_oargs + nb_iargs);
    copy_propagate(ctx, op, nb_oargs, nb_iargs);

    /* If the function reads or writes globals, reset temp data. */
    flags = tcg_call_flags(op);
    if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
        int nb_globals = s->nb_globals;

        for (i = 0; i < nb_globals; i++) {
            if (test_bit(i, ctx->temps_used.l)) {
                reset_ts(ctx, &ctx->tcg->temps[i]);
            }
        }
    }

    /* If the function has side effects, reset mem data. */
    if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        remove_mem_copy_all(ctx);
    }

    /* Reset temp data for outputs. */
    for (i = 0; i < nb_oargs; i++) {
        reset_temp(ctx, op->args[i]);
    }

    /* Stop optimizing MB across calls. */
    ctx->prev_mb = NULL;
    return true;
}

static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
{
    /* Canonicalize the comparison to put immediate second. */
    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
        op->args[3] = tcg_swap_cond(op->args[3]);
    }
    return finish_folding(ctx, op);
}

static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
{
    /* If true and false values are the same, eliminate the cmp. */
    if (args_are_copies(op->args[3], op->args[4])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
    }

    /* Canonicalize the comparison to put immediate second. */
    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
        op->args[5] = tcg_swap_cond(op->args[5]);
    }
    /*
     * Canonicalize the "false" input reg to match the destination,
     * so that the tcg backend can implement "move if true".
     */
    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
        op->args[5] = tcg_invert_cond(op->args[5]);
    }
    return finish_folding(ctx, op);
}

static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask;
    TempOptInfo *t1 = arg_info(op->args[1]);
    TempOptInfo *t2 = arg_info(op->args[2]);

    if (ti_is_const(t1)) {
        uint64_t t = ti_const_val(t1);

        if (t != 0) {
            t = do_constant_folding(op->opc, ctx->type, t, 0);
            return tcg_opt_gen_movi(ctx, op, op->args[0], t);
        }
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
    }

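    /*
     * For a nonzero input the result is at most 31 (I32) or 63 (I64);
     * for a zero input it is a copy of args[2], so merge in its masks.
     */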
1827    switch (ctx->type) {
1828    case TCG_TYPE_I32:
1829        z_mask = 31;
1830        break;
1831    case TCG_TYPE_I64:
1832        z_mask = 63;
1833        break;
1834    default:
1835        g_assert_not_reached();
1836    }
1837    s_mask = ~z_mask;
1838    z_mask |= t2->z_mask;
1839    s_mask &= t2->s_mask;
1840
1841    return fold_masks_zs(ctx, op, z_mask, s_mask);
1842}
1843
1844static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1845{
1846    uint64_t z_mask;
1847
1848    if (fold_const1(ctx, op)) {
1849        return true;
1850    }
1851
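    /*
     * ctpop of an N-bit value lies in [0, N]; N | (N - 1) is the
     * tightest all-ones bound covering that range.
     */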
1852    switch (ctx->type) {
1853    case TCG_TYPE_I32:
1854        z_mask = 32 | 31;
1855        break;
1856    case TCG_TYPE_I64:
1857        z_mask = 64 | 63;
1858        break;
1859    default:
1860        g_assert_not_reached();
1861    }
1862    return fold_masks_z(ctx, op, z_mask);
1863}
1864
1865static bool fold_deposit(OptContext *ctx, TCGOp *op)
1866{
1867    TempOptInfo *t1 = arg_info(op->args[1]);
1868    TempOptInfo *t2 = arg_info(op->args[2]);
1869    int ofs = op->args[3];
1870    int len = op->args[4];
1871    int width = 8 * tcg_type_size(ctx->type);
1872    uint64_t z_mask, o_mask, s_mask;
1873
1874    if (ti_is_const(t1) && ti_is_const(t2)) {
1875        return tcg_opt_gen_movi(ctx, op, op->args[0],
1876                                deposit64(ti_const_val(t1), ofs, len,
1877                                          ti_const_val(t2)));
1878    }
1879
1880    /* Inserting a value into zero at offset 0. */
1881    if (ti_is_const_val(t1, 0) && ofs == 0) {
1882        uint64_t mask = MAKE_64BIT_MASK(0, len);
1883
1884        op->opc = INDEX_op_and;
1885        op->args[1] = op->args[2];
1886        op->args[2] = arg_new_constant(ctx, mask);
1887        return fold_and(ctx, op);
1888    }
1889
1890    /* Inserting zero into a value. */
1891    if (ti_is_const_val(t2, 0)) {
1892        uint64_t mask = deposit64(-1, ofs, len, 0);
1893
1894        op->opc = INDEX_op_and;
1895        op->args[2] = arg_new_constant(ctx, mask);
1896        return fold_and(ctx, op);
1897    }
1898
1899    /* The s_mask from the top portion of the deposit is still valid. */
1900    if (ofs + len == width) {
1901        s_mask = t2->s_mask << ofs;
1902    } else {
1903        s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1904    }
1905
1906    z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1907    o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
1908
1909    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1910}
1911
1912static bool fold_divide(OptContext *ctx, TCGOp *op)
1913{
1914    if (fold_const2(ctx, op) ||
1915        fold_xi_to_x(ctx, op, 1)) {
1916        return true;
1917    }
1918    return finish_folding(ctx, op);
1919}
1920
1921static bool fold_dup(OptContext *ctx, TCGOp *op)
1922{
1923    if (arg_is_const(op->args[1])) {
1924        uint64_t t = arg_const_val(op->args[1]);
1925        t = dup_const(TCGOP_VECE(op), t);
1926        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1927    }
1928    return finish_folding(ctx, op);
1929}
1930
1931static bool fold_dup2(OptContext *ctx, TCGOp *op)
1932{
1933    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1934        uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
1935                               arg_const_val(op->args[2]));
1936        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1937    }
1938
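    /* Two identical 32-bit halves are a dup of one 32-bit element. */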
1939    if (args_are_copies(op->args[1], op->args[2])) {
1940        op->opc = INDEX_op_dup_vec;
1941        TCGOP_VECE(op) = MO_32;
1942    }
1943    return finish_folding(ctx, op);
1944}
1945
1946static bool fold_eqv(OptContext *ctx, TCGOp *op)
1947{
1948    uint64_t z_mask, o_mask, s_mask;
1949    TempOptInfo *t1, *t2;
1950
1951    if (fold_const2_commutative(ctx, op)) {
1952        return true;
1953    }
1954
1955    t2 = arg_info(op->args[2]);
1956    if (ti_is_const(t2)) {
1957        /* Fold eqv r,x,i to xor r,x,~i. */
1958        switch (ctx->type) {
1959        case TCG_TYPE_I32:
1960        case TCG_TYPE_I64:
1961            op->opc = INDEX_op_xor;
1962            break;
1963        case TCG_TYPE_V64:
1964        case TCG_TYPE_V128:
1965        case TCG_TYPE_V256:
1966            op->opc = INDEX_op_xor_vec;
1967            break;
1968        default:
1969            g_assert_not_reached();
1970        }
1971        op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1972        return fold_xor(ctx, op);
1973    }
1974
1975    t1 = arg_info(op->args[1]);
1976
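    /*
     * eqv is ~(x ^ y): a result bit is known 1 where both inputs are
     * known 0 or both are known 1, and known 0 only where the inputs
     * are known to differ.
     */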
1977    z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
1978    o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
1979    s_mask = t1->s_mask & t2->s_mask;
1980
1981    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1982}
1983
1984static bool fold_extract(OptContext *ctx, TCGOp *op)
1985{
1986    uint64_t z_mask, o_mask, a_mask;
1987    TempOptInfo *t1 = arg_info(op->args[1]);
1988    int pos = op->args[2];
1989    int len = op->args[3];
1990
1991    if (ti_is_const(t1)) {
1992        return tcg_opt_gen_movi(ctx, op, op->args[0],
1993                                extract64(ti_const_val(t1), pos, len));
1994    }
1995
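    /*
     * With pos == 0, the extract is a no-op iff the discarded high
     * bits are already known zero; with pos != 0 it can never fold
     * to a simple move.
     */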
1996    z_mask = extract64(t1->z_mask, pos, len);
1997    o_mask = extract64(t1->o_mask, pos, len);
1998    a_mask = pos ? -1 : t1->z_mask ^ z_mask;
1999
2000    return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
2001}
2002
2003static bool fold_extract2(OptContext *ctx, TCGOp *op)
2004{
2005    TempOptInfo *t1 = arg_info(op->args[1]);
2006    TempOptInfo *t2 = arg_info(op->args[2]);
2007    uint64_t z1 = t1->z_mask;
2008    uint64_t z2 = t2->z_mask;
2009    uint64_t o1 = t1->o_mask;
2010    uint64_t o2 = t2->o_mask;
2011    int shr = op->args[3];
2012
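    /*
     * Shift the concatenation t2:t1 right by shr, tracking each mask.
     * The int32_t casts keep 32-bit masks in the sign-extended form
     * used for 32-bit constants throughout this file.
     */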
2013    if (ctx->type == TCG_TYPE_I32) {
2014        z1 = (uint32_t)z1 >> shr;
2015        o1 = (uint32_t)o1 >> shr;
2016        z2 = (uint64_t)((int32_t)z2 << (32 - shr));
2017        o2 = (uint64_t)((int32_t)o2 << (32 - shr));
2018    } else {
2019        z1 >>= shr;
2020        o1 >>= shr;
2021        z2 <<= 64 - shr;
2022        o2 <<= 64 - shr;
2023    }
2024
2025    return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
2026}
2027
2028static bool fold_exts(OptContext *ctx, TCGOp *op)
2029{
2030    uint64_t z_mask, o_mask, s_mask;
2031    TempOptInfo *t1;
2032
2033    if (fold_const1(ctx, op)) {
2034        return true;
2035    }
2036
2037    t1 = arg_info(op->args[1]);
2038    z_mask = t1->z_mask;
2039    o_mask = t1->o_mask;
2040    s_mask = t1->s_mask;
2041
2042    switch (op->opc) {
2043    case INDEX_op_ext_i32_i64:
2044        s_mask |= INT32_MIN;
2045        z_mask = (int32_t)z_mask;
2046        o_mask = (int32_t)o_mask;
2047        break;
2048    default:
2049        g_assert_not_reached();
2050    }
2051    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2052}
2053
2054static bool fold_extu(OptContext *ctx, TCGOp *op)
2055{
2056    uint64_t z_mask, o_mask;
2057    TempOptInfo *t1;
2058
2059    if (fold_const1(ctx, op)) {
2060        return true;
2061    }
2062
2063    t1 = arg_info(op->args[1]);
2064    z_mask = t1->z_mask;
2065    o_mask = t1->o_mask;
2066
2067    switch (op->opc) {
2068    case INDEX_op_extrl_i64_i32:
2069    case INDEX_op_extu_i32_i64:
2070        z_mask = (uint32_t)z_mask;
2071        o_mask = (uint32_t)o_mask;
2072        break;
2073    case INDEX_op_extrh_i64_i32:
2074        z_mask >>= 32;
2075        o_mask >>= 32;
2076        break;
2077    default:
2078        g_assert_not_reached();
2079    }
2080    return fold_masks_zo(ctx, op, z_mask, o_mask);
2081}
2082
2083static bool fold_mb(OptContext *ctx, TCGOp *op)
2084{
2085    /* Eliminate duplicate and redundant fence instructions.  */
2086    if (ctx->prev_mb) {
2087        /*
2088         * Merge two barriers of the same type into one,
2089         * or a weaker barrier into a stronger one,
2090         * or two weaker barriers into a stronger one.
2091         *   mb X; mb Y => mb X|Y
2092         *   mb; strl => mb; st
2093         *   ldaq; mb => ld; mb
2094         *   ldaq; strl => ld; mb; st
2095         * Other combinations are also merged into a strong
2096         * barrier.  This is stricter than specified but for
2097         * the purposes of TCG is better than not optimizing.
2098         */
2099        ctx->prev_mb->args[0] |= op->args[0];
2100        tcg_op_remove(ctx->tcg, op);
2101    } else {
2102        ctx->prev_mb = op;
2103    }
2104    return true;
2105}
2106
2107static bool fold_mov(OptContext *ctx, TCGOp *op)
2108{
2109    return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2110}
2111
2112static bool fold_movcond(OptContext *ctx, TCGOp *op)
2113{
2114    uint64_t z_mask, o_mask, s_mask;
2115    TempOptInfo *tt, *ft;
2116    int i;
2117
2118    /* If true and false values are the same, eliminate the cmp. */
2119    if (args_are_copies(op->args[3], op->args[4])) {
2120        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2121    }
2122
2123    /*
2124     * Canonicalize the "false" input reg to match the destination reg so
2125     * that the tcg backend can implement a "move if true" operation.
2126     */
2127    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
2128        op->args[5] = tcg_invert_cond(op->args[5]);
2129    }
2130
2131    i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
2132                                  &op->args[2], &op->args[5]);
2133    if (i >= 0) {
2134        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2135    }
2136
2137    tt = arg_info(op->args[3]);
2138    ft = arg_info(op->args[4]);
2139    z_mask = tt->z_mask | ft->z_mask;
2140    o_mask = tt->o_mask & ft->o_mask;
2141    s_mask = tt->s_mask & ft->s_mask;
2142
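    /*
     * A movcond selecting between the constant pairs {1,0} or {-1,0}
     * is just setcond or negsetcond, with the condition inverted when
     * the values are swapped.  The masks computed above remain valid.
     */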
2143    if (ti_is_const(tt) && ti_is_const(ft)) {
2144        uint64_t tv = ti_const_val(tt);
2145        uint64_t fv = ti_const_val(ft);
2146        TCGCond cond = op->args[5];
2147
2148        if (tv == 1 && fv == 0) {
2149            op->opc = INDEX_op_setcond;
2150            op->args[3] = cond;
2151        } else if (fv == 1 && tv == 0) {
2152            op->opc = INDEX_op_setcond;
2153            op->args[3] = tcg_invert_cond(cond);
2154        } else if (tv == -1 && fv == 0) {
2155            op->opc = INDEX_op_negsetcond;
2156            op->args[3] = cond;
2157        } else if (fv == -1 && tv == 0) {
2158            op->opc = INDEX_op_negsetcond;
2159            op->args[3] = tcg_invert_cond(cond);
2160        }
2161    }
2162
2163    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2164}
2165
2166static bool fold_mul(OptContext *ctx, TCGOp *op)
2167{
2168    if (fold_const2(ctx, op) ||
2169        fold_xi_to_i(ctx, op, 0) ||
2170        fold_xi_to_x(ctx, op, 1)) {
2171        return true;
2172    }
2173    return finish_folding(ctx, op);
2174}
2175
2176static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2177{
2178    if (fold_const2_commutative(ctx, op) ||
2179        fold_xi_to_i(ctx, op, 0)) {
2180        return true;
2181    }
2182    return finish_folding(ctx, op);
2183}
2184
2185static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2186{
2187    swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2188
2189    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2190        uint64_t a = arg_const_val(op->args[2]);
2191        uint64_t b = arg_const_val(op->args[3]);
2192        uint64_t h, l;
2193        TCGArg rl, rh;
2194        TCGOp *op2;
2195
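        /*
         * Compute the double-width product.  For I32, keep each half
         * sign-extended to 64 bits, matching the representation of
         * 32-bit constants.
         */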
2196        switch (op->opc) {
2197        case INDEX_op_mulu2:
2198            if (ctx->type == TCG_TYPE_I32) {
2199                l = (uint64_t)(uint32_t)a * (uint32_t)b;
2200                h = (int32_t)(l >> 32);
2201                l = (int32_t)l;
2202            } else {
2203                mulu64(&l, &h, a, b);
2204            }
2205            break;
2206        case INDEX_op_muls2:
2207            if (ctx->type == TCG_TYPE_I32) {
2208                l = (int64_t)(int32_t)a * (int32_t)b;
2209                h = l >> 32;
2210                l = (int32_t)l;
2211            } else {
2212                muls64(&l, &h, a, b);
2213            }
2214            break;
2215        default:
2216            g_assert_not_reached();
2217        }
2218
2219        rl = op->args[0];
2220        rh = op->args[1];
2221
        /*
         * Opcode 0 is a placeholder: the proper mov opcode is
         * supplied when tcg_opt_gen_movi rewrites each half.
         */
2223        op2 = opt_insert_before(ctx, op, 0, 2);
2224
2225        tcg_opt_gen_movi(ctx, op, rl, l);
2226        tcg_opt_gen_movi(ctx, op2, rh, h);
2227        return true;
2228    }
2229    return finish_folding(ctx, op);
2230}
2231
2232static bool fold_nand(OptContext *ctx, TCGOp *op)
2233{
2234    uint64_t z_mask, o_mask, s_mask;
2235    TempOptInfo *t1, *t2;
2236
2237    if (fold_const2_commutative(ctx, op) ||
2238        fold_xi_to_not(ctx, op, -1)) {
2239        return true;
2240    }
2241
2242    t1 = arg_info(op->args[1]);
2243    t2 = arg_info(op->args[2]);
2244
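    /*
     * NAND is known 1 where either input is known 0, and may be 0
     * only where both inputs may be 1.
     */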
2245    z_mask = ~(t1->o_mask & t2->o_mask);
2246    o_mask = ~(t1->z_mask & t2->z_mask);
2247    s_mask = t1->s_mask & t2->s_mask;
2248
2249    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2250}
2251
2252static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2253{
    /* Set to 1 the rightmost possibly-set bit and all bits to its left. */
2255    uint64_t z_mask = arg_info(op->args[1])->z_mask;
2256    z_mask = -(z_mask & -z_mask);
2257
2258    return fold_masks_z(ctx, op, z_mask);
2259}
2260
2261static bool fold_neg(OptContext *ctx, TCGOp *op)
2262{
2263    return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2264}
2265
2266static bool fold_nor(OptContext *ctx, TCGOp *op)
2267{
2268    uint64_t z_mask, o_mask, s_mask;
2269    TempOptInfo *t1, *t2;
2270
2271    if (fold_const2_commutative(ctx, op) ||
2272        fold_xi_to_not(ctx, op, 0)) {
2273        return true;
2274    }
2275
2276    t1 = arg_info(op->args[1]);
2277    t2 = arg_info(op->args[2]);
2278
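    /*
     * NOR is known 1 only where both inputs are known 0, and may be 1
     * only where neither input is known 1.
     */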
2279    z_mask = ~(t1->o_mask | t2->o_mask);
2280    o_mask = ~(t1->z_mask | t2->z_mask);
2281    s_mask = t1->s_mask & t2->s_mask;
2282
2283    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2284}
2285
2286static bool fold_not(OptContext *ctx, TCGOp *op)
2287{
2288    TempOptInfo *t1;
2289
2290    if (fold_const1(ctx, op)) {
2291        return true;
2292    }
2293
2294    t1 = arg_info(op->args[1]);
2295    return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
2296}
2297
2298static bool fold_or(OptContext *ctx, TCGOp *op)
2299{
2300    uint64_t z_mask, o_mask, s_mask, a_mask;
2301    TempOptInfo *t1, *t2;
2302
2303    if (fold_const2_commutative(ctx, op) ||
2304        fold_xi_to_x(ctx, op, 0) ||
2305        fold_xx_to_x(ctx, op)) {
2306        return true;
2307    }
2308
2309    t1 = arg_info(op->args[1]);
2310    t2 = arg_info(op->args[2]);
2311
2312    z_mask = t1->z_mask | t2->z_mask;
2313    o_mask = t1->o_mask | t2->o_mask;
2314    s_mask = t1->s_mask & t2->s_mask;
2315
2316    /* Affected bits are those not known one, masked by those known zero. */
2317    a_mask = ~t1->o_mask & t2->z_mask;
2318
2319    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2320}
2321
2322static bool fold_orc(OptContext *ctx, TCGOp *op)
2323{
2324    uint64_t z_mask, o_mask, s_mask, a_mask;
2325    TempOptInfo *t1, *t2;
2326
2327    if (fold_const2(ctx, op)) {
2328        return true;
2329    }
2330
2331    t2 = arg_info(op->args[2]);
2332    if (ti_is_const(t2)) {
2333        /* Fold orc r,x,i to or r,x,~i. */
2334        switch (ctx->type) {
2335        case TCG_TYPE_I32:
2336        case TCG_TYPE_I64:
2337            op->opc = INDEX_op_or;
2338            break;
2339        case TCG_TYPE_V64:
2340        case TCG_TYPE_V128:
2341        case TCG_TYPE_V256:
2342            op->opc = INDEX_op_or_vec;
2343            break;
2344        default:
2345            g_assert_not_reached();
2346        }
2347        op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2348        return fold_or(ctx, op);
2349    }
2350    if (fold_xx_to_i(ctx, op, -1) ||
2351        fold_ix_to_not(ctx, op, 0)) {
2352        return true;
2353    }
2354    t1 = arg_info(op->args[1]);
2355
2356    z_mask = t1->z_mask | ~t2->o_mask;
2357    o_mask = t1->o_mask | ~t2->z_mask;
2358    s_mask = t1->s_mask & t2->s_mask;
2359
    /*
     * Affected bits are those not known one in arg1, excluding those
     * known one in arg2: where arg2 is known one, ~arg2 is zero and
     * the or leaves arg1's bit unchanged.
     */
    a_mask = ~t1->o_mask & ~t2->o_mask;
2362
2363    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2364}
2365
2366static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2367{
2368    const TCGOpDef *def = &tcg_op_defs[op->opc];
2369    MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2370    MemOp mop = get_memop(oi);
2371    int width = 8 * memop_size(mop);
2372    uint64_t z_mask = -1, s_mask = 0;
2373
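    /*
     * A narrow load bounds the result: a sign-extending load makes
     * the bits above the loaded width repetitions of its sign bit,
     * while a zero-extending load makes them known zero.
     */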
2374    if (width < 64) {
2375        if (mop & MO_SIGN) {
2376            s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2377        } else {
2378            z_mask = MAKE_64BIT_MASK(0, width);
2379        }
2380    }
2381
2382    /* Opcodes that touch guest memory stop the mb optimization.  */
2383    ctx->prev_mb = NULL;
2384
2385    return fold_masks_zs(ctx, op, z_mask, s_mask);
2386}
2387
2388static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2389{
2390    /* Opcodes that touch guest memory stop the mb optimization.  */
2391    ctx->prev_mb = NULL;
2392    return finish_folding(ctx, op);
2393}
2394
2395static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2396{
2397    /* Opcodes that touch guest memory stop the mb optimization.  */
2398    ctx->prev_mb = NULL;
2399    return true;
2400}
2401
2402static bool fold_remainder(OptContext *ctx, TCGOp *op)
2403{
2404    if (fold_const2(ctx, op) ||
2405        fold_xx_to_i(ctx, op, 0)) {
2406        return true;
2407    }
2408    return finish_folding(ctx, op);
2409}
2410
2411/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2412static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2413{
2414    uint64_t a_zmask, b_val;
2415    TCGCond cond;
2416
2417    if (!arg_is_const(op->args[2])) {
        return 0;
2419    }
2420
2421    a_zmask = arg_info(op->args[1])->z_mask;
2422    b_val = arg_const_val(op->args[2]);
2423    cond = op->args[3];
2424
2425    if (ctx->type == TCG_TYPE_I32) {
2426        a_zmask = (uint32_t)a_zmask;
2427        b_val = (uint32_t)b_val;
2428    }
2429
    /*
     * z_mask is an upper bound on A; if that bound is below the
     * constant B, then A < B unsigned for every possible value of A.
     */
2433    if (a_zmask < b_val) {
2434        bool inv = false;
2435
2436        switch (cond) {
2437        case TCG_COND_NE:
2438        case TCG_COND_LEU:
2439        case TCG_COND_LTU:
2440            inv = true;
2441            /* fall through */
2442        case TCG_COND_GTU:
2443        case TCG_COND_GEU:
2444        case TCG_COND_EQ:
2445            return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2446        default:
2447            break;
2448        }
2449    }
2450
    /*
     * Only the lsb of A may be set, so A is already a boolean 0 or 1.
     */
2454    if (a_zmask <= 1) {
2455        bool convert = false;
2456        bool inv = false;
2457
2458        switch (cond) {
2459        case TCG_COND_EQ:
2460            inv = true;
2461            /* fall through */
2462        case TCG_COND_NE:
2463            convert = (b_val == 0);
2464            break;
2465        case TCG_COND_LTU:
2466        case TCG_COND_TSTEQ:
2467            inv = true;
2468            /* fall through */
2469        case TCG_COND_GEU:
2470        case TCG_COND_TSTNE:
2471            convert = (b_val == 1);
2472            break;
2473        default:
2474            break;
2475        }
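        /*
         * A is already 0/1, so the result is A, !A, -A or -(!A):
         * a plain move, xor 1, neg, or add -1, respectively.
         */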
2476        if (convert) {
2477            if (!inv && !neg) {
2478                return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2479            }
2480
2481            if (!inv) {
2482                op->opc = INDEX_op_neg;
2483            } else if (neg) {
2484                op->opc = INDEX_op_add;
2485                op->args[2] = arg_new_constant(ctx, -1);
2486            } else {
2487                op->opc = INDEX_op_xor;
2488                op->args[2] = arg_new_constant(ctx, 1);
2489            }
2490            return -1;
2491        }
2492    }
2493    return 0;
2494}
2495
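/*
 * Simplify setcond/negsetcond of a TSTEQ/TSTNE condition against a
 * power-of-2 constant into a test of that single bit: sextract or
 * extract when the backend supports it, otherwise shift-and-mask,
 * with a final xor 1, neg or add -1 for inversion and/or negation.
 * E.g. setcond TSTNE, x, 0x10 can become extract ret, x, 4, 1.
 */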
2496static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2497{
2498    TCGCond cond = op->args[3];
2499    TCGArg ret, src1, src2;
2500    TCGOp *op2;
2501    uint64_t val;
2502    int sh;
2503    bool inv;
2504
2505    if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2506        return;
2507    }
2508
2509    src2 = op->args[2];
2510    val = arg_const_val(src2);
2511    if (!is_power_of_2(val)) {
2512        return;
2513    }
2514    sh = ctz64(val);
2515
2516    ret = op->args[0];
2517    src1 = op->args[1];
2518    inv = cond == TCG_COND_TSTEQ;
2519
2520    if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2521        op->opc = INDEX_op_sextract;
2522        op->args[1] = src1;
2523        op->args[2] = sh;
2524        op->args[3] = 1;
2525        return;
2526    } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2527        op->opc = INDEX_op_extract;
2528        op->args[1] = src1;
2529        op->args[2] = sh;
2530        op->args[3] = 1;
2531    } else {
2532        if (sh) {
2533            op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
2534            op2->args[0] = ret;
2535            op2->args[1] = src1;
2536            op2->args[2] = arg_new_constant(ctx, sh);
2537            src1 = ret;
2538        }
2539        op->opc = INDEX_op_and;
2540        op->args[1] = src1;
2541        op->args[2] = arg_new_constant(ctx, 1);
2542    }
2543
2544    if (neg && inv) {
2545        op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
2546        op2->args[0] = ret;
2547        op2->args[1] = ret;
2548        op2->args[2] = arg_new_constant(ctx, -1);
2549    } else if (inv) {
2550        op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
2551        op2->args[0] = ret;
2552        op2->args[1] = ret;
2553        op2->args[2] = arg_new_constant(ctx, 1);
2554    } else if (neg) {
2555        op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
2556        op2->args[0] = ret;
2557        op2->args[1] = ret;
2558    }
2559}
2560
2561static bool fold_setcond(OptContext *ctx, TCGOp *op)
2562{
2563    int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2564                                      &op->args[2], &op->args[3]);
2565    if (i >= 0) {
2566        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2567    }
2568
2569    i = fold_setcond_zmask(ctx, op, false);
2570    if (i > 0) {
2571        return true;
2572    }
2573    if (i == 0) {
2574        fold_setcond_tst_pow2(ctx, op, false);
2575    }
2576
2577    return fold_masks_z(ctx, op, 1);
2578}
2579
2580static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2581{
2582    int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2583                                      &op->args[2], &op->args[3]);
2584    if (i >= 0) {
2585        return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2586    }
2587
2588    i = fold_setcond_zmask(ctx, op, true);
2589    if (i > 0) {
2590        return true;
2591    }
2592    if (i == 0) {
2593        fold_setcond_tst_pow2(ctx, op, true);
2594    }
2595
2596    /* Value is {0,-1} so all bits are repetitions of the sign. */
2597    return fold_masks_s(ctx, op, -1);
2598}
2599
2600static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2601{
2602    TCGCond cond;
2603    int i, inv = 0;
2604
2605    i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2606    cond = op->args[5];
2607    if (i >= 0) {
2608        goto do_setcond_const;
2609    }
2610
2611    switch (cond) {
2612    case TCG_COND_LT:
2613    case TCG_COND_GE:
2614        /*
2615         * Simplify LT/GE comparisons vs zero to a single compare
2616         * vs the high word of the input.
2617         */
2618        if (arg_is_const_val(op->args[3], 0) &&
2619            arg_is_const_val(op->args[4], 0)) {
2620            goto do_setcond_high;
2621        }
2622        break;
2623
2624    case TCG_COND_NE:
2625        inv = 1;
2626        QEMU_FALLTHROUGH;
2627    case TCG_COND_EQ:
2628        /*
2629         * Simplify EQ/NE comparisons where one of the pairs
2630         * can be simplified.
2631         */
2632        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2633                                     op->args[3], cond);
2634        switch (i ^ inv) {
2635        case 0:
2636            goto do_setcond_const;
2637        case 1:
2638            goto do_setcond_high;
2639        }
2640
2641        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2642                                     op->args[4], cond);
2643        switch (i ^ inv) {
2644        case 0:
2645            goto do_setcond_const;
2646        case 1:
2647            goto do_setcond_low;
2648        }
2649        break;
2650
2651    case TCG_COND_TSTEQ:
2652    case TCG_COND_TSTNE:
2653        if (arg_is_const_val(op->args[3], 0)) {
2654            goto do_setcond_high;
2655        }
2656        if (arg_is_const_val(op->args[4], 0)) {
2657            goto do_setcond_low;
2658        }
2659        break;
2660
2661    default:
2662        break;
2663
2664    do_setcond_low:
2665        op->args[2] = op->args[3];
2666        op->args[3] = cond;
2667        op->opc = INDEX_op_setcond;
2668        return fold_setcond(ctx, op);
2669
2670    do_setcond_high:
2671        op->args[1] = op->args[2];
2672        op->args[2] = op->args[4];
2673        op->args[3] = cond;
2674        op->opc = INDEX_op_setcond;
2675        return fold_setcond(ctx, op);
2676    }
2677
2678    return fold_masks_z(ctx, op, 1);
2679
2680 do_setcond_const:
2681    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2682}
2683
2684static bool fold_sextract(OptContext *ctx, TCGOp *op)
2685{
2686    uint64_t z_mask, o_mask, s_mask, a_mask;
2687    TempOptInfo *t1 = arg_info(op->args[1]);
2688    int pos = op->args[2];
2689    int len = op->args[3];
2690
2691    if (ti_is_const(t1)) {
2692        return tcg_opt_gen_movi(ctx, op, op->args[0],
2693                                sextract64(ti_const_val(t1), pos, len));
2694    }
2695
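    /*
     * The result sign-extends from bit (len - 1).  With pos == 0, the
     * op is a no-op iff the input already has all of those sign-bit
     * repetitions; with pos != 0 it can never fold to a simple move.
     */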
2696    s_mask = t1->s_mask >> pos;
2697    s_mask |= -1ull << (len - 1);
2698    a_mask = pos ? -1 : s_mask & ~t1->s_mask;
2699
2700    z_mask = sextract64(t1->z_mask, pos, len);
2701    o_mask = sextract64(t1->o_mask, pos, len);
2702
2703    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2704}
2705
2706static bool fold_shift(OptContext *ctx, TCGOp *op)
2707{
2708    uint64_t s_mask, z_mask, o_mask;
2709    TempOptInfo *t1, *t2;
2710
2711    if (fold_const2(ctx, op) ||
2712        fold_ix_to_i(ctx, op, 0) ||
2713        fold_xi_to_x(ctx, op, 0)) {
2714        return true;
2715    }
2716
2717    t1 = arg_info(op->args[1]);
2718    t2 = arg_info(op->args[2]);
2719    s_mask = t1->s_mask;
2720    z_mask = t1->z_mask;
2721    o_mask = t1->o_mask;
2722
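    /* For a constant shift count, apply the same shift to each mask. */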
2723    if (ti_is_const(t2)) {
2724        int sh = ti_const_val(t2);
2725
2726        z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2727        o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
2728        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2729
2730        return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2731    }
2732
2733    switch (op->opc) {
2734    case INDEX_op_sar:
2735        /*
2736         * Arithmetic right shift will not reduce the number of
2737         * input sign repetitions.
2738         */
2739        return fold_masks_s(ctx, op, s_mask);
2740    case INDEX_op_shr:
2741        /*
2742         * If the sign bit is known zero, then logical right shift
2743         * will not reduce the number of input sign repetitions.
2744         */
2745        if (~z_mask & -s_mask) {
2746            return fold_masks_s(ctx, op, s_mask);
2747        }
2748        break;
2749    default:
2750        break;
2751    }
2752
2753    return finish_folding(ctx, op);
2754}
2755
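/* Fold sub r,0,x to neg r,x; vectors require backend support for neg. */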
2756static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2757{
2758    TCGOpcode neg_op;
2759    bool have_neg;
2760
2761    if (!arg_is_const_val(op->args[1], 0)) {
2762        return false;
2763    }
2764
2765    switch (ctx->type) {
2766    case TCG_TYPE_I32:
2767    case TCG_TYPE_I64:
2768        neg_op = INDEX_op_neg;
2769        have_neg = true;
2770        break;
2771    case TCG_TYPE_V64:
2772    case TCG_TYPE_V128:
2773    case TCG_TYPE_V256:
2774        neg_op = INDEX_op_neg_vec;
2775        have_neg = (TCG_TARGET_HAS_neg_vec &&
2776                    tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2777        break;
2778    default:
2779        g_assert_not_reached();
2780    }
2781    if (have_neg) {
2782        op->opc = neg_op;
2783        op->args[1] = op->args[2];
2784        return fold_neg_no_const(ctx, op);
2785    }
2786    return false;
2787}
2788
2789/* We cannot as yet do_constant_folding with vectors. */
2790static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2791{
2792    if (fold_xx_to_i(ctx, op, 0) ||
2793        fold_xi_to_x(ctx, op, 0) ||
2794        fold_sub_to_neg(ctx, op)) {
2795        return true;
2796    }
2797    return finish_folding(ctx, op);
2798}
2799
2800static bool fold_sub(OptContext *ctx, TCGOp *op)
2801{
2802    if (fold_const2(ctx, op) ||
2803        fold_xx_to_i(ctx, op, 0) ||
2804        fold_xi_to_x(ctx, op, 0) ||
2805        fold_sub_to_neg(ctx, op)) {
2806        return true;
2807    }
2808
2809    /* Fold sub r,x,i to add r,x,-i */
2810    if (arg_is_const(op->args[2])) {
2811        uint64_t val = arg_const_val(op->args[2]);
2812
2813        op->opc = INDEX_op_add;
2814        op->args[2] = arg_new_constant(ctx, -val);
2815    }
2816    return finish_folding(ctx, op);
2817}
2818
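/*
 * The borrow-in to OP is known constant, so the borrow-out produced
 * by the previous subtract is no longer needed; rewrite that op into
 * its non-borrow form.
 */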
2819static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2820{
2821    TempOptInfo *t2;
2822
2823    op = QTAILQ_PREV(op, link);
2824    switch (op->opc) {
2825    case INDEX_op_subbo:
2826        op->opc = INDEX_op_sub;
2827        fold_sub(ctx, op);
2828        break;
2829    case INDEX_op_subbio:
2830        op->opc = INDEX_op_subbi;
2831        break;
2832    case INDEX_op_subb1o:
2833        t2 = arg_info(op->args[2]);
2834        if (ti_is_const(t2)) {
2835            op->opc = INDEX_op_add;
2836            op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2837            /* Perform other constant folding, if needed. */
2838            fold_add(ctx, op);
2839        } else {
2840            TCGArg ret = op->args[0];
2841            op->opc = INDEX_op_sub;
2842            op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2843            op->args[0] = ret;
2844            op->args[1] = ret;
2845            op->args[2] = arg_new_constant(ctx, -1);
2846        }
2847        break;
2848    default:
2849        g_assert_not_reached();
2850    }
2851}
2852
2853static bool fold_subbi(OptContext *ctx, TCGOp *op)
2854{
2855    TempOptInfo *t2;
2856    int borrow_in = ctx->carry_state;
2857
2858    if (borrow_in < 0) {
2859        return finish_folding(ctx, op);
2860    }
2861    ctx->carry_state = -1;
2862
2863    squash_prev_borrowout(ctx, op);
2864    if (borrow_in == 0) {
2865        op->opc = INDEX_op_sub;
2866        return fold_sub(ctx, op);
2867    }
2868
2869    /*
     * Propagate the known borrow-in into any constant, then negate to
2871     * transform from sub to add.  If there is no constant, emit a
2872     * separate add -1.
2873     */
2874    t2 = arg_info(op->args[2]);
2875    if (ti_is_const(t2)) {
2876        op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2877    } else {
2878        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2879
2880        op2->args[0] = op->args[0];
2881        op2->args[1] = op->args[1];
2882        op2->args[2] = op->args[2];
2883        fold_sub(ctx, op2);
2884
2885        op->args[1] = op->args[0];
2886        op->args[2] = arg_new_constant(ctx, -1);
2887    }
2888    op->opc = INDEX_op_add;
2889    return fold_add(ctx, op);
2890}
2891
2892static bool fold_subbio(OptContext *ctx, TCGOp *op)
2893{
2894    TempOptInfo *t1, *t2;
2895    int borrow_out = -1;
2896
2897    if (ctx->carry_state < 0) {
2898        return finish_folding(ctx, op);
2899    }
2900
2901    squash_prev_borrowout(ctx, op);
2902    if (ctx->carry_state == 0) {
2903        goto do_subbo;
2904    }
2905
2906    t1 = arg_info(op->args[1]);
2907    t2 = arg_info(op->args[2]);
2908
2909    /* Propagate the known borrow-in into a constant, if possible. */
2910    if (ti_is_const(t2)) {
2911        uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2912        uint64_t v = ti_const_val(t2) & max;
2913
2914        if (v < max) {
2915            op->args[2] = arg_new_constant(ctx, v + 1);
2916            goto do_subbo;
2917        }
2918        /* subtracting max + 1 produces known borrow out. */
2919        borrow_out = 1;
2920    }
2921    if (ti_is_const(t1)) {
2922        uint64_t v = ti_const_val(t1);
2923        if (v != 0) {
2924            op->args[2] = arg_new_constant(ctx, v - 1);
2925            goto do_subbo;
2926        }
2927    }
2928
    /* Adjust the opcode to remember the known borrow-in. */
2930    op->opc = INDEX_op_subb1o;
2931    ctx->carry_state = borrow_out;
2932    return finish_folding(ctx, op);
2933
2934 do_subbo:
2935    op->opc = INDEX_op_subbo;
2936    return fold_subbo(ctx, op);
2937}
2938
2939static bool fold_subbo(OptContext *ctx, TCGOp *op)
2940{
2941    TempOptInfo *t1 = arg_info(op->args[1]);
2942    TempOptInfo *t2 = arg_info(op->args[2]);
2943    int borrow_out = -1;
2944
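    /*
     * The borrow-out is known when subtracting zero, or when both
     * inputs are constant; otherwise it stays unknown (-1).
     */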
2945    if (ti_is_const(t2)) {
2946        uint64_t v2 = ti_const_val(t2);
2947        if (v2 == 0) {
2948            borrow_out = 0;
2949        } else if (ti_is_const(t1)) {
2950            uint64_t v1 = ti_const_val(t1);
2951            borrow_out = v1 < v2;
2952        }
2953    }
2954    ctx->carry_state = borrow_out;
2955    return finish_folding(ctx, op);
2956}
2957
2958static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2959{
2960    uint64_t z_mask = -1, s_mask = 0;
2961
2962    /* We can't do any folding with a load, but we can record bits. */
2963    switch (op->opc) {
2964    case INDEX_op_ld8s:
2965        s_mask = INT8_MIN;
2966        break;
2967    case INDEX_op_ld8u:
2968        z_mask = MAKE_64BIT_MASK(0, 8);
2969        break;
2970    case INDEX_op_ld16s:
2971        s_mask = INT16_MIN;
2972        break;
2973    case INDEX_op_ld16u:
2974        z_mask = MAKE_64BIT_MASK(0, 16);
2975        break;
2976    case INDEX_op_ld32s:
2977        s_mask = INT32_MIN;
2978        break;
2979    case INDEX_op_ld32u:
2980        z_mask = MAKE_64BIT_MASK(0, 32);
2981        break;
2982    default:
2983        g_assert_not_reached();
2984    }
2985    return fold_masks_zs(ctx, op, z_mask, s_mask);
2986}
2987
2988static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2989{
2990    TCGTemp *dst, *src;
2991    intptr_t ofs;
2992    TCGType type;
2993
2994    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2995        return finish_folding(ctx, op);
2996    }
2997
2998    type = ctx->type;
2999    ofs = op->args[2];
3000    dst = arg_temp(op->args[0]);
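    /* If a temp of matching type already holds this env slot, copy it. */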
3001    src = find_mem_copy_for(ctx, type, ofs);
3002    if (src && src->base_type == type) {
3003        return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
3004    }
3005
3006    reset_ts(ctx, dst);
3007    record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
3008    return true;
3009}
3010
3011static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
3012{
3013    intptr_t ofs = op->args[2];
    intptr_t lm1;  /* store length minus 1 */
3015
3016    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3017        remove_mem_copy_all(ctx);
3018        return true;
3019    }
3020
3021    switch (op->opc) {
3022    case INDEX_op_st8:
3023        lm1 = 0;
3024        break;
3025    case INDEX_op_st16:
3026        lm1 = 1;
3027        break;
3028    case INDEX_op_st32:
3029        lm1 = 3;
3030        break;
3031    case INDEX_op_st:
3032    case INDEX_op_st_vec:
3033        lm1 = tcg_type_size(ctx->type) - 1;
3034        break;
3035    default:
3036        g_assert_not_reached();
3037    }
3038    remove_mem_copy_in(ctx, ofs, ofs + lm1);
3039    return true;
3040}
3041
3042static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
3043{
3044    TCGTemp *src;
3045    intptr_t ofs, last;
3046    TCGType type;
3047
3048    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3049        return fold_tcg_st(ctx, op);
3050    }
3051
3052    src = arg_temp(op->args[0]);
3053    ofs = op->args[2];
3054    type = ctx->type;
3055
3056    /*
3057     * Eliminate duplicate stores of a constant.
3058     * This happens frequently when the target ISA zero-extends.
3059     */
3060    if (ts_is_const(src)) {
3061        TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
3062        if (src == prev) {
3063            tcg_op_remove(ctx->tcg, op);
3064            return true;
3065        }
3066    }
3067
3068    last = ofs + tcg_type_size(type) - 1;
3069    remove_mem_copy_in(ctx, ofs, last);
3070    record_mem_copy(ctx, type, src, ofs, last);
3071    return true;
3072}
3073
3074static bool fold_xor(OptContext *ctx, TCGOp *op)
3075{
3076    uint64_t z_mask, o_mask, s_mask;
3077    TempOptInfo *t1, *t2;
3078
3079    if (fold_const2_commutative(ctx, op) ||
3080        fold_xx_to_i(ctx, op, 0) ||
3081        fold_xi_to_x(ctx, op, 0) ||
3082        fold_xi_to_not(ctx, op, -1)) {
3083        return true;
3084    }
3085
3086    t1 = arg_info(op->args[1]);
3087    t2 = arg_info(op->args[2]);
3088
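    /*
     * XOR may be 1 wherever the inputs may differ, and is known 1
     * where one input is known 1 and the other is known 0.
     */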
3089    z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
3090    o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
3091    s_mask = t1->s_mask & t2->s_mask;
3092
3093    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
3094}
3095
3096/* Propagate constants and copies, fold constant expressions. */
3097void tcg_optimize(TCGContext *s)
3098{
3099    int nb_temps, i;
3100    TCGOp *op, *op_next;
3101    OptContext ctx = { .tcg = s };
3102
3103    QSIMPLEQ_INIT(&ctx.mem_free);
3104
    /*
     * Each temp carries its optimization state in a TempOptInfo hung
     * off state_ptr.  If the temp holds a constant, the value is kept
     * there.  If the temp is a copy of other temps, all copies are
     * reachable through the doubly linked circular list of
     * next_copy/prev_copy.
     */
3109
3110    nb_temps = s->nb_temps;
3111    for (i = 0; i < nb_temps; ++i) {
3112        s->temps[i].state_ptr = NULL;
3113    }
3114
3115    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3116        TCGOpcode opc = op->opc;
3117        const TCGOpDef *def;
3118        bool done = false;
3119
3120        /* Calls are special. */
3121        if (opc == INDEX_op_call) {
3122            fold_call(&ctx, op);
3123            continue;
3124        }
3125
3126        def = &tcg_op_defs[opc];
3127        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3128        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
3129
3130        /* Pre-compute the type of the operation. */
3131        ctx.type = TCGOP_TYPE(op);
3132
3133        /*
3134         * Process each opcode.
3135         * Sorted alphabetically by opcode as much as possible.
3136         */
3137        switch (opc) {
3138        case INDEX_op_add:
3139            done = fold_add(&ctx, op);
3140            break;
3141        case INDEX_op_add_vec:
3142            done = fold_add_vec(&ctx, op);
3143            break;
3144        case INDEX_op_addci:
3145            done = fold_addci(&ctx, op);
3146            break;
3147        case INDEX_op_addcio:
3148            done = fold_addcio(&ctx, op);
3149            break;
3150        case INDEX_op_addco:
3151            done = fold_addco(&ctx, op);
3152            break;
3153        case INDEX_op_and:
3154        case INDEX_op_and_vec:
3155            done = fold_and(&ctx, op);
3156            break;
3157        case INDEX_op_andc:
3158        case INDEX_op_andc_vec:
3159            done = fold_andc(&ctx, op);
3160            break;
3161        case INDEX_op_brcond:
3162            done = fold_brcond(&ctx, op);
3163            break;
3164        case INDEX_op_brcond2_i32:
3165            done = fold_brcond2(&ctx, op);
3166            break;
3167        case INDEX_op_bswap16:
3168        case INDEX_op_bswap32:
3169        case INDEX_op_bswap64:
3170            done = fold_bswap(&ctx, op);
3171            break;
3172        case INDEX_op_clz:
3173        case INDEX_op_ctz:
3174            done = fold_count_zeros(&ctx, op);
3175            break;
3176        case INDEX_op_ctpop:
3177            done = fold_ctpop(&ctx, op);
3178            break;
3179        case INDEX_op_deposit:
3180            done = fold_deposit(&ctx, op);
3181            break;
3182        case INDEX_op_divs:
3183        case INDEX_op_divu:
3184            done = fold_divide(&ctx, op);
3185            break;
3186        case INDEX_op_dup_vec:
3187            done = fold_dup(&ctx, op);
3188            break;
3189        case INDEX_op_dup2_vec:
3190            done = fold_dup2(&ctx, op);
3191            break;
3192        case INDEX_op_eqv:
3193        case INDEX_op_eqv_vec:
3194            done = fold_eqv(&ctx, op);
3195            break;
3196        case INDEX_op_extract:
3197            done = fold_extract(&ctx, op);
3198            break;
3199        case INDEX_op_extract2:
3200            done = fold_extract2(&ctx, op);
3201            break;
3202        case INDEX_op_ext_i32_i64:
3203            done = fold_exts(&ctx, op);
3204            break;
3205        case INDEX_op_extu_i32_i64:
3206        case INDEX_op_extrl_i64_i32:
3207        case INDEX_op_extrh_i64_i32:
3208            done = fold_extu(&ctx, op);
3209            break;
3210        case INDEX_op_ld8s:
3211        case INDEX_op_ld8u:
3212        case INDEX_op_ld16s:
3213        case INDEX_op_ld16u:
3214        case INDEX_op_ld32s:
3215        case INDEX_op_ld32u:
3216            done = fold_tcg_ld(&ctx, op);
3217            break;
3218        case INDEX_op_ld:
3219        case INDEX_op_ld_vec:
3220            done = fold_tcg_ld_memcopy(&ctx, op);
3221            break;
3222        case INDEX_op_st8:
3223        case INDEX_op_st16:
3224        case INDEX_op_st32:
3225            done = fold_tcg_st(&ctx, op);
3226            break;
3227        case INDEX_op_st:
3228        case INDEX_op_st_vec:
3229            done = fold_tcg_st_memcopy(&ctx, op);
3230            break;
3231        case INDEX_op_mb:
3232            done = fold_mb(&ctx, op);
3233            break;
3234        case INDEX_op_mov:
3235        case INDEX_op_mov_vec:
3236            done = fold_mov(&ctx, op);
3237            break;
3238        case INDEX_op_movcond:
3239            done = fold_movcond(&ctx, op);
3240            break;
3241        case INDEX_op_mul:
3242            done = fold_mul(&ctx, op);
3243            break;
3244        case INDEX_op_mulsh:
3245        case INDEX_op_muluh:
3246            done = fold_mul_highpart(&ctx, op);
3247            break;
3248        case INDEX_op_muls2:
3249        case INDEX_op_mulu2:
3250            done = fold_multiply2(&ctx, op);
3251            break;
3252        case INDEX_op_nand:
3253        case INDEX_op_nand_vec:
3254            done = fold_nand(&ctx, op);
3255            break;
3256        case INDEX_op_neg:
3257            done = fold_neg(&ctx, op);
3258            break;
3259        case INDEX_op_nor:
3260        case INDEX_op_nor_vec:
3261            done = fold_nor(&ctx, op);
3262            break;
3263        case INDEX_op_not:
3264        case INDEX_op_not_vec:
3265            done = fold_not(&ctx, op);
3266            break;
3267        case INDEX_op_or:
3268        case INDEX_op_or_vec:
3269            done = fold_or(&ctx, op);
3270            break;
3271        case INDEX_op_orc:
3272        case INDEX_op_orc_vec:
3273            done = fold_orc(&ctx, op);
3274            break;
3275        case INDEX_op_qemu_ld:
3276            done = fold_qemu_ld_1reg(&ctx, op);
3277            break;
3278        case INDEX_op_qemu_ld2:
3279            done = fold_qemu_ld_2reg(&ctx, op);
3280            break;
3281        case INDEX_op_qemu_st:
3282        case INDEX_op_qemu_st2:
3283            done = fold_qemu_st(&ctx, op);
3284            break;
3285        case INDEX_op_rems:
3286        case INDEX_op_remu:
3287            done = fold_remainder(&ctx, op);
3288            break;
3289        case INDEX_op_rotl:
3290        case INDEX_op_rotr:
3291        case INDEX_op_sar:
3292        case INDEX_op_shl:
3293        case INDEX_op_shr:
3294            done = fold_shift(&ctx, op);
3295            break;
3296        case INDEX_op_setcond:
3297            done = fold_setcond(&ctx, op);
3298            break;
3299        case INDEX_op_negsetcond:
3300            done = fold_negsetcond(&ctx, op);
3301            break;
3302        case INDEX_op_setcond2_i32:
3303            done = fold_setcond2(&ctx, op);
3304            break;
3305        case INDEX_op_cmp_vec:
3306            done = fold_cmp_vec(&ctx, op);
3307            break;
3308        case INDEX_op_cmpsel_vec:
3309            done = fold_cmpsel_vec(&ctx, op);
3310            break;
3311        case INDEX_op_bitsel_vec:
3312            done = fold_bitsel_vec(&ctx, op);
3313            break;
3314        case INDEX_op_sextract:
3315            done = fold_sextract(&ctx, op);
3316            break;
3317        case INDEX_op_sub:
3318            done = fold_sub(&ctx, op);
3319            break;
3320        case INDEX_op_subbi:
3321            done = fold_subbi(&ctx, op);
3322            break;
3323        case INDEX_op_subbio:
3324            done = fold_subbio(&ctx, op);
3325            break;
3326        case INDEX_op_subbo:
3327            done = fold_subbo(&ctx, op);
3328            break;
3329        case INDEX_op_sub_vec:
3330            done = fold_sub_vec(&ctx, op);
3331            break;
3332        case INDEX_op_xor:
3333        case INDEX_op_xor_vec:
3334            done = fold_xor(&ctx, op);
3335            break;
3336        case INDEX_op_set_label:
3337        case INDEX_op_br:
3338        case INDEX_op_exit_tb:
3339        case INDEX_op_goto_tb:
3340        case INDEX_op_goto_ptr:
3341            finish_ebb(&ctx);
3342            done = true;
3343            break;
3344        default:
3345            done = finish_folding(&ctx, op);
3346            break;
3347        }
3348        tcg_debug_assert(done);
3349    }
3350}
3351