qemu/tcg/optimize.c
<<
>>
Prefs
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "tcg/tcg-op.h"
  28#include "tcg-internal.h"
  29
/* Expand to the i32 and i64 case labels for opcode X. */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

/* Expand to the i32, i64 and vector case labels for opcode X. */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
  38
/* Per-temp state tracked by the optimizer, attached via ts->state_ptr. */
typedef struct TempOptInfo {
    bool is_const;          /* temp is known to hold the constant VAL */
    TCGTemp *prev_copy;     /* circular doubly-linked list of temps known */
    TCGTemp *next_copy;     /*   to hold the same value (singleton = self) */
    uint64_t val;           /* constant value; valid only if IS_CONST */
    uint64_t mask;          /* bits that may possibly be set */
} TempOptInfo;
  46
/* Return the optimizer state attached to TS (set up by init_ts_info). */
static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}
  51
/* Return the optimizer state for the temp behind ARG. */
static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}
  56
/* True if TS is known to hold a constant value. */
static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}
  61
/* True if the temp behind ARG is known to hold a constant value. */
static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}
  66
/* True if TS is on a non-trivial copy list, i.e. at least one other
   temp is known to hold the same value. */
static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}
  71
  72/* Reset TEMP's state, possibly removing the temp for the list of copies.  */
  73static void reset_ts(TCGTemp *ts)
  74{
  75    TempOptInfo *ti = ts_info(ts);
  76    TempOptInfo *pi = ts_info(ti->prev_copy);
  77    TempOptInfo *ni = ts_info(ti->next_copy);
  78
  79    ni->prev_copy = ti->prev_copy;
  80    pi->next_copy = ti->next_copy;
  81    ti->next_copy = ts;
  82    ti->prev_copy = ts;
  83    ti->is_const = false;
  84    ti->mask = -1;
  85}
  86
/* Reset the state of the temp behind ARG; see reset_ts(). */
static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}
  91
  92/* Initialize and activate a temporary.  */
  93static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
  94{
  95    size_t idx = temp_idx(ts);
  96    TempOptInfo *ti;
  97
  98    if (test_bit(idx, temps_used->l)) {
  99        return;
 100    }
 101    set_bit(idx, temps_used->l);
 102
 103    ti = ts->state_ptr;
 104    if (ti == NULL) {
 105        ti = tcg_malloc(sizeof(TempOptInfo));
 106        ts->state_ptr = ti;
 107    }
 108
 109    ti->next_copy = ts;
 110    ti->prev_copy = ts;
 111    if (ts->kind == TEMP_CONST) {
 112        ti->is_const = true;
 113        ti->val = ts->val;
 114        ti->mask = ts->val;
 115        if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
 116            /* High bits of a 32-bit quantity are garbage.  */
 117            ti->mask |= ~0xffffffffull;
 118        }
 119    } else {
 120        ti->is_const = false;
 121        ti->mask = -1;
 122    }
 123}
 124
/* Initialize and activate the temporary behind ARG. */
static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(temps_used, arg_temp(arg));
}
 129
 130static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 131{
 132    TCGTemp *i, *g, *l;
 133
 134    /* If this is already readonly, we can't do better. */
 135    if (temp_readonly(ts)) {
 136        return ts;
 137    }
 138
 139    g = l = NULL;
 140    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
 141        if (temp_readonly(i)) {
 142            return i;
 143        } else if (i->kind > ts->kind) {
 144            if (i->kind == TEMP_GLOBAL) {
 145                g = i;
 146            } else if (i->kind == TEMP_LOCAL) {
 147                l = i;
 148            }
 149        }
 150    }
 151
 152    /* If we didn't find a better representation, return the same temp. */
 153    return g ? g : l ? l : ts;
 154}
 155
 156static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
 157{
 158    TCGTemp *i;
 159
 160    if (ts1 == ts2) {
 161        return true;
 162    }
 163
 164    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
 165        return false;
 166    }
 167
 168    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
 169        if (i == ts2) {
 170            return true;
 171        }
 172    }
 173
 174    return false;
 175}
 176
/* True if the temps behind ARG1 and ARG2 are known to hold the same value. */
static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}
 181
/* Rewrite OP in place as "mov dst, src", propagating the known-bits
   mask and, when the types match, joining DST into SRC's copy list. */
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    TempOptInfo *di;
    TempOptInfo *si;
    uint64_t mask;
    TCGOpcode new_op;

    /* A move between temps already known equal is a no-op; drop it. */
    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    /* Forget everything previously known about the destination.
       This must happen before reading DI below. */
    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    /* Pick the mov opcode matching the class/width of the original op. */
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    /* The destination inherits the source's known bits. */
    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    /* Record the copy relation only for same-typed temps; a cross-type
       mov does not make the two values interchangeable. */
    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        /* Splice DST into the circular list right after SRC. */
        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}
 231
 232static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
 233                             TCGOp *op, TCGArg dst, uint64_t val)
 234{
 235    const TCGOpDef *def = &tcg_op_defs[op->opc];
 236    TCGType type;
 237    TCGTemp *tv;
 238
 239    if (def->flags & TCG_OPF_VECTOR) {
 240        type = TCGOP_VECL(op) + TCG_TYPE_V64;
 241    } else if (def->flags & TCG_OPF_64BIT) {
 242        type = TCG_TYPE_I64;
 243    } else {
 244        type = TCG_TYPE_I32;
 245    }
 246
 247    /* Convert movi to mov with constant temp. */
 248    tv = tcg_constant_internal(type, val);
 249    init_ts_info(temps_used, tv);
 250    tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
 251}
 252
/* Evaluate operation OP on constant operands X and Y and return the
   result at 64-bit width; do_constant_folding() truncates the result
   of 32-bit opcodes.  For bswap, Y carries the TCG_BSWAP_* flags; for
   clz/ctz, Y is the value to return when X is zero. */
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    /* Shift counts are masked to the operand width, matching the
       target-independent TCG shift semantics. */
    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    /* clz/ctz of zero is defined by TCG to return the second operand. */
    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    /* For bswap16/32, Y holds the flags: TCG_BSWAP_OS requests a
       sign-extended result. */
    CASE_OP_32_64(bswap16):
        x = bswap16(x);
        return y & TCG_BSWAP_OS ? (int16_t)x : x;

    CASE_OP_32_64(bswap32):
        x = bswap32(x);
        return y & TCG_BSWAP_OS ? (int32_t)x : x;

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    /* High half of a widening multiply. */
    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    /* NOTE(review): INT_MIN / -1 (and the i64 analogue) still overflows
       and may trap on the host — confirm guests cannot reach it here. */
    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
 417
 418static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
 419{
 420    const TCGOpDef *def = &tcg_op_defs[op];
 421    uint64_t res = do_constant_folding_2(op, x, y);
 422    if (!(def->flags & TCG_OPF_64BIT)) {
 423        res = (int32_t)res;
 424    }
 425    return res;
 426}
 427
 428static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 429{
 430    switch (c) {
 431    case TCG_COND_EQ:
 432        return x == y;
 433    case TCG_COND_NE:
 434        return x != y;
 435    case TCG_COND_LT:
 436        return (int32_t)x < (int32_t)y;
 437    case TCG_COND_GE:
 438        return (int32_t)x >= (int32_t)y;
 439    case TCG_COND_LE:
 440        return (int32_t)x <= (int32_t)y;
 441    case TCG_COND_GT:
 442        return (int32_t)x > (int32_t)y;
 443    case TCG_COND_LTU:
 444        return x < y;
 445    case TCG_COND_GEU:
 446        return x >= y;
 447    case TCG_COND_LEU:
 448        return x <= y;
 449    case TCG_COND_GTU:
 450        return x > y;
 451    default:
 452        tcg_abort();
 453    }
 454}
 455
 456static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 457{
 458    switch (c) {
 459    case TCG_COND_EQ:
 460        return x == y;
 461    case TCG_COND_NE:
 462        return x != y;
 463    case TCG_COND_LT:
 464        return (int64_t)x < (int64_t)y;
 465    case TCG_COND_GE:
 466        return (int64_t)x >= (int64_t)y;
 467    case TCG_COND_LE:
 468        return (int64_t)x <= (int64_t)y;
 469    case TCG_COND_GT:
 470        return (int64_t)x > (int64_t)y;
 471    case TCG_COND_LTU:
 472        return x < y;
 473    case TCG_COND_GEU:
 474        return x >= y;
 475    case TCG_COND_LEU:
 476        return x <= y;
 477    case TCG_COND_GTU:
 478        return x > y;
 479    default:
 480        tcg_abort();
 481    }
 482}
 483
 484static bool do_constant_folding_cond_eq(TCGCond c)
 485{
 486    switch (c) {
 487    case TCG_COND_GT:
 488    case TCG_COND_LTU:
 489    case TCG_COND_LT:
 490    case TCG_COND_GTU:
 491    case TCG_COND_NE:
 492        return 0;
 493    case TCG_COND_GE:
 494    case TCG_COND_GEU:
 495    case TCG_COND_LE:
 496    case TCG_COND_LEU:
 497    case TCG_COND_EQ:
 498        return 1;
 499    default:
 500        tcg_abort();
 501    }
 502}
 503
 504/* Return 2 if the condition can't be simplified, and the result
 505   of the condition (0 or 1) if it can */
 506static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
 507                                       TCGArg y, TCGCond c)
 508{
 509    uint64_t xv = arg_info(x)->val;
 510    uint64_t yv = arg_info(y)->val;
 511
 512    if (arg_is_const(x) && arg_is_const(y)) {
 513        const TCGOpDef *def = &tcg_op_defs[op];
 514        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
 515        if (def->flags & TCG_OPF_64BIT) {
 516            return do_constant_folding_cond_64(xv, yv, c);
 517        } else {
 518            return do_constant_folding_cond_32(xv, yv, c);
 519        }
 520    } else if (args_are_copies(x, y)) {
 521        return do_constant_folding_cond_eq(c);
 522    } else if (arg_is_const(y) && yv == 0) {
 523        switch (c) {
 524        case TCG_COND_LTU:
 525            return 0;
 526        case TCG_COND_GEU:
 527            return 1;
 528        default:
 529            return 2;
 530        }
 531    }
 532    return 2;
 533}
 534
 535/* Return 2 if the condition can't be simplified, and the result
 536   of the condition (0 or 1) if it can */
 537static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 538{
 539    TCGArg al = p1[0], ah = p1[1];
 540    TCGArg bl = p2[0], bh = p2[1];
 541
 542    if (arg_is_const(bl) && arg_is_const(bh)) {
 543        tcg_target_ulong blv = arg_info(bl)->val;
 544        tcg_target_ulong bhv = arg_info(bh)->val;
 545        uint64_t b = deposit64(blv, 32, 32, bhv);
 546
 547        if (arg_is_const(al) && arg_is_const(ah)) {
 548            tcg_target_ulong alv = arg_info(al)->val;
 549            tcg_target_ulong ahv = arg_info(ah)->val;
 550            uint64_t a = deposit64(alv, 32, 32, ahv);
 551            return do_constant_folding_cond_64(a, b, c);
 552        }
 553        if (b == 0) {
 554            switch (c) {
 555            case TCG_COND_LTU:
 556                return 0;
 557            case TCG_COND_GEU:
 558                return 1;
 559            default:
 560                break;
 561            }
 562        }
 563    }
 564    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
 565        return do_constant_folding_cond_eq(c);
 566    }
 567    return 2;
 568}
 569
 570static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 571{
 572    TCGArg a1 = *p1, a2 = *p2;
 573    int sum = 0;
 574    sum += arg_is_const(a1);
 575    sum -= arg_is_const(a2);
 576
 577    /* Prefer the constant in second argument, and then the form
 578       op a, a, b, which is better handled on non-RISC hosts. */
 579    if (sum > 0 || (sum == 0 && dest == a2)) {
 580        *p1 = a2;
 581        *p2 = a1;
 582        return true;
 583    }
 584    return false;
 585}
 586
 587static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 588{
 589    int sum = 0;
 590    sum += arg_is_const(p1[0]);
 591    sum += arg_is_const(p1[1]);
 592    sum -= arg_is_const(p2[0]);
 593    sum -= arg_is_const(p2[1]);
 594    if (sum > 0) {
 595        TCGArg t;
 596        t = p1[0], p1[0] = p2[0], p2[0] = t;
 597        t = p1[1], p1[1] = p2[1], p2[1] = t;
 598        return true;
 599    }
 600    return false;
 601}
 602
 603/* Propagate constants and copies, fold constant expressions. */
 604void tcg_optimize(TCGContext *s)
 605{
 606    int nb_temps, nb_globals, i;
 607    TCGOp *op, *op_next, *prev_mb = NULL;
 608    TCGTempSet temps_used;
 609
 610    /* Array VALS has an element for each temp.
 611       If this temp holds a constant then its value is kept in VALS' element.
 612       If this temp is a copy of other ones then the other copies are
 613       available through the doubly linked circular list. */
 614
 615    nb_temps = s->nb_temps;
 616    nb_globals = s->nb_globals;
 617
 618    memset(&temps_used, 0, sizeof(temps_used));
 619    for (i = 0; i < nb_temps; ++i) {
 620        s->temps[i].state_ptr = NULL;
 621    }
 622
 623    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
 624        uint64_t mask, partmask, affected, tmp;
 625        int nb_oargs, nb_iargs;
 626        TCGOpcode opc = op->opc;
 627        const TCGOpDef *def = &tcg_op_defs[opc];
 628
 629        /* Count the arguments, and initialize the temps that are
 630           going to be used */
 631        if (opc == INDEX_op_call) {
 632            nb_oargs = TCGOP_CALLO(op);
 633            nb_iargs = TCGOP_CALLI(op);
 634            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 635                TCGTemp *ts = arg_temp(op->args[i]);
 636                if (ts) {
 637                    init_ts_info(&temps_used, ts);
 638                }
 639            }
 640        } else {
 641            nb_oargs = def->nb_oargs;
 642            nb_iargs = def->nb_iargs;
 643            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 644                init_arg_info(&temps_used, op->args[i]);
 645            }
 646        }
 647
 648        /* Do copy propagation */
 649        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 650            TCGTemp *ts = arg_temp(op->args[i]);
 651            if (ts && ts_is_copy(ts)) {
 652                op->args[i] = temp_arg(find_better_copy(s, ts));
 653            }
 654        }
 655
 656        /* For commutative operations make constant second argument */
 657        switch (opc) {
 658        CASE_OP_32_64_VEC(add):
 659        CASE_OP_32_64_VEC(mul):
 660        CASE_OP_32_64_VEC(and):
 661        CASE_OP_32_64_VEC(or):
 662        CASE_OP_32_64_VEC(xor):
 663        CASE_OP_32_64(eqv):
 664        CASE_OP_32_64(nand):
 665        CASE_OP_32_64(nor):
 666        CASE_OP_32_64(muluh):
 667        CASE_OP_32_64(mulsh):
 668            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 669            break;
 670        CASE_OP_32_64(brcond):
 671            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
 672                op->args[2] = tcg_swap_cond(op->args[2]);
 673            }
 674            break;
 675        CASE_OP_32_64(setcond):
 676            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
 677                op->args[3] = tcg_swap_cond(op->args[3]);
 678            }
 679            break;
 680        CASE_OP_32_64(movcond):
 681            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
 682                op->args[5] = tcg_swap_cond(op->args[5]);
 683            }
 684            /* For movcond, we canonicalize the "false" input reg to match
 685               the destination reg so that the tcg backend can implement
 686               a "move if true" operation.  */
 687            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
 688                op->args[5] = tcg_invert_cond(op->args[5]);
 689            }
 690            break;
 691        CASE_OP_32_64(add2):
 692            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
 693            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
 694            break;
 695        CASE_OP_32_64(mulu2):
 696        CASE_OP_32_64(muls2):
 697            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
 698            break;
 699        case INDEX_op_brcond2_i32:
 700            if (swap_commutative2(&op->args[0], &op->args[2])) {
 701                op->args[4] = tcg_swap_cond(op->args[4]);
 702            }
 703            break;
 704        case INDEX_op_setcond2_i32:
 705            if (swap_commutative2(&op->args[1], &op->args[3])) {
 706                op->args[5] = tcg_swap_cond(op->args[5]);
 707            }
 708            break;
 709        default:
 710            break;
 711        }
 712
 713        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
 714           and "sub r, 0, a => neg r, a" case.  */
 715        switch (opc) {
 716        CASE_OP_32_64(shl):
 717        CASE_OP_32_64(shr):
 718        CASE_OP_32_64(sar):
 719        CASE_OP_32_64(rotl):
 720        CASE_OP_32_64(rotr):
 721            if (arg_is_const(op->args[1])
 722                && arg_info(op->args[1])->val == 0) {
 723                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
 724                continue;
 725            }
 726            break;
 727        CASE_OP_32_64_VEC(sub):
 728            {
 729                TCGOpcode neg_op;
 730                bool have_neg;
 731
 732                if (arg_is_const(op->args[2])) {
 733                    /* Proceed with possible constant folding. */
 734                    break;
 735                }
 736                if (opc == INDEX_op_sub_i32) {
 737                    neg_op = INDEX_op_neg_i32;
 738                    have_neg = TCG_TARGET_HAS_neg_i32;
 739                } else if (opc == INDEX_op_sub_i64) {
 740                    neg_op = INDEX_op_neg_i64;
 741                    have_neg = TCG_TARGET_HAS_neg_i64;
 742                } else if (TCG_TARGET_HAS_neg_vec) {
 743                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
 744                    unsigned vece = TCGOP_VECE(op);
 745                    neg_op = INDEX_op_neg_vec;
 746                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
 747                } else {
 748                    break;
 749                }
 750                if (!have_neg) {
 751                    break;
 752                }
 753                if (arg_is_const(op->args[1])
 754                    && arg_info(op->args[1])->val == 0) {
 755                    op->opc = neg_op;
 756                    reset_temp(op->args[0]);
 757                    op->args[1] = op->args[2];
 758                    continue;
 759                }
 760            }
 761            break;
 762        CASE_OP_32_64_VEC(xor):
 763        CASE_OP_32_64(nand):
 764            if (!arg_is_const(op->args[1])
 765                && arg_is_const(op->args[2])
 766                && arg_info(op->args[2])->val == -1) {
 767                i = 1;
 768                goto try_not;
 769            }
 770            break;
 771        CASE_OP_32_64(nor):
 772            if (!arg_is_const(op->args[1])
 773                && arg_is_const(op->args[2])
 774                && arg_info(op->args[2])->val == 0) {
 775                i = 1;
 776                goto try_not;
 777            }
 778            break;
 779        CASE_OP_32_64_VEC(andc):
 780            if (!arg_is_const(op->args[2])
 781                && arg_is_const(op->args[1])
 782                && arg_info(op->args[1])->val == -1) {
 783                i = 2;
 784                goto try_not;
 785            }
 786            break;
 787        CASE_OP_32_64_VEC(orc):
 788        CASE_OP_32_64(eqv):
 789            if (!arg_is_const(op->args[2])
 790                && arg_is_const(op->args[1])
 791                && arg_info(op->args[1])->val == 0) {
 792                i = 2;
 793                goto try_not;
 794            }
 795            break;
 796        try_not:
 797            {
 798                TCGOpcode not_op;
 799                bool have_not;
 800
 801                if (def->flags & TCG_OPF_VECTOR) {
 802                    not_op = INDEX_op_not_vec;
 803                    have_not = TCG_TARGET_HAS_not_vec;
 804                } else if (def->flags & TCG_OPF_64BIT) {
 805                    not_op = INDEX_op_not_i64;
 806                    have_not = TCG_TARGET_HAS_not_i64;
 807                } else {
 808                    not_op = INDEX_op_not_i32;
 809                    have_not = TCG_TARGET_HAS_not_i32;
 810                }
 811                if (!have_not) {
 812                    break;
 813                }
 814                op->opc = not_op;
 815                reset_temp(op->args[0]);
 816                op->args[1] = op->args[i];
 817                continue;
 818            }
 819        default:
 820            break;
 821        }
 822
 823        /* Simplify expression for "op r, a, const => mov r, a" cases */
 824        switch (opc) {
 825        CASE_OP_32_64_VEC(add):
 826        CASE_OP_32_64_VEC(sub):
 827        CASE_OP_32_64_VEC(or):
 828        CASE_OP_32_64_VEC(xor):
 829        CASE_OP_32_64_VEC(andc):
 830        CASE_OP_32_64(shl):
 831        CASE_OP_32_64(shr):
 832        CASE_OP_32_64(sar):
 833        CASE_OP_32_64(rotl):
 834        CASE_OP_32_64(rotr):
 835            if (!arg_is_const(op->args[1])
 836                && arg_is_const(op->args[2])
 837                && arg_info(op->args[2])->val == 0) {
 838                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
 839                continue;
 840            }
 841            break;
 842        CASE_OP_32_64_VEC(and):
 843        CASE_OP_32_64_VEC(orc):
 844        CASE_OP_32_64(eqv):
 845            if (!arg_is_const(op->args[1])
 846                && arg_is_const(op->args[2])
 847                && arg_info(op->args[2])->val == -1) {
 848                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
 849                continue;
 850            }
 851            break;
 852        default:
 853            break;
 854        }
 855
 856        /* Simplify using known-zero bits. Currently only ops with a single
 857           output argument is supported. */
 858        mask = -1;
 859        affected = -1;
 860        switch (opc) {
 861        CASE_OP_32_64(ext8s):
 862            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
 863                break;
 864            }
 865            QEMU_FALLTHROUGH;
 866        CASE_OP_32_64(ext8u):
 867            mask = 0xff;
 868            goto and_const;
 869        CASE_OP_32_64(ext16s):
 870            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
 871                break;
 872            }
 873            QEMU_FALLTHROUGH;
 874        CASE_OP_32_64(ext16u):
 875            mask = 0xffff;
 876            goto and_const;
 877        case INDEX_op_ext32s_i64:
 878            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
 879                break;
 880            }
 881            QEMU_FALLTHROUGH;
 882        case INDEX_op_ext32u_i64:
 883            mask = 0xffffffffU;
 884            goto and_const;
 885
 886        CASE_OP_32_64(and):
 887            mask = arg_info(op->args[2])->mask;
 888            if (arg_is_const(op->args[2])) {
 889        and_const:
 890                affected = arg_info(op->args[1])->mask & ~mask;
 891            }
 892            mask = arg_info(op->args[1])->mask & mask;
 893            break;
 894
 895        case INDEX_op_ext_i32_i64:
 896            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
 897                break;
 898            }
 899            QEMU_FALLTHROUGH;
 900        case INDEX_op_extu_i32_i64:
 901            /* We do not compute affected as it is a size changing op.  */
 902            mask = (uint32_t)arg_info(op->args[1])->mask;
 903            break;
 904
 905        CASE_OP_32_64(andc):
 906            /* Known-zeros does not imply known-ones.  Therefore unless
 907               op->args[2] is constant, we can't infer anything from it.  */
 908            if (arg_is_const(op->args[2])) {
 909                mask = ~arg_info(op->args[2])->mask;
 910                goto and_const;
 911            }
 912            /* But we certainly know nothing outside args[1] may be set. */
 913            mask = arg_info(op->args[1])->mask;
 914            break;
 915
 916        case INDEX_op_sar_i32:
 917            if (arg_is_const(op->args[2])) {
 918                tmp = arg_info(op->args[2])->val & 31;
 919                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
 920            }
 921            break;
        case INDEX_op_sar_i64:
            /* Arithmetic right shift by a known count: shift the mask as a
               signed value so known-zero (or unknown) high bits replicate. */
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            /* Logical right shift: zeros enter from the top, so the
               known-zero high bits of the result grow with the count.  */
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            /* Low-half extraction keeps only the low 32 mask bits.  */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            /* High-half extraction moves the high 32 mask bits down.  */
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            /* Left shift by a known count: low bits become known zero.
               The host register width is used as the count modulus for
               both operand widths; valid TCG shift counts are less than
               the operand width, so this is conservative.  */
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.
               (m & -m) isolates the lowest possibly-set bit; negating the
               result covers it and everything above with 1s, since the
               borrow from negation can ripple arbitrarily far up.  */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

        CASE_OP_32_64(deposit):
            /* Result mask is the base mask with the field replaced by the
               deposited value's mask.  */
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            /* An extract from bit 0 is an identity when it discards only
               known-zero bits; "affected" records the bits it removes so
               the affected == 0 test below can turn this into a mov.  */
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            /* Same identity test, but only when sign extension did not
               introduce high bits (i.e. the extended mask is non-negative).  */
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            /* A result bit can only be set if set in either input.  */
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            /* Result is either 0..31 or the fallback value (args[2]).  */
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            /* Result is either 0..63 or the fallback value (args[2]).  */
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            /* Population count is 0..32; only these bits can be set.  */
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            /* Population count is 0..64; only these bits can be set.  */
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            /* setcond produces exactly 0 or 1.  */
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            /* Result is one of the two data operands.  */
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            /* An unsigned guest load zero-extends: keep only as many low
               bits as the access size provides.  */
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                MemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        CASE_OP_32_64(bswap16):
            mask = arg_info(op->args[1])->mask;
            /* If the input is already known zero-extended from 16 bits,
               record that so the backend may use a cheaper sequence.  */
            if (mask <= 0xffff) {
                op->args[2] |= TCG_BSWAP_IZ;
            }
            mask = bswap16(mask);
            switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
            case TCG_BSWAP_OZ:
                break;
            case TCG_BSWAP_OS:
                /* High bits are copies of bit 15.  */
                mask = (int16_t)mask;
                break;
            default: /* undefined high bits */
                mask |= MAKE_64BIT_MASK(16, 48);
                break;
            }
            break;

        case INDEX_op_bswap32_i64:
            mask = arg_info(op->args[1])->mask;
            if (mask <= 0xffffffffu) {
                op->args[2] |= TCG_BSWAP_IZ;
            }
            mask = bswap32(mask);
            switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
            case TCG_BSWAP_OZ:
                break;
            case TCG_BSWAP_OS:
                /* High bits are copies of bit 31.  */
                mask = (int32_t)mask;
                break;
            default: /* undefined high bits */
                mask |= MAKE_64BIT_MASK(32, 32);
                break;
            }
            break;

        default:
            break;
        }
1073
        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below we can ignore the high bits, but for further optimizations
           we need to record that the high bits contain garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        /* All result bits known zero: the op is a constant zero.  */
        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
            continue;
        }
        /* The op cannot change any possibly-set input bit: it is an
           identity on args[1], so replace it with a plain copy.  */
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }
1094
        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            /* These ops yield zero whenever the second operand is zero,
               regardless of the first.  */
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }
1110
        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            /* x | x == x and x & x == x, so a copy suffices when both
               inputs are known copies of the same temp.  */
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }
1123
        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            /* x &~ x, x - x and x ^ x are all zero.  */
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }
1137
        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted to arguments by register
           allocator where needed and possible.  Also detect copies. */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            /* Record the copy; later uses of args[0] may be redirected.  */
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            /* Duplicating a constant element yields a constant vector.  */
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_dup2_vec:
            assert(TCG_TARGET_REG_BITS == 32);
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                /* Both halves constant: fold into one 64-bit immediate.  */
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
                                 deposit64(arg_info(op->args[1])->val, 32, 32,
                                           arg_info(op->args[2])->val));
                break;
            } else if (args_are_copies(op->args[1], op->args[2])) {
                /* Equal halves: degrade to a 32-bit element dup, then fall
                   through to the default output-reset handling.  */
                op->opc = INDEX_op_dup_vec;
                TCGOP_VECE(op) = MO_32;
                nb_iargs = 1;
            }
            goto do_default;

        /* Unary ops with a constant input fold to a constant.  */
        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
            /* bswap carries its flags in args[2]; pass them to folding.  */
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          op->args[2]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;
1199
        /* Binary ops with two constant inputs fold to a constant.  */
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                } else {
                    /* A zero input selects the fallback operand args[2].  */
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                uint64_t v1 = arg_info(op->args[1])->val;
                uint64_t v2 = arg_info(op->args[2])->val;
                int shr = op->args[3];

                /* NOTE(review): the complementary shift assumes
                   0 < shr < width; out-of-range positions appear to be
                   eliminated by the op generators -- confirm upstream.  */
                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> shr) | (v2 << (64 - shr));
                } else {
                    tmp = (int32_t)(((uint32_t)v1 >> shr) |
                                    ((uint32_t)v2 << (32 - shr)));
                }
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;
1288
        CASE_OP_32_64(setcond):
            /* do_constant_folding_cond returns 0/1 when decidable, 2 when
               the comparison cannot be resolved at translation time.  */
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    /* Branch always taken: turn it into an unconditional
                       branch and forget all known temp values (BB end).  */
                    memset(&temps_used, 0, sizeof(temps_used));
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    /* Branch never taken: drop it entirely.  */
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                /* tmp == 1 selects the true operand args[3] (4 - tmp),
                   tmp == 0 selects the false operand args[4].  */
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                uint64_t tv = arg_info(op->args[3])->val;
                uint64_t fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];

                /* movcond with 1/0 data operands is just setcond (with the
                   condition inverted for the 0/1 arrangement).  */
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;
1337
        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            /* With all four input halves constant, compute the 64-bit
               result and emit two 32-bit movi (the second via an op
               inserted before this one).  */
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                /* Save the output args before rewriting op in place.  */
                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            /* 32x32->64 unsigned multiply of two constants.  */
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;
1380
        case INDEX_op_brcond2_i32:
            /* Double-word conditional branch: args[0,1] vs args[2,3] under
               condition args[4], branching to label args[5].  The labels
               below are shared by the EQ/NE partial-folding paths.  */
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    /* Always taken: unconditional branch, forget temps.  */
                    memset(&temps_used, 0, sizeof(temps_used));
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
            do_brcond_false:
                    /* Never taken: remove the op.  */
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_brcond_high:
                memset(&temps_used, 0, sizeof(temps_used));
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    /* Low words differ: whole comparison is false.  */
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    /* Low words equal: only the high words matter.  */
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                /* High words known equal: compare only the low words.  */
                memset(&temps_used, 0, sizeof(temps_used));
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    /* Low words equal: NE is decided by the high words.  */
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    /* Low words differ: NE holds, branch always taken.  */
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
1458
        case INDEX_op_setcond2_i32:
            /* Double-word setcond: args[1,2] vs args[3,4] under condition
               args[5], result in args[0].  Mirrors the brcond2 logic.  */
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_setcond_high:
                /* Output is rewritten in place; its known value is gone,
                   but a setcond result is always 0 or 1.  */
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    /* Low words differ: result is constant 0.  */
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    /* Low words equal: decided by the high words.  */
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                /* High words known equal: compare only the low words.  */
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;
1529
        case INDEX_op_call:
            /* Unless the call promises not to touch globals, any global
               temp we have been tracking may be clobbered: forget them.  */
            if (!(tcg_call_flags(op)
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about operation (or were unable
               to compute the operation result) so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                memset(&temps_used, 0, sizeof(temps_used));
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }
1563
        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            /* Remember this barrier as a candidate for merging.  */
            prev_mb = op;
        }
1602    }
1603}
1604