/* qemu/tcg/optimize.c */
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "tcg-op.h"
  28
/* Expand to the i32 and i64 case labels for opcode X.  */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

/* As above, but also including the vector form of opcode X.  */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
  37
/* Per-temp optimizer state, hung off TCGTemp.state_ptr.  Temps known to
   hold the same value are linked into a doubly linked circular list.  */
struct tcg_temp_info {
    bool is_const;          /* true if the temp's value is known to be VAL */
    TCGTemp *prev_copy;     /* circular list of temps holding the same value */
    TCGTemp *next_copy;
    tcg_target_ulong val;   /* the constant value, valid only if is_const */
    tcg_target_ulong mask;  /* mask of bits that may be nonzero */
};
  45
  46static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
  47{
  48    return ts->state_ptr;
  49}
  50
  51static inline struct tcg_temp_info *arg_info(TCGArg arg)
  52{
  53    return ts_info(arg_temp(arg));
  54}
  55
  56static inline bool ts_is_const(TCGTemp *ts)
  57{
  58    return ts_info(ts)->is_const;
  59}
  60
  61static inline bool arg_is_const(TCGArg arg)
  62{
  63    return ts_is_const(arg_temp(arg));
  64}
  65
  66static inline bool ts_is_copy(TCGTemp *ts)
  67{
  68    return ts_info(ts)->next_copy != ts;
  69}
  70
/* Reset TEMP's state, possibly removing the temp for the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    /* Unlink TS from its circular copy list by joining its neighbors...  */
    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    /* ...and make TS a singleton list again.  */
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;          /* all bits of the value are unknown again */
}
  85
  86static void reset_temp(TCGArg arg)
  87{
  88    reset_ts(arg_temp(arg));
  89}
  90
  91/* Initialize and activate a temporary.  */
  92static void init_ts_info(struct tcg_temp_info *infos,
  93                         TCGTempSet *temps_used, TCGTemp *ts)
  94{
  95    size_t idx = temp_idx(ts);
  96    if (!test_bit(idx, temps_used->l)) {
  97        struct tcg_temp_info *ti = &infos[idx];
  98
  99        ts->state_ptr = ti;
 100        ti->next_copy = ts;
 101        ti->prev_copy = ts;
 102        ti->is_const = false;
 103        ti->mask = -1;
 104        set_bit(idx, temps_used->l);
 105    }
 106}
 107
 108static void init_arg_info(struct tcg_temp_info *infos,
 109                          TCGTempSet *temps_used, TCGArg arg)
 110{
 111    init_ts_info(infos, temps_used, arg_temp(arg));
 112}
 113
 114static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 115{
 116    TCGTemp *i;
 117
 118    /* If this is already a global, we can't do better. */
 119    if (ts->temp_global) {
 120        return ts;
 121    }
 122
 123    /* Search for a global first. */
 124    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
 125        if (i->temp_global) {
 126            return i;
 127        }
 128    }
 129
 130    /* If it is a temp, search for a temp local. */
 131    if (!ts->temp_local) {
 132        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
 133            if (ts->temp_local) {
 134                return i;
 135            }
 136        }
 137    }
 138
 139    /* Failure to find a better representation, return the same temp. */
 140    return ts;
 141}
 142
 143static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
 144{
 145    TCGTemp *i;
 146
 147    if (ts1 == ts2) {
 148        return true;
 149    }
 150
 151    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
 152        return false;
 153    }
 154
 155    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
 156        if (i == ts2) {
 157            return true;
 158        }
 159    }
 160
 161    return false;
 162}
 163
 164static bool args_are_copies(TCGArg arg1, TCGArg arg2)
 165{
 166    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 167}
 168
/* Rewrite OP in place into a "movi DST, VAL" of the width implied by the
   original opcode, and record DST as holding the constant VAL.  */
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    /* Select the movi variant matching the original op's class.  */
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    /* Drop DST from its old copy class before recording the new value.  */
    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}
 199
/* Rewrite OP in place into a "mov DST, SRC" of the width implied by the
   original opcode, removing the op entirely if DST and SRC are already
   copies.  DST joins SRC's copy class when the underlying temp types
   match.  */
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    /* A move between temps already known equal is dead.  */
    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    /* DST inherits SRC's known-zero bits...  */
    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    /* ...and, for same-typed temps, splice DST into SRC's copy list
       directly after SRC, propagating constness.  */
    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}
 249
/* Evaluate opcode OP on constant operands X and Y and return the raw
   result.  Truncation of 32-bit results to 32 bits is the caller's job
   (see do_constant_folding).  Unary ops ignore Y.  */
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    /* Shift counts are masked to the operand width, matching the
       TCG specification for shift/rotate ops.  */
    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    /* For clz/ctz, TCG defines the second operand as the result
       returned for a zero input.  */
    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    /* High half of the widened product.  */
    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
 412
 413static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
 414{
 415    const TCGOpDef *def = &tcg_op_defs[op];
 416    TCGArg res = do_constant_folding_2(op, x, y);
 417    if (!(def->flags & TCG_OPF_64BIT)) {
 418        res = (int32_t)res;
 419    }
 420    return res;
 421}
 422
 423static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 424{
 425    switch (c) {
 426    case TCG_COND_EQ:
 427        return x == y;
 428    case TCG_COND_NE:
 429        return x != y;
 430    case TCG_COND_LT:
 431        return (int32_t)x < (int32_t)y;
 432    case TCG_COND_GE:
 433        return (int32_t)x >= (int32_t)y;
 434    case TCG_COND_LE:
 435        return (int32_t)x <= (int32_t)y;
 436    case TCG_COND_GT:
 437        return (int32_t)x > (int32_t)y;
 438    case TCG_COND_LTU:
 439        return x < y;
 440    case TCG_COND_GEU:
 441        return x >= y;
 442    case TCG_COND_LEU:
 443        return x <= y;
 444    case TCG_COND_GTU:
 445        return x > y;
 446    default:
 447        tcg_abort();
 448    }
 449}
 450
 451static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 452{
 453    switch (c) {
 454    case TCG_COND_EQ:
 455        return x == y;
 456    case TCG_COND_NE:
 457        return x != y;
 458    case TCG_COND_LT:
 459        return (int64_t)x < (int64_t)y;
 460    case TCG_COND_GE:
 461        return (int64_t)x >= (int64_t)y;
 462    case TCG_COND_LE:
 463        return (int64_t)x <= (int64_t)y;
 464    case TCG_COND_GT:
 465        return (int64_t)x > (int64_t)y;
 466    case TCG_COND_LTU:
 467        return x < y;
 468    case TCG_COND_GEU:
 469        return x >= y;
 470    case TCG_COND_LEU:
 471        return x <= y;
 472    case TCG_COND_GTU:
 473        return x > y;
 474    default:
 475        tcg_abort();
 476    }
 477}
 478
 479static bool do_constant_folding_cond_eq(TCGCond c)
 480{
 481    switch (c) {
 482    case TCG_COND_GT:
 483    case TCG_COND_LTU:
 484    case TCG_COND_LT:
 485    case TCG_COND_GTU:
 486    case TCG_COND_NE:
 487        return 0;
 488    case TCG_COND_GE:
 489    case TCG_COND_GEU:
 490    case TCG_COND_LE:
 491    case TCG_COND_LEU:
 492    case TCG_COND_EQ:
 493        return 1;
 494    default:
 495        tcg_abort();
 496    }
 497}
 498
 499/* Return 2 if the condition can't be simplified, and the result
 500   of the condition (0 or 1) if it can */
 501static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
 502                                       TCGArg y, TCGCond c)
 503{
 504    tcg_target_ulong xv = arg_info(x)->val;
 505    tcg_target_ulong yv = arg_info(y)->val;
 506    if (arg_is_const(x) && arg_is_const(y)) {
 507        const TCGOpDef *def = &tcg_op_defs[op];
 508        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
 509        if (def->flags & TCG_OPF_64BIT) {
 510            return do_constant_folding_cond_64(xv, yv, c);
 511        } else {
 512            return do_constant_folding_cond_32(xv, yv, c);
 513        }
 514    } else if (args_are_copies(x, y)) {
 515        return do_constant_folding_cond_eq(c);
 516    } else if (arg_is_const(y) && yv == 0) {
 517        switch (c) {
 518        case TCG_COND_LTU:
 519            return 0;
 520        case TCG_COND_GEU:
 521            return 1;
 522        default:
 523            return 2;
 524        }
 525    }
 526    return 2;
 527}
 528
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  P1 and P2 each point to a
   low/high pair of 32-bit args forming a 64-bit operand.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        /* Both operands constant: fold as a 64-bit comparison.  */
        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        /* Unsigned comparisons against zero have a fixed outcome.  */
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    /* Both halves copies of each other: the operands are equal.  */
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
 563
 564static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 565{
 566    TCGArg a1 = *p1, a2 = *p2;
 567    int sum = 0;
 568    sum += arg_is_const(a1);
 569    sum -= arg_is_const(a2);
 570
 571    /* Prefer the constant in second argument, and then the form
 572       op a, a, b, which is better handled on non-RISC hosts. */
 573    if (sum > 0 || (sum == 0 && dest == a2)) {
 574        *p1 = a2;
 575        *p2 = a1;
 576        return true;
 577    }
 578    return false;
 579}
 580
 581static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 582{
 583    int sum = 0;
 584    sum += arg_is_const(p1[0]);
 585    sum += arg_is_const(p1[1]);
 586    sum -= arg_is_const(p2[0]);
 587    sum -= arg_is_const(p2[1]);
 588    if (sum > 0) {
 589        TCGArg t;
 590        t = p1[0], p1[0] = p2[0], p2[0] = t;
 591        t = p1[1], p1[1] = p2[1], p2[1] = t;
 592        return true;
 593    }
 594    return false;
 595}
 596
 597/* Propagate constants and copies, fold constant expressions. */
 598void tcg_optimize(TCGContext *s)
 599{
 600    int nb_temps, nb_globals;
 601    TCGOp *op, *op_next, *prev_mb = NULL;
 602    struct tcg_temp_info *infos;
 603    TCGTempSet temps_used;
 604
 605    /* Array VALS has an element for each temp.
 606       If this temp holds a constant then its value is kept in VALS' element.
 607       If this temp is a copy of other ones then the other copies are
 608       available through the doubly linked circular list. */
 609
 610    nb_temps = s->nb_temps;
 611    nb_globals = s->nb_globals;
 612    bitmap_zero(temps_used.l, nb_temps);
 613    infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
 614
 615    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
 616        tcg_target_ulong mask, partmask, affected;
 617        int nb_oargs, nb_iargs, i;
 618        TCGArg tmp;
 619        TCGOpcode opc = op->opc;
 620        const TCGOpDef *def = &tcg_op_defs[opc];
 621
 622        /* Count the arguments, and initialize the temps that are
 623           going to be used */
 624        if (opc == INDEX_op_call) {
 625            nb_oargs = TCGOP_CALLO(op);
 626            nb_iargs = TCGOP_CALLI(op);
 627            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 628                TCGTemp *ts = arg_temp(op->args[i]);
 629                if (ts) {
 630                    init_ts_info(infos, &temps_used, ts);
 631                }
 632            }
 633        } else {
 634            nb_oargs = def->nb_oargs;
 635            nb_iargs = def->nb_iargs;
 636            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 637                init_arg_info(infos, &temps_used, op->args[i]);
 638            }
 639        }
 640
 641        /* Do copy propagation */
 642        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 643            TCGTemp *ts = arg_temp(op->args[i]);
 644            if (ts && ts_is_copy(ts)) {
 645                op->args[i] = temp_arg(find_better_copy(s, ts));
 646            }
 647        }
 648
 649        /* For commutative operations make constant second argument */
 650        switch (opc) {
 651        CASE_OP_32_64_VEC(add):
 652        CASE_OP_32_64_VEC(mul):
 653        CASE_OP_32_64_VEC(and):
 654        CASE_OP_32_64_VEC(or):
 655        CASE_OP_32_64_VEC(xor):
 656        CASE_OP_32_64(eqv):
 657        CASE_OP_32_64(nand):
 658        CASE_OP_32_64(nor):
 659        CASE_OP_32_64(muluh):
 660        CASE_OP_32_64(mulsh):
 661            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 662            break;
 663        CASE_OP_32_64(brcond):
 664            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
 665                op->args[2] = tcg_swap_cond(op->args[2]);
 666            }
 667            break;
 668        CASE_OP_32_64(setcond):
 669            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
 670                op->args[3] = tcg_swap_cond(op->args[3]);
 671            }
 672            break;
 673        CASE_OP_32_64(movcond):
 674            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
 675                op->args[5] = tcg_swap_cond(op->args[5]);
 676            }
 677            /* For movcond, we canonicalize the "false" input reg to match
 678               the destination reg so that the tcg backend can implement
 679               a "move if true" operation.  */
 680            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
 681                op->args[5] = tcg_invert_cond(op->args[5]);
 682            }
 683            break;
 684        CASE_OP_32_64(add2):
 685            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
 686            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
 687            break;
 688        CASE_OP_32_64(mulu2):
 689        CASE_OP_32_64(muls2):
 690            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
 691            break;
 692        case INDEX_op_brcond2_i32:
 693            if (swap_commutative2(&op->args[0], &op->args[2])) {
 694                op->args[4] = tcg_swap_cond(op->args[4]);
 695            }
 696            break;
 697        case INDEX_op_setcond2_i32:
 698            if (swap_commutative2(&op->args[1], &op->args[3])) {
 699                op->args[5] = tcg_swap_cond(op->args[5]);
 700            }
 701            break;
 702        default:
 703            break;
 704        }
 705
 706        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
 707           and "sub r, 0, a => neg r, a" case.  */
 708        switch (opc) {
 709        CASE_OP_32_64(shl):
 710        CASE_OP_32_64(shr):
 711        CASE_OP_32_64(sar):
 712        CASE_OP_32_64(rotl):
 713        CASE_OP_32_64(rotr):
 714            if (arg_is_const(op->args[1])
 715                && arg_info(op->args[1])->val == 0) {
 716                tcg_opt_gen_movi(s, op, op->args[0], 0);
 717                continue;
 718            }
 719            break;
 720        CASE_OP_32_64_VEC(sub):
 721            {
 722                TCGOpcode neg_op;
 723                bool have_neg;
 724
 725                if (arg_is_const(op->args[2])) {
 726                    /* Proceed with possible constant folding. */
 727                    break;
 728                }
 729                if (opc == INDEX_op_sub_i32) {
 730                    neg_op = INDEX_op_neg_i32;
 731                    have_neg = TCG_TARGET_HAS_neg_i32;
 732                } else if (opc == INDEX_op_sub_i64) {
 733                    neg_op = INDEX_op_neg_i64;
 734                    have_neg = TCG_TARGET_HAS_neg_i64;
 735                } else if (TCG_TARGET_HAS_neg_vec) {
 736                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
 737                    unsigned vece = TCGOP_VECE(op);
 738                    neg_op = INDEX_op_neg_vec;
 739                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
 740                } else {
 741                    break;
 742                }
 743                if (!have_neg) {
 744                    break;
 745                }
 746                if (arg_is_const(op->args[1])
 747                    && arg_info(op->args[1])->val == 0) {
 748                    op->opc = neg_op;
 749                    reset_temp(op->args[0]);
 750                    op->args[1] = op->args[2];
 751                    continue;
 752                }
 753            }
 754            break;
 755        CASE_OP_32_64_VEC(xor):
 756        CASE_OP_32_64(nand):
 757            if (!arg_is_const(op->args[1])
 758                && arg_is_const(op->args[2])
 759                && arg_info(op->args[2])->val == -1) {
 760                i = 1;
 761                goto try_not;
 762            }
 763            break;
 764        CASE_OP_32_64(nor):
 765            if (!arg_is_const(op->args[1])
 766                && arg_is_const(op->args[2])
 767                && arg_info(op->args[2])->val == 0) {
 768                i = 1;
 769                goto try_not;
 770            }
 771            break;
 772        CASE_OP_32_64_VEC(andc):
 773            if (!arg_is_const(op->args[2])
 774                && arg_is_const(op->args[1])
 775                && arg_info(op->args[1])->val == -1) {
 776                i = 2;
 777                goto try_not;
 778            }
 779            break;
 780        CASE_OP_32_64_VEC(orc):
 781        CASE_OP_32_64(eqv):
 782            if (!arg_is_const(op->args[2])
 783                && arg_is_const(op->args[1])
 784                && arg_info(op->args[1])->val == 0) {
 785                i = 2;
 786                goto try_not;
 787            }
 788            break;
 789        try_not:
 790            {
 791                TCGOpcode not_op;
 792                bool have_not;
 793
 794                if (def->flags & TCG_OPF_VECTOR) {
 795                    not_op = INDEX_op_not_vec;
 796                    have_not = TCG_TARGET_HAS_not_vec;
 797                } else if (def->flags & TCG_OPF_64BIT) {
 798                    not_op = INDEX_op_not_i64;
 799                    have_not = TCG_TARGET_HAS_not_i64;
 800                } else {
 801                    not_op = INDEX_op_not_i32;
 802                    have_not = TCG_TARGET_HAS_not_i32;
 803                }
 804                if (!have_not) {
 805                    break;
 806                }
 807                op->opc = not_op;
 808                reset_temp(op->args[0]);
 809                op->args[1] = op->args[i];
 810                continue;
 811            }
 812        default:
 813            break;
 814        }
 815
 816        /* Simplify expression for "op r, a, const => mov r, a" cases */
 817        switch (opc) {
 818        CASE_OP_32_64_VEC(add):
 819        CASE_OP_32_64_VEC(sub):
 820        CASE_OP_32_64_VEC(or):
 821        CASE_OP_32_64_VEC(xor):
 822        CASE_OP_32_64_VEC(andc):
 823        CASE_OP_32_64(shl):
 824        CASE_OP_32_64(shr):
 825        CASE_OP_32_64(sar):
 826        CASE_OP_32_64(rotl):
 827        CASE_OP_32_64(rotr):
 828            if (!arg_is_const(op->args[1])
 829                && arg_is_const(op->args[2])
 830                && arg_info(op->args[2])->val == 0) {
 831                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
 832                continue;
 833            }
 834            break;
 835        CASE_OP_32_64_VEC(and):
 836        CASE_OP_32_64_VEC(orc):
 837        CASE_OP_32_64(eqv):
 838            if (!arg_is_const(op->args[1])
 839                && arg_is_const(op->args[2])
 840                && arg_info(op->args[2])->val == -1) {
 841                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
 842                continue;
 843            }
 844            break;
 845        default:
 846            break;
 847        }
 848
 849        /* Simplify using known-zero bits. Currently only ops with a single
 850           output argument is supported. */
 851        mask = -1;
 852        affected = -1;
 853        switch (opc) {
 854        CASE_OP_32_64(ext8s):
 855            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
 856                break;
 857            }
 858        CASE_OP_32_64(ext8u):
 859            mask = 0xff;
 860            goto and_const;
 861        CASE_OP_32_64(ext16s):
 862            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
 863                break;
 864            }
 865        CASE_OP_32_64(ext16u):
 866            mask = 0xffff;
 867            goto and_const;
 868        case INDEX_op_ext32s_i64:
 869            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
 870                break;
 871            }
 872        case INDEX_op_ext32u_i64:
 873            mask = 0xffffffffU;
 874            goto and_const;
 875
 876        CASE_OP_32_64(and):
 877            mask = arg_info(op->args[2])->mask;
 878            if (arg_is_const(op->args[2])) {
 879        and_const:
 880                affected = arg_info(op->args[1])->mask & ~mask;
 881            }
 882            mask = arg_info(op->args[1])->mask & mask;
 883            break;
 884
 885        case INDEX_op_ext_i32_i64:
 886            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
 887                break;
 888            }
 889        case INDEX_op_extu_i32_i64:
 890            /* We do not compute affected as it is a size changing op.  */
 891            mask = (uint32_t)arg_info(op->args[1])->mask;
 892            break;
 893
 894        CASE_OP_32_64(andc):
 895            /* Known-zeros does not imply known-ones.  Therefore unless
 896               op->args[2] is constant, we can't infer anything from it.  */
 897            if (arg_is_const(op->args[2])) {
 898                mask = ~arg_info(op->args[2])->mask;
 899                goto and_const;
 900            }
 901            /* But we certainly know nothing outside args[1] may be set. */
 902            mask = arg_info(op->args[1])->mask;
 903            break;
 904
 905        case INDEX_op_sar_i32:
 906            if (arg_is_const(op->args[2])) {
 907                tmp = arg_info(op->args[2])->val & 31;
 908                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
 909            }
 910            break;
 911        case INDEX_op_sar_i64:
 912            if (arg_is_const(op->args[2])) {
 913                tmp = arg_info(op->args[2])->val & 63;
 914                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
 915            }
 916            break;
 917
 918        case INDEX_op_shr_i32:
 919            if (arg_is_const(op->args[2])) {
 920                tmp = arg_info(op->args[2])->val & 31;
 921                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
 922            }
 923            break;
 924        case INDEX_op_shr_i64:
 925            if (arg_is_const(op->args[2])) {
 926                tmp = arg_info(op->args[2])->val & 63;
 927                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
 928            }
 929            break;
 930
 931        case INDEX_op_extrl_i64_i32:
 932            mask = (uint32_t)arg_info(op->args[1])->mask;
 933            break;
 934        case INDEX_op_extrh_i64_i32:
 935            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
 936            break;
 937
 938        CASE_OP_32_64(shl):
 939            if (arg_is_const(op->args[2])) {
 940                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
 941                mask = arg_info(op->args[1])->mask << tmp;
 942            }
 943            break;
 944
 945        CASE_OP_32_64(neg):
 946            /* Set to 1 all bits to the left of the rightmost.  */
 947            mask = -(arg_info(op->args[1])->mask
 948                     & -arg_info(op->args[1])->mask);
 949            break;
 950
 951        CASE_OP_32_64(deposit):
 952            mask = deposit64(arg_info(op->args[1])->mask,
 953                             op->args[3], op->args[4],
 954                             arg_info(op->args[2])->mask);
 955            break;
 956
 957        CASE_OP_32_64(extract):
 958            mask = extract64(arg_info(op->args[1])->mask,
 959                             op->args[2], op->args[3]);
 960            if (op->args[2] == 0) {
 961                affected = arg_info(op->args[1])->mask & ~mask;
 962            }
 963            break;
 964        CASE_OP_32_64(sextract):
 965            mask = sextract64(arg_info(op->args[1])->mask,
 966                              op->args[2], op->args[3]);
 967            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
 968                affected = arg_info(op->args[1])->mask & ~mask;
 969            }
 970            break;
 971
 972        CASE_OP_32_64(or):
 973        CASE_OP_32_64(xor):
 974            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
 975            break;
 976
 977        case INDEX_op_clz_i32:
 978        case INDEX_op_ctz_i32:
 979            mask = arg_info(op->args[2])->mask | 31;
 980            break;
 981
 982        case INDEX_op_clz_i64:
 983        case INDEX_op_ctz_i64:
 984            mask = arg_info(op->args[2])->mask | 63;
 985            break;
 986
 987        case INDEX_op_ctpop_i32:
 988            mask = 32 | 31;
 989            break;
 990        case INDEX_op_ctpop_i64:
 991            mask = 64 | 63;
 992            break;
 993
 994        CASE_OP_32_64(setcond):
 995        case INDEX_op_setcond2_i32:
 996            mask = 1;
 997            break;
 998
 999        CASE_OP_32_64(movcond):
1000            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
1001            break;
1002
1003        CASE_OP_32_64(ld8u):
1004            mask = 0xff;
1005            break;
1006        CASE_OP_32_64(ld16u):
1007            mask = 0xffff;
1008            break;
1009        case INDEX_op_ld32u_i64:
1010            mask = 0xffffffffu;
1011            break;
1012
1013        CASE_OP_32_64(qemu_ld):
1014            {
1015                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
1016                MemOp mop = get_memop(oi);
1017                if (!(mop & MO_SIGN)) {
1018                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1019                }
1020            }
1021            break;
1022
1023        default:
1024            break;
1025        }
1026
1027        /* 32-bit ops generate 32-bit results.  For the result is zero test
1028           below, we can ignore high bits, but for further optimizations we
1029           need to record that the high bits contain garbage.  */
1030        partmask = mask;
1031        if (!(def->flags & TCG_OPF_64BIT)) {
1032            mask |= ~(tcg_target_ulong)0xffffffffu;
1033            partmask &= 0xffffffffu;
1034            affected &= 0xffffffffu;
1035        }
1036
1037        if (partmask == 0) {
1038            tcg_debug_assert(nb_oargs == 1);
1039            tcg_opt_gen_movi(s, op, op->args[0], 0);
1040            continue;
1041        }
1042        if (affected == 0) {
1043            tcg_debug_assert(nb_oargs == 1);
1044            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1045            continue;
1046        }
1047
1048        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1049        switch (opc) {
1050        CASE_OP_32_64_VEC(and):
1051        CASE_OP_32_64_VEC(mul):
1052        CASE_OP_32_64(muluh):
1053        CASE_OP_32_64(mulsh):
1054            if (arg_is_const(op->args[2])
1055                && arg_info(op->args[2])->val == 0) {
1056                tcg_opt_gen_movi(s, op, op->args[0], 0);
1057                continue;
1058            }
1059            break;
1060        default:
1061            break;
1062        }
1063
1064        /* Simplify expression for "op r, a, a => mov r, a" cases */
1065        switch (opc) {
1066        CASE_OP_32_64_VEC(or):
1067        CASE_OP_32_64_VEC(and):
1068            if (args_are_copies(op->args[1], op->args[2])) {
1069                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1070                continue;
1071            }
1072            break;
1073        default:
1074            break;
1075        }
1076
1077        /* Simplify expression for "op r, a, a => movi r, 0" cases */
1078        switch (opc) {
1079        CASE_OP_32_64_VEC(andc):
1080        CASE_OP_32_64_VEC(sub):
1081        CASE_OP_32_64_VEC(xor):
1082            if (args_are_copies(op->args[1], op->args[2])) {
1083                tcg_opt_gen_movi(s, op, op->args[0], 0);
1084                continue;
1085            }
1086            break;
1087        default:
1088            break;
1089        }
1090
1091        /* Propagate constants through copy operations and do constant
1092           folding.  Constants will be substituted to arguments by register
1093           allocator where needed and possible.  Also detect copies. */
1094        switch (opc) {
1095        CASE_OP_32_64_VEC(mov):
1096            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1097            break;
1098        CASE_OP_32_64(movi):
1099        case INDEX_op_dupi_vec:
1100            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
1101            break;
1102
1103        case INDEX_op_dup_vec:
1104            if (arg_is_const(op->args[1])) {
1105                tmp = arg_info(op->args[1])->val;
1106                tmp = dup_const(TCGOP_VECE(op), tmp);
1107                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1108                break;
1109            }
1110            goto do_default;
1111
1112        CASE_OP_32_64(not):
1113        CASE_OP_32_64(neg):
1114        CASE_OP_32_64(ext8s):
1115        CASE_OP_32_64(ext8u):
1116        CASE_OP_32_64(ext16s):
1117        CASE_OP_32_64(ext16u):
1118        CASE_OP_32_64(ctpop):
1119        CASE_OP_32_64(bswap16):
1120        CASE_OP_32_64(bswap32):
1121        case INDEX_op_bswap64_i64:
1122        case INDEX_op_ext32s_i64:
1123        case INDEX_op_ext32u_i64:
1124        case INDEX_op_ext_i32_i64:
1125        case INDEX_op_extu_i32_i64:
1126        case INDEX_op_extrl_i64_i32:
1127        case INDEX_op_extrh_i64_i32:
1128            if (arg_is_const(op->args[1])) {
1129                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
1130                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1131                break;
1132            }
1133            goto do_default;
1134
1135        CASE_OP_32_64(add):
1136        CASE_OP_32_64(sub):
1137        CASE_OP_32_64(mul):
1138        CASE_OP_32_64(or):
1139        CASE_OP_32_64(and):
1140        CASE_OP_32_64(xor):
1141        CASE_OP_32_64(shl):
1142        CASE_OP_32_64(shr):
1143        CASE_OP_32_64(sar):
1144        CASE_OP_32_64(rotl):
1145        CASE_OP_32_64(rotr):
1146        CASE_OP_32_64(andc):
1147        CASE_OP_32_64(orc):
1148        CASE_OP_32_64(eqv):
1149        CASE_OP_32_64(nand):
1150        CASE_OP_32_64(nor):
1151        CASE_OP_32_64(muluh):
1152        CASE_OP_32_64(mulsh):
1153        CASE_OP_32_64(div):
1154        CASE_OP_32_64(divu):
1155        CASE_OP_32_64(rem):
1156        CASE_OP_32_64(remu):
1157            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1158                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1159                                          arg_info(op->args[2])->val);
1160                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1161                break;
1162            }
1163            goto do_default;
1164
1165        CASE_OP_32_64(clz):
1166        CASE_OP_32_64(ctz):
1167            if (arg_is_const(op->args[1])) {
1168                TCGArg v = arg_info(op->args[1])->val;
1169                if (v != 0) {
1170                    tmp = do_constant_folding(opc, v, 0);
1171                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
1172                } else {
1173                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
1174                }
1175                break;
1176            }
1177            goto do_default;
1178
1179        CASE_OP_32_64(deposit):
1180            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1181                tmp = deposit64(arg_info(op->args[1])->val,
1182                                op->args[3], op->args[4],
1183                                arg_info(op->args[2])->val);
1184                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1185                break;
1186            }
1187            goto do_default;
1188
1189        CASE_OP_32_64(extract):
1190            if (arg_is_const(op->args[1])) {
1191                tmp = extract64(arg_info(op->args[1])->val,
1192                                op->args[2], op->args[3]);
1193                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1194                break;
1195            }
1196            goto do_default;
1197
1198        CASE_OP_32_64(sextract):
1199            if (arg_is_const(op->args[1])) {
1200                tmp = sextract64(arg_info(op->args[1])->val,
1201                                 op->args[2], op->args[3]);
1202                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1203                break;
1204            }
1205            goto do_default;
1206
1207        CASE_OP_32_64(extract2):
1208            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1209                TCGArg v1 = arg_info(op->args[1])->val;
1210                TCGArg v2 = arg_info(op->args[2])->val;
1211
1212                if (opc == INDEX_op_extract2_i64) {
1213                    tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
1214                } else {
1215                    tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
1216                                    ((uint32_t)v2 << (32 - op->args[3])));
1217                }
1218                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1219                break;
1220            }
1221            goto do_default;
1222
1223        CASE_OP_32_64(setcond):
1224            tmp = do_constant_folding_cond(opc, op->args[1],
1225                                           op->args[2], op->args[3]);
1226            if (tmp != 2) {
1227                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1228                break;
1229            }
1230            goto do_default;
1231
1232        CASE_OP_32_64(brcond):
1233            tmp = do_constant_folding_cond(opc, op->args[0],
1234                                           op->args[1], op->args[2]);
1235            if (tmp != 2) {
1236                if (tmp) {
1237                    bitmap_zero(temps_used.l, nb_temps);
1238                    op->opc = INDEX_op_br;
1239                    op->args[0] = op->args[3];
1240                } else {
1241                    tcg_op_remove(s, op);
1242                }
1243                break;
1244            }
1245            goto do_default;
1246
1247        CASE_OP_32_64(movcond):
1248            tmp = do_constant_folding_cond(opc, op->args[1],
1249                                           op->args[2], op->args[5]);
1250            if (tmp != 2) {
1251                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
1252                break;
1253            }
1254            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1255                tcg_target_ulong tv = arg_info(op->args[3])->val;
1256                tcg_target_ulong fv = arg_info(op->args[4])->val;
1257                TCGCond cond = op->args[5];
1258                if (fv == 1 && tv == 0) {
1259                    cond = tcg_invert_cond(cond);
1260                } else if (!(tv == 1 && fv == 0)) {
1261                    goto do_default;
1262                }
1263                op->args[3] = cond;
1264                op->opc = opc = (opc == INDEX_op_movcond_i32
1265                                 ? INDEX_op_setcond_i32
1266                                 : INDEX_op_setcond_i64);
1267                nb_iargs = 2;
1268            }
1269            goto do_default;
1270
1271        case INDEX_op_add2_i32:
1272        case INDEX_op_sub2_i32:
1273            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
1274                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
1275                uint32_t al = arg_info(op->args[2])->val;
1276                uint32_t ah = arg_info(op->args[3])->val;
1277                uint32_t bl = arg_info(op->args[4])->val;
1278                uint32_t bh = arg_info(op->args[5])->val;
1279                uint64_t a = ((uint64_t)ah << 32) | al;
1280                uint64_t b = ((uint64_t)bh << 32) | bl;
1281                TCGArg rl, rh;
1282                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1283
1284                if (opc == INDEX_op_add2_i32) {
1285                    a += b;
1286                } else {
1287                    a -= b;
1288                }
1289
1290                rl = op->args[0];
1291                rh = op->args[1];
1292                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
1293                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
1294                break;
1295            }
1296            goto do_default;
1297
1298        case INDEX_op_mulu2_i32:
1299            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1300                uint32_t a = arg_info(op->args[2])->val;
1301                uint32_t b = arg_info(op->args[3])->val;
1302                uint64_t r = (uint64_t)a * b;
1303                TCGArg rl, rh;
1304                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1305
1306                rl = op->args[0];
1307                rh = op->args[1];
1308                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
1309                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
1310                break;
1311            }
1312            goto do_default;
1313
1314        case INDEX_op_brcond2_i32:
1315            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
1316                                            op->args[4]);
1317            if (tmp != 2) {
1318                if (tmp) {
1319            do_brcond_true:
1320                    bitmap_zero(temps_used.l, nb_temps);
1321                    op->opc = INDEX_op_br;
1322                    op->args[0] = op->args[5];
1323                } else {
1324            do_brcond_false:
1325                    tcg_op_remove(s, op);
1326                }
1327            } else if ((op->args[4] == TCG_COND_LT
1328                        || op->args[4] == TCG_COND_GE)
1329                       && arg_is_const(op->args[2])
1330                       && arg_info(op->args[2])->val == 0
1331                       && arg_is_const(op->args[3])
1332                       && arg_info(op->args[3])->val == 0) {
1333                /* Simplify LT/GE comparisons vs zero to a single compare
1334                   vs the high word of the input.  */
1335            do_brcond_high:
1336                bitmap_zero(temps_used.l, nb_temps);
1337                op->opc = INDEX_op_brcond_i32;
1338                op->args[0] = op->args[1];
1339                op->args[1] = op->args[3];
1340                op->args[2] = op->args[4];
1341                op->args[3] = op->args[5];
1342            } else if (op->args[4] == TCG_COND_EQ) {
1343                /* Simplify EQ comparisons where one of the pairs
1344                   can be simplified.  */
1345                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1346                                               op->args[0], op->args[2],
1347                                               TCG_COND_EQ);
1348                if (tmp == 0) {
1349                    goto do_brcond_false;
1350                } else if (tmp == 1) {
1351                    goto do_brcond_high;
1352                }
1353                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1354                                               op->args[1], op->args[3],
1355                                               TCG_COND_EQ);
1356                if (tmp == 0) {
1357                    goto do_brcond_false;
1358                } else if (tmp != 1) {
1359                    goto do_default;
1360                }
1361            do_brcond_low:
1362                bitmap_zero(temps_used.l, nb_temps);
1363                op->opc = INDEX_op_brcond_i32;
1364                op->args[1] = op->args[2];
1365                op->args[2] = op->args[4];
1366                op->args[3] = op->args[5];
1367            } else if (op->args[4] == TCG_COND_NE) {
1368                /* Simplify NE comparisons where one of the pairs
1369                   can be simplified.  */
1370                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1371                                               op->args[0], op->args[2],
1372                                               TCG_COND_NE);
1373                if (tmp == 0) {
1374                    goto do_brcond_high;
1375                } else if (tmp == 1) {
1376                    goto do_brcond_true;
1377                }
1378                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1379                                               op->args[1], op->args[3],
1380                                               TCG_COND_NE);
1381                if (tmp == 0) {
1382                    goto do_brcond_low;
1383                } else if (tmp == 1) {
1384                    goto do_brcond_true;
1385                }
1386                goto do_default;
1387            } else {
1388                goto do_default;
1389            }
1390            break;
1391
1392        case INDEX_op_setcond2_i32:
1393            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
1394                                            op->args[5]);
1395            if (tmp != 2) {
1396            do_setcond_const:
1397                tcg_opt_gen_movi(s, op, op->args[0], tmp);
1398            } else if ((op->args[5] == TCG_COND_LT
1399                        || op->args[5] == TCG_COND_GE)
1400                       && arg_is_const(op->args[3])
1401                       && arg_info(op->args[3])->val == 0
1402                       && arg_is_const(op->args[4])
1403                       && arg_info(op->args[4])->val == 0) {
1404                /* Simplify LT/GE comparisons vs zero to a single compare
1405                   vs the high word of the input.  */
1406            do_setcond_high:
1407                reset_temp(op->args[0]);
1408                arg_info(op->args[0])->mask = 1;
1409                op->opc = INDEX_op_setcond_i32;
1410                op->args[1] = op->args[2];
1411                op->args[2] = op->args[4];
1412                op->args[3] = op->args[5];
1413            } else if (op->args[5] == TCG_COND_EQ) {
1414                /* Simplify EQ comparisons where one of the pairs
1415                   can be simplified.  */
1416                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1417                                               op->args[1], op->args[3],
1418                                               TCG_COND_EQ);
1419                if (tmp == 0) {
1420                    goto do_setcond_const;
1421                } else if (tmp == 1) {
1422                    goto do_setcond_high;
1423                }
1424                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1425                                               op->args[2], op->args[4],
1426                                               TCG_COND_EQ);
1427                if (tmp == 0) {
1428                    goto do_setcond_high;
1429                } else if (tmp != 1) {
1430                    goto do_default;
1431                }
1432            do_setcond_low:
1433                reset_temp(op->args[0]);
1434                arg_info(op->args[0])->mask = 1;
1435                op->opc = INDEX_op_setcond_i32;
1436                op->args[2] = op->args[3];
1437                op->args[3] = op->args[5];
1438            } else if (op->args[5] == TCG_COND_NE) {
1439                /* Simplify NE comparisons where one of the pairs
1440                   can be simplified.  */
1441                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1442                                               op->args[1], op->args[3],
1443                                               TCG_COND_NE);
1444                if (tmp == 0) {
1445                    goto do_setcond_high;
1446                } else if (tmp == 1) {
1447                    goto do_setcond_const;
1448                }
1449                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1450                                               op->args[2], op->args[4],
1451                                               TCG_COND_NE);
1452                if (tmp == 0) {
1453                    goto do_setcond_low;
1454                } else if (tmp == 1) {
1455                    goto do_setcond_const;
1456                }
1457                goto do_default;
1458            } else {
1459                goto do_default;
1460            }
1461            break;
1462
1463        case INDEX_op_call:
1464            if (!(op->args[nb_oargs + nb_iargs + 1]
1465                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1466                for (i = 0; i < nb_globals; i++) {
1467                    if (test_bit(i, temps_used.l)) {
1468                        reset_ts(&s->temps[i]);
1469                    }
1470                }
1471            }
1472            goto do_reset_output;
1473
1474        default:
1475        do_default:
1476            /* Default case: we know nothing about operation (or were unable
1477               to compute the operation result) so no propagation is done.
1478               We trash everything if the operation is the end of a basic
1479               block, otherwise we only trash the output args.  "mask" is
1480               the non-zero bits mask for the first output arg.  */
1481            if (def->flags & TCG_OPF_BB_END) {
1482                bitmap_zero(temps_used.l, nb_temps);
1483            } else {
1484        do_reset_output:
1485                for (i = 0; i < nb_oargs; i++) {
1486                    reset_temp(op->args[i]);
1487                    /* Save the corresponding known-zero bits mask for the
1488                       first output argument (only one supported so far). */
1489                    if (i == 0) {
1490                        arg_info(op->args[i])->mask = mask;
1491                    }
1492                }
1493            }
1494            break;
1495        }
1496
1497        /* Eliminate duplicate and redundant fence instructions.  */
1498        if (prev_mb) {
1499            switch (opc) {
1500            case INDEX_op_mb:
1501                /* Merge two barriers of the same type into one,
1502                 * or a weaker barrier into a stronger one,
1503                 * or two weaker barriers into a stronger one.
1504                 *   mb X; mb Y => mb X|Y
1505                 *   mb; strl => mb; st
1506                 *   ldaq; mb => ld; mb
1507                 *   ldaq; strl => ld; mb; st
1508                 * Other combinations are also merged into a strong
1509                 * barrier.  This is stricter than specified but for
1510                 * the purposes of TCG is better than not optimizing.
1511                 */
1512                prev_mb->args[0] |= op->args[0];
1513                tcg_op_remove(s, op);
1514                break;
1515
1516            default:
1517                /* Opcodes that end the block stop the optimization.  */
1518                if ((def->flags & TCG_OPF_BB_END) == 0) {
1519                    break;
1520                }
1521                /* fallthru */
1522            case INDEX_op_qemu_ld_i32:
1523            case INDEX_op_qemu_ld_i64:
1524            case INDEX_op_qemu_st_i32:
1525            case INDEX_op_qemu_st_i64:
1526            case INDEX_op_call:
1527                /* Opcodes that touch guest memory stop the optimization.  */
1528                prev_mb = NULL;
1529                break;
1530            }
1531        } else if (opc == INDEX_op_mb) {
1532            prev_mb = op;
1533        }
1534    }
1535}
1536