qemu/tcg/optimize.c
<<
>>
Prefs
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qemu-common.h"
  28#include "exec/cpu-common.h"
  29#include "tcg-op.h"
  30
  31#define CASE_OP_32_64(x)                        \
  32        glue(glue(case INDEX_op_, x), _i32):    \
  33        glue(glue(case INDEX_op_, x), _i64)
  34
  35struct tcg_temp_info {
  36    bool is_const;
  37    uint16_t prev_copy;
  38    uint16_t next_copy;
  39    tcg_target_ulong val;
  40    tcg_target_ulong mask;
  41};
  42
  43static struct tcg_temp_info temps[TCG_MAX_TEMPS];
  44static TCGTempSet temps_used;
  45
  46static inline bool temp_is_const(TCGArg arg)
  47{
  48    return temps[arg].is_const;
  49}
  50
  51static inline bool temp_is_copy(TCGArg arg)
  52{
  53    return temps[arg].next_copy != arg;
  54}
  55
  56/* Reset TEMP's state, possibly removing the temp for the list of copies.  */
  57static void reset_temp(TCGArg temp)
  58{
  59    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
  60    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
  61    temps[temp].next_copy = temp;
  62    temps[temp].prev_copy = temp;
  63    temps[temp].is_const = false;
  64    temps[temp].mask = -1;
  65}
  66
  67/* Reset all temporaries, given that there are NB_TEMPS of them.  */
  68static void reset_all_temps(int nb_temps)
  69{
  70    bitmap_zero(temps_used.l, nb_temps);
  71}
  72
  73/* Initialize and activate a temporary.  */
  74static void init_temp_info(TCGArg temp)
  75{
  76    if (!test_bit(temp, temps_used.l)) {
  77        temps[temp].next_copy = temp;
  78        temps[temp].prev_copy = temp;
  79        temps[temp].is_const = false;
  80        temps[temp].mask = -1;
  81        set_bit(temp, temps_used.l);
  82    }
  83}
  84
  85static int op_bits(TCGOpcode op)
  86{
  87    const TCGOpDef *def = &tcg_op_defs[op];
  88    return def->flags & TCG_OPF_64BIT ? 64 : 32;
  89}
  90
  91static TCGOpcode op_to_mov(TCGOpcode op)
  92{
  93    switch (op_bits(op)) {
  94    case 32:
  95        return INDEX_op_mov_i32;
  96    case 64:
  97        return INDEX_op_mov_i64;
  98    default:
  99        fprintf(stderr, "op_to_mov: unexpected return value of "
 100                "function op_bits.\n");
 101        tcg_abort();
 102    }
 103}
 104
 105static TCGOpcode op_to_movi(TCGOpcode op)
 106{
 107    switch (op_bits(op)) {
 108    case 32:
 109        return INDEX_op_movi_i32;
 110    case 64:
 111        return INDEX_op_movi_i64;
 112    default:
 113        fprintf(stderr, "op_to_movi: unexpected return value of "
 114                "function op_bits.\n");
 115        tcg_abort();
 116    }
 117}
 118
 119static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
 120{
 121    TCGArg i;
 122
 123    /* If this is already a global, we can't do better. */
 124    if (temp < s->nb_globals) {
 125        return temp;
 126    }
 127
 128    /* Search for a global first. */
 129    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
 130        if (i < s->nb_globals) {
 131            return i;
 132        }
 133    }
 134
 135    /* If it is a temp, search for a temp local. */
 136    if (!s->temps[temp].temp_local) {
 137        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
 138            if (s->temps[i].temp_local) {
 139                return i;
 140            }
 141        }
 142    }
 143
 144    /* Failure to find a better representation, return the same temp. */
 145    return temp;
 146}
 147
 148static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
 149{
 150    TCGArg i;
 151
 152    if (arg1 == arg2) {
 153        return true;
 154    }
 155
 156    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
 157        return false;
 158    }
 159
 160    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
 161        if (i == arg2) {
 162            return true;
 163        }
 164    }
 165
 166    return false;
 167}
 168
 169static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
 170                             TCGArg dst, TCGArg val)
 171{
 172    TCGOpcode new_op = op_to_movi(op->opc);
 173    tcg_target_ulong mask;
 174
 175    op->opc = new_op;
 176
 177    reset_temp(dst);
 178    temps[dst].is_const = true;
 179    temps[dst].val = val;
 180    mask = val;
 181    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
 182        /* High bits of the destination are now garbage.  */
 183        mask |= ~0xffffffffull;
 184    }
 185    temps[dst].mask = mask;
 186
 187    args[0] = dst;
 188    args[1] = val;
 189}
 190
 191static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
 192                            TCGArg dst, TCGArg src)
 193{
 194    if (temps_are_copies(dst, src)) {
 195        tcg_op_remove(s, op);
 196        return;
 197    }
 198
 199    TCGOpcode new_op = op_to_mov(op->opc);
 200    tcg_target_ulong mask;
 201
 202    op->opc = new_op;
 203
 204    reset_temp(dst);
 205    mask = temps[src].mask;
 206    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
 207        /* High bits of the destination are now garbage.  */
 208        mask |= ~0xffffffffull;
 209    }
 210    temps[dst].mask = mask;
 211
 212    if (s->temps[src].type == s->temps[dst].type) {
 213        temps[dst].next_copy = temps[src].next_copy;
 214        temps[dst].prev_copy = src;
 215        temps[temps[dst].next_copy].prev_copy = dst;
 216        temps[src].next_copy = dst;
 217        temps[dst].is_const = temps[src].is_const;
 218        temps[dst].val = temps[src].val;
 219    }
 220
 221    args[0] = dst;
 222    args[1] = src;
 223}
 224
 225static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
 226{
 227    uint64_t l64, h64;
 228
 229    switch (op) {
 230    CASE_OP_32_64(add):
 231        return x + y;
 232
 233    CASE_OP_32_64(sub):
 234        return x - y;
 235
 236    CASE_OP_32_64(mul):
 237        return x * y;
 238
 239    CASE_OP_32_64(and):
 240        return x & y;
 241
 242    CASE_OP_32_64(or):
 243        return x | y;
 244
 245    CASE_OP_32_64(xor):
 246        return x ^ y;
 247
 248    case INDEX_op_shl_i32:
 249        return (uint32_t)x << (y & 31);
 250
 251    case INDEX_op_shl_i64:
 252        return (uint64_t)x << (y & 63);
 253
 254    case INDEX_op_shr_i32:
 255        return (uint32_t)x >> (y & 31);
 256
 257    case INDEX_op_shr_i64:
 258        return (uint64_t)x >> (y & 63);
 259
 260    case INDEX_op_sar_i32:
 261        return (int32_t)x >> (y & 31);
 262
 263    case INDEX_op_sar_i64:
 264        return (int64_t)x >> (y & 63);
 265
 266    case INDEX_op_rotr_i32:
 267        return ror32(x, y & 31);
 268
 269    case INDEX_op_rotr_i64:
 270        return ror64(x, y & 63);
 271
 272    case INDEX_op_rotl_i32:
 273        return rol32(x, y & 31);
 274
 275    case INDEX_op_rotl_i64:
 276        return rol64(x, y & 63);
 277
 278    CASE_OP_32_64(not):
 279        return ~x;
 280
 281    CASE_OP_32_64(neg):
 282        return -x;
 283
 284    CASE_OP_32_64(andc):
 285        return x & ~y;
 286
 287    CASE_OP_32_64(orc):
 288        return x | ~y;
 289
 290    CASE_OP_32_64(eqv):
 291        return ~(x ^ y);
 292
 293    CASE_OP_32_64(nand):
 294        return ~(x & y);
 295
 296    CASE_OP_32_64(nor):
 297        return ~(x | y);
 298
 299    case INDEX_op_clz_i32:
 300        return (uint32_t)x ? clz32(x) : y;
 301
 302    case INDEX_op_clz_i64:
 303        return x ? clz64(x) : y;
 304
 305    case INDEX_op_ctz_i32:
 306        return (uint32_t)x ? ctz32(x) : y;
 307
 308    case INDEX_op_ctz_i64:
 309        return x ? ctz64(x) : y;
 310
 311    case INDEX_op_ctpop_i32:
 312        return ctpop32(x);
 313
 314    case INDEX_op_ctpop_i64:
 315        return ctpop64(x);
 316
 317    CASE_OP_32_64(ext8s):
 318        return (int8_t)x;
 319
 320    CASE_OP_32_64(ext16s):
 321        return (int16_t)x;
 322
 323    CASE_OP_32_64(ext8u):
 324        return (uint8_t)x;
 325
 326    CASE_OP_32_64(ext16u):
 327        return (uint16_t)x;
 328
 329    case INDEX_op_ext_i32_i64:
 330    case INDEX_op_ext32s_i64:
 331        return (int32_t)x;
 332
 333    case INDEX_op_extu_i32_i64:
 334    case INDEX_op_extrl_i64_i32:
 335    case INDEX_op_ext32u_i64:
 336        return (uint32_t)x;
 337
 338    case INDEX_op_extrh_i64_i32:
 339        return (uint64_t)x >> 32;
 340
 341    case INDEX_op_muluh_i32:
 342        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
 343    case INDEX_op_mulsh_i32:
 344        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
 345
 346    case INDEX_op_muluh_i64:
 347        mulu64(&l64, &h64, x, y);
 348        return h64;
 349    case INDEX_op_mulsh_i64:
 350        muls64(&l64, &h64, x, y);
 351        return h64;
 352
 353    case INDEX_op_div_i32:
 354        /* Avoid crashing on divide by zero, otherwise undefined.  */
 355        return (int32_t)x / ((int32_t)y ? : 1);
 356    case INDEX_op_divu_i32:
 357        return (uint32_t)x / ((uint32_t)y ? : 1);
 358    case INDEX_op_div_i64:
 359        return (int64_t)x / ((int64_t)y ? : 1);
 360    case INDEX_op_divu_i64:
 361        return (uint64_t)x / ((uint64_t)y ? : 1);
 362
 363    case INDEX_op_rem_i32:
 364        return (int32_t)x % ((int32_t)y ? : 1);
 365    case INDEX_op_remu_i32:
 366        return (uint32_t)x % ((uint32_t)y ? : 1);
 367    case INDEX_op_rem_i64:
 368        return (int64_t)x % ((int64_t)y ? : 1);
 369    case INDEX_op_remu_i64:
 370        return (uint64_t)x % ((uint64_t)y ? : 1);
 371
 372    default:
 373        fprintf(stderr,
 374                "Unrecognized operation %d in do_constant_folding.\n", op);
 375        tcg_abort();
 376    }
 377}
 378
 379static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
 380{
 381    TCGArg res = do_constant_folding_2(op, x, y);
 382    if (op_bits(op) == 32) {
 383        res = (int32_t)res;
 384    }
 385    return res;
 386}
 387
 388static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 389{
 390    switch (c) {
 391    case TCG_COND_EQ:
 392        return x == y;
 393    case TCG_COND_NE:
 394        return x != y;
 395    case TCG_COND_LT:
 396        return (int32_t)x < (int32_t)y;
 397    case TCG_COND_GE:
 398        return (int32_t)x >= (int32_t)y;
 399    case TCG_COND_LE:
 400        return (int32_t)x <= (int32_t)y;
 401    case TCG_COND_GT:
 402        return (int32_t)x > (int32_t)y;
 403    case TCG_COND_LTU:
 404        return x < y;
 405    case TCG_COND_GEU:
 406        return x >= y;
 407    case TCG_COND_LEU:
 408        return x <= y;
 409    case TCG_COND_GTU:
 410        return x > y;
 411    default:
 412        tcg_abort();
 413    }
 414}
 415
 416static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 417{
 418    switch (c) {
 419    case TCG_COND_EQ:
 420        return x == y;
 421    case TCG_COND_NE:
 422        return x != y;
 423    case TCG_COND_LT:
 424        return (int64_t)x < (int64_t)y;
 425    case TCG_COND_GE:
 426        return (int64_t)x >= (int64_t)y;
 427    case TCG_COND_LE:
 428        return (int64_t)x <= (int64_t)y;
 429    case TCG_COND_GT:
 430        return (int64_t)x > (int64_t)y;
 431    case TCG_COND_LTU:
 432        return x < y;
 433    case TCG_COND_GEU:
 434        return x >= y;
 435    case TCG_COND_LEU:
 436        return x <= y;
 437    case TCG_COND_GTU:
 438        return x > y;
 439    default:
 440        tcg_abort();
 441    }
 442}
 443
 444static bool do_constant_folding_cond_eq(TCGCond c)
 445{
 446    switch (c) {
 447    case TCG_COND_GT:
 448    case TCG_COND_LTU:
 449    case TCG_COND_LT:
 450    case TCG_COND_GTU:
 451    case TCG_COND_NE:
 452        return 0;
 453    case TCG_COND_GE:
 454    case TCG_COND_GEU:
 455    case TCG_COND_LE:
 456    case TCG_COND_LEU:
 457    case TCG_COND_EQ:
 458        return 1;
 459    default:
 460        tcg_abort();
 461    }
 462}
 463
 464/* Return 2 if the condition can't be simplified, and the result
 465   of the condition (0 or 1) if it can */
 466static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
 467                                       TCGArg y, TCGCond c)
 468{
 469    if (temp_is_const(x) && temp_is_const(y)) {
 470        switch (op_bits(op)) {
 471        case 32:
 472            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
 473        case 64:
 474            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
 475        default:
 476            tcg_abort();
 477        }
 478    } else if (temps_are_copies(x, y)) {
 479        return do_constant_folding_cond_eq(c);
 480    } else if (temp_is_const(y) && temps[y].val == 0) {
 481        switch (c) {
 482        case TCG_COND_LTU:
 483            return 0;
 484        case TCG_COND_GEU:
 485            return 1;
 486        default:
 487            return 2;
 488        }
 489    }
 490    return 2;
 491}
 492
 493/* Return 2 if the condition can't be simplified, and the result
 494   of the condition (0 or 1) if it can */
 495static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 496{
 497    TCGArg al = p1[0], ah = p1[1];
 498    TCGArg bl = p2[0], bh = p2[1];
 499
 500    if (temp_is_const(bl) && temp_is_const(bh)) {
 501        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
 502
 503        if (temp_is_const(al) && temp_is_const(ah)) {
 504            uint64_t a;
 505            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
 506            return do_constant_folding_cond_64(a, b, c);
 507        }
 508        if (b == 0) {
 509            switch (c) {
 510            case TCG_COND_LTU:
 511                return 0;
 512            case TCG_COND_GEU:
 513                return 1;
 514            default:
 515                break;
 516            }
 517        }
 518    }
 519    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
 520        return do_constant_folding_cond_eq(c);
 521    }
 522    return 2;
 523}
 524
 525static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 526{
 527    TCGArg a1 = *p1, a2 = *p2;
 528    int sum = 0;
 529    sum += temp_is_const(a1);
 530    sum -= temp_is_const(a2);
 531
 532    /* Prefer the constant in second argument, and then the form
 533       op a, a, b, which is better handled on non-RISC hosts. */
 534    if (sum > 0 || (sum == 0 && dest == a2)) {
 535        *p1 = a2;
 536        *p2 = a1;
 537        return true;
 538    }
 539    return false;
 540}
 541
 542static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 543{
 544    int sum = 0;
 545    sum += temp_is_const(p1[0]);
 546    sum += temp_is_const(p1[1]);
 547    sum -= temp_is_const(p2[0]);
 548    sum -= temp_is_const(p2[1]);
 549    if (sum > 0) {
 550        TCGArg t;
 551        t = p1[0], p1[0] = p2[0], p2[0] = t;
 552        t = p1[1], p1[1] = p2[1], p2[1] = t;
 553        return true;
 554    }
 555    return false;
 556}
 557
 558/* Propagate constants and copies, fold constant expressions. */
 559void tcg_optimize(TCGContext *s)
 560{
 561    int oi, oi_next, nb_temps, nb_globals;
 562    TCGArg *prev_mb_args = NULL;
 563
 564    /* Array VALS has an element for each temp.
 565       If this temp holds a constant then its value is kept in VALS' element.
 566       If this temp is a copy of other ones then the other copies are
 567       available through the doubly linked circular list. */
 568
 569    nb_temps = s->nb_temps;
 570    nb_globals = s->nb_globals;
 571    reset_all_temps(nb_temps);
 572
 573    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
 574        tcg_target_ulong mask, partmask, affected;
 575        int nb_oargs, nb_iargs, i;
 576        TCGArg tmp;
 577
 578        TCGOp * const op = &s->gen_op_buf[oi];
 579        TCGArg * const args = &s->gen_opparam_buf[op->args];
 580        TCGOpcode opc = op->opc;
 581        const TCGOpDef *def = &tcg_op_defs[opc];
 582
 583        oi_next = op->next;
 584
 585        /* Count the arguments, and initialize the temps that are
 586           going to be used */
 587        if (opc == INDEX_op_call) {
 588            nb_oargs = op->callo;
 589            nb_iargs = op->calli;
 590            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 591                tmp = args[i];
 592                if (tmp != TCG_CALL_DUMMY_ARG) {
 593                    init_temp_info(tmp);
 594                }
 595            }
 596        } else {
 597            nb_oargs = def->nb_oargs;
 598            nb_iargs = def->nb_iargs;
 599            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 600                init_temp_info(args[i]);
 601            }
 602        }
 603
 604        /* Do copy propagation */
 605        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 606            if (temp_is_copy(args[i])) {
 607                args[i] = find_better_copy(s, args[i]);
 608            }
 609        }
 610
 611        /* For commutative operations make constant second argument */
 612        switch (opc) {
 613        CASE_OP_32_64(add):
 614        CASE_OP_32_64(mul):
 615        CASE_OP_32_64(and):
 616        CASE_OP_32_64(or):
 617        CASE_OP_32_64(xor):
 618        CASE_OP_32_64(eqv):
 619        CASE_OP_32_64(nand):
 620        CASE_OP_32_64(nor):
 621        CASE_OP_32_64(muluh):
 622        CASE_OP_32_64(mulsh):
 623            swap_commutative(args[0], &args[1], &args[2]);
 624            break;
 625        CASE_OP_32_64(brcond):
 626            if (swap_commutative(-1, &args[0], &args[1])) {
 627                args[2] = tcg_swap_cond(args[2]);
 628            }
 629            break;
 630        CASE_OP_32_64(setcond):
 631            if (swap_commutative(args[0], &args[1], &args[2])) {
 632                args[3] = tcg_swap_cond(args[3]);
 633            }
 634            break;
 635        CASE_OP_32_64(movcond):
 636            if (swap_commutative(-1, &args[1], &args[2])) {
 637                args[5] = tcg_swap_cond(args[5]);
 638            }
 639            /* For movcond, we canonicalize the "false" input reg to match
 640               the destination reg so that the tcg backend can implement
 641               a "move if true" operation.  */
 642            if (swap_commutative(args[0], &args[4], &args[3])) {
 643                args[5] = tcg_invert_cond(args[5]);
 644            }
 645            break;
 646        CASE_OP_32_64(add2):
 647            swap_commutative(args[0], &args[2], &args[4]);
 648            swap_commutative(args[1], &args[3], &args[5]);
 649            break;
 650        CASE_OP_32_64(mulu2):
 651        CASE_OP_32_64(muls2):
 652            swap_commutative(args[0], &args[2], &args[3]);
 653            break;
 654        case INDEX_op_brcond2_i32:
 655            if (swap_commutative2(&args[0], &args[2])) {
 656                args[4] = tcg_swap_cond(args[4]);
 657            }
 658            break;
 659        case INDEX_op_setcond2_i32:
 660            if (swap_commutative2(&args[1], &args[3])) {
 661                args[5] = tcg_swap_cond(args[5]);
 662            }
 663            break;
 664        default:
 665            break;
 666        }
 667
 668        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
 669           and "sub r, 0, a => neg r, a" case.  */
 670        switch (opc) {
 671        CASE_OP_32_64(shl):
 672        CASE_OP_32_64(shr):
 673        CASE_OP_32_64(sar):
 674        CASE_OP_32_64(rotl):
 675        CASE_OP_32_64(rotr):
 676            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
 677                tcg_opt_gen_movi(s, op, args, args[0], 0);
 678                continue;
 679            }
 680            break;
 681        CASE_OP_32_64(sub):
 682            {
 683                TCGOpcode neg_op;
 684                bool have_neg;
 685
 686                if (temp_is_const(args[2])) {
 687                    /* Proceed with possible constant folding. */
 688                    break;
 689                }
 690                if (opc == INDEX_op_sub_i32) {
 691                    neg_op = INDEX_op_neg_i32;
 692                    have_neg = TCG_TARGET_HAS_neg_i32;
 693                } else {
 694                    neg_op = INDEX_op_neg_i64;
 695                    have_neg = TCG_TARGET_HAS_neg_i64;
 696                }
 697                if (!have_neg) {
 698                    break;
 699                }
 700                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
 701                    op->opc = neg_op;
 702                    reset_temp(args[0]);
 703                    args[1] = args[2];
 704                    continue;
 705                }
 706            }
 707            break;
 708        CASE_OP_32_64(xor):
 709        CASE_OP_32_64(nand):
 710            if (!temp_is_const(args[1])
 711                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
 712                i = 1;
 713                goto try_not;
 714            }
 715            break;
 716        CASE_OP_32_64(nor):
 717            if (!temp_is_const(args[1])
 718                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
 719                i = 1;
 720                goto try_not;
 721            }
 722            break;
 723        CASE_OP_32_64(andc):
 724            if (!temp_is_const(args[2])
 725                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
 726                i = 2;
 727                goto try_not;
 728            }
 729            break;
 730        CASE_OP_32_64(orc):
 731        CASE_OP_32_64(eqv):
 732            if (!temp_is_const(args[2])
 733                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
 734                i = 2;
 735                goto try_not;
 736            }
 737            break;
 738        try_not:
 739            {
 740                TCGOpcode not_op;
 741                bool have_not;
 742
 743                if (def->flags & TCG_OPF_64BIT) {
 744                    not_op = INDEX_op_not_i64;
 745                    have_not = TCG_TARGET_HAS_not_i64;
 746                } else {
 747                    not_op = INDEX_op_not_i32;
 748                    have_not = TCG_TARGET_HAS_not_i32;
 749                }
 750                if (!have_not) {
 751                    break;
 752                }
 753                op->opc = not_op;
 754                reset_temp(args[0]);
 755                args[1] = args[i];
 756                continue;
 757            }
 758        default:
 759            break;
 760        }
 761
 762        /* Simplify expression for "op r, a, const => mov r, a" cases */
 763        switch (opc) {
 764        CASE_OP_32_64(add):
 765        CASE_OP_32_64(sub):
 766        CASE_OP_32_64(shl):
 767        CASE_OP_32_64(shr):
 768        CASE_OP_32_64(sar):
 769        CASE_OP_32_64(rotl):
 770        CASE_OP_32_64(rotr):
 771        CASE_OP_32_64(or):
 772        CASE_OP_32_64(xor):
 773        CASE_OP_32_64(andc):
 774            if (!temp_is_const(args[1])
 775                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
 776                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 777                continue;
 778            }
 779            break;
 780        CASE_OP_32_64(and):
 781        CASE_OP_32_64(orc):
 782        CASE_OP_32_64(eqv):
 783            if (!temp_is_const(args[1])
 784                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
 785                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 786                continue;
 787            }
 788            break;
 789        default:
 790            break;
 791        }
 792
 793        /* Simplify using known-zero bits. Currently only ops with a single
 794           output argument is supported. */
 795        mask = -1;
 796        affected = -1;
 797        switch (opc) {
 798        CASE_OP_32_64(ext8s):
 799            if ((temps[args[1]].mask & 0x80) != 0) {
 800                break;
 801            }
 802        CASE_OP_32_64(ext8u):
 803            mask = 0xff;
 804            goto and_const;
 805        CASE_OP_32_64(ext16s):
 806            if ((temps[args[1]].mask & 0x8000) != 0) {
 807                break;
 808            }
 809        CASE_OP_32_64(ext16u):
 810            mask = 0xffff;
 811            goto and_const;
 812        case INDEX_op_ext32s_i64:
 813            if ((temps[args[1]].mask & 0x80000000) != 0) {
 814                break;
 815            }
 816        case INDEX_op_ext32u_i64:
 817            mask = 0xffffffffU;
 818            goto and_const;
 819
 820        CASE_OP_32_64(and):
 821            mask = temps[args[2]].mask;
 822            if (temp_is_const(args[2])) {
 823        and_const:
 824                affected = temps[args[1]].mask & ~mask;
 825            }
 826            mask = temps[args[1]].mask & mask;
 827            break;
 828
 829        case INDEX_op_ext_i32_i64:
 830            if ((temps[args[1]].mask & 0x80000000) != 0) {
 831                break;
 832            }
 833        case INDEX_op_extu_i32_i64:
 834            /* We do not compute affected as it is a size changing op.  */
 835            mask = (uint32_t)temps[args[1]].mask;
 836            break;
 837
 838        CASE_OP_32_64(andc):
 839            /* Known-zeros does not imply known-ones.  Therefore unless
 840               args[2] is constant, we can't infer anything from it.  */
 841            if (temp_is_const(args[2])) {
 842                mask = ~temps[args[2]].mask;
 843                goto and_const;
 844            }
 845            /* But we certainly know nothing outside args[1] may be set. */
 846            mask = temps[args[1]].mask;
 847            break;
 848
 849        case INDEX_op_sar_i32:
 850            if (temp_is_const(args[2])) {
 851                tmp = temps[args[2]].val & 31;
 852                mask = (int32_t)temps[args[1]].mask >> tmp;
 853            }
 854            break;
 855        case INDEX_op_sar_i64:
 856            if (temp_is_const(args[2])) {
 857                tmp = temps[args[2]].val & 63;
 858                mask = (int64_t)temps[args[1]].mask >> tmp;
 859            }
 860            break;
 861
 862        case INDEX_op_shr_i32:
 863            if (temp_is_const(args[2])) {
 864                tmp = temps[args[2]].val & 31;
 865                mask = (uint32_t)temps[args[1]].mask >> tmp;
 866            }
 867            break;
 868        case INDEX_op_shr_i64:
 869            if (temp_is_const(args[2])) {
 870                tmp = temps[args[2]].val & 63;
 871                mask = (uint64_t)temps[args[1]].mask >> tmp;
 872            }
 873            break;
 874
 875        case INDEX_op_extrl_i64_i32:
 876            mask = (uint32_t)temps[args[1]].mask;
 877            break;
 878        case INDEX_op_extrh_i64_i32:
 879            mask = (uint64_t)temps[args[1]].mask >> 32;
 880            break;
 881
 882        CASE_OP_32_64(shl):
 883            if (temp_is_const(args[2])) {
 884                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
 885                mask = temps[args[1]].mask << tmp;
 886            }
 887            break;
 888
 889        CASE_OP_32_64(neg):
 890            /* Set to 1 all bits to the left of the rightmost.  */
 891            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
 892            break;
 893
 894        CASE_OP_32_64(deposit):
 895            mask = deposit64(temps[args[1]].mask, args[3], args[4],
 896                             temps[args[2]].mask);
 897            break;
 898
 899        CASE_OP_32_64(extract):
 900            mask = extract64(temps[args[1]].mask, args[2], args[3]);
 901            if (args[2] == 0) {
 902                affected = temps[args[1]].mask & ~mask;
 903            }
 904            break;
 905        CASE_OP_32_64(sextract):
 906            mask = sextract64(temps[args[1]].mask, args[2], args[3]);
 907            if (args[2] == 0 && (tcg_target_long)mask >= 0) {
 908                affected = temps[args[1]].mask & ~mask;
 909            }
 910            break;
 911
 912        CASE_OP_32_64(or):
 913        CASE_OP_32_64(xor):
 914            mask = temps[args[1]].mask | temps[args[2]].mask;
 915            break;
 916
 917        case INDEX_op_clz_i32:
 918        case INDEX_op_ctz_i32:
 919            mask = temps[args[2]].mask | 31;
 920            break;
 921
 922        case INDEX_op_clz_i64:
 923        case INDEX_op_ctz_i64:
 924            mask = temps[args[2]].mask | 63;
 925            break;
 926
 927        case INDEX_op_ctpop_i32:
 928            mask = 32 | 31;
 929            break;
 930        case INDEX_op_ctpop_i64:
 931            mask = 64 | 63;
 932            break;
 933
 934        CASE_OP_32_64(setcond):
 935        case INDEX_op_setcond2_i32:
 936            mask = 1;
 937            break;
 938
 939        CASE_OP_32_64(movcond):
 940            mask = temps[args[3]].mask | temps[args[4]].mask;
 941            break;
 942
 943        CASE_OP_32_64(ld8u):
 944            mask = 0xff;
 945            break;
 946        CASE_OP_32_64(ld16u):
 947            mask = 0xffff;
 948            break;
 949        case INDEX_op_ld32u_i64:
 950            mask = 0xffffffffu;
 951            break;
 952
 953        CASE_OP_32_64(qemu_ld):
 954            {
 955                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
 956                TCGMemOp mop = get_memop(oi);
 957                if (!(mop & MO_SIGN)) {
 958                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
 959                }
 960            }
 961            break;
 962
 963        default:
 964            break;
 965        }
 966
 967        /* 32-bit ops generate 32-bit results.  For the result is zero test
 968           below, we can ignore high bits, but for further optimizations we
 969           need to record that the high bits contain garbage.  */
 970        partmask = mask;
 971        if (!(def->flags & TCG_OPF_64BIT)) {
 972            mask |= ~(tcg_target_ulong)0xffffffffu;
 973            partmask &= 0xffffffffu;
 974            affected &= 0xffffffffu;
 975        }
 976
 977        if (partmask == 0) {
 978            tcg_debug_assert(nb_oargs == 1);
 979            tcg_opt_gen_movi(s, op, args, args[0], 0);
 980            continue;
 981        }
 982        if (affected == 0) {
 983            tcg_debug_assert(nb_oargs == 1);
 984            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 985            continue;
 986        }
 987
 988        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
 989        switch (opc) {
 990        CASE_OP_32_64(and):
 991        CASE_OP_32_64(mul):
 992        CASE_OP_32_64(muluh):
 993        CASE_OP_32_64(mulsh):
 994            if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
 995                tcg_opt_gen_movi(s, op, args, args[0], 0);
 996                continue;
 997            }
 998            break;
 999        default:
1000            break;
1001        }
1002
1003        /* Simplify expression for "op r, a, a => mov r, a" cases */
1004        switch (opc) {
1005        CASE_OP_32_64(or):
1006        CASE_OP_32_64(and):
1007            if (temps_are_copies(args[1], args[2])) {
1008                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
1009                continue;
1010            }
1011            break;
1012        default:
1013            break;
1014        }
1015
1016        /* Simplify expression for "op r, a, a => movi r, 0" cases */
1017        switch (opc) {
1018        CASE_OP_32_64(andc):
1019        CASE_OP_32_64(sub):
1020        CASE_OP_32_64(xor):
1021            if (temps_are_copies(args[1], args[2])) {
1022                tcg_opt_gen_movi(s, op, args, args[0], 0);
1023                continue;
1024            }
1025            break;
1026        default:
1027            break;
1028        }
1029
1030        /* Propagate constants through copy operations and do constant
1031           folding.  Constants will be substituted to arguments by register
1032           allocator where needed and possible.  Also detect copies. */
1033        switch (opc) {
1034        CASE_OP_32_64(mov):
1035            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
1036            break;
1037        CASE_OP_32_64(movi):
1038            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
1039            break;
1040
1041        CASE_OP_32_64(not):
1042        CASE_OP_32_64(neg):
1043        CASE_OP_32_64(ext8s):
1044        CASE_OP_32_64(ext8u):
1045        CASE_OP_32_64(ext16s):
1046        CASE_OP_32_64(ext16u):
1047        CASE_OP_32_64(ctpop):
1048        case INDEX_op_ext32s_i64:
1049        case INDEX_op_ext32u_i64:
1050        case INDEX_op_ext_i32_i64:
1051        case INDEX_op_extu_i32_i64:
1052        case INDEX_op_extrl_i64_i32:
1053        case INDEX_op_extrh_i64_i32:
1054            if (temp_is_const(args[1])) {
1055                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
1056                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1057                break;
1058            }
1059            goto do_default;
1060
1061        CASE_OP_32_64(add):
1062        CASE_OP_32_64(sub):
1063        CASE_OP_32_64(mul):
1064        CASE_OP_32_64(or):
1065        CASE_OP_32_64(and):
1066        CASE_OP_32_64(xor):
1067        CASE_OP_32_64(shl):
1068        CASE_OP_32_64(shr):
1069        CASE_OP_32_64(sar):
1070        CASE_OP_32_64(rotl):
1071        CASE_OP_32_64(rotr):
1072        CASE_OP_32_64(andc):
1073        CASE_OP_32_64(orc):
1074        CASE_OP_32_64(eqv):
1075        CASE_OP_32_64(nand):
1076        CASE_OP_32_64(nor):
1077        CASE_OP_32_64(muluh):
1078        CASE_OP_32_64(mulsh):
1079        CASE_OP_32_64(div):
1080        CASE_OP_32_64(divu):
1081        CASE_OP_32_64(rem):
1082        CASE_OP_32_64(remu):
1083            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1084                tmp = do_constant_folding(opc, temps[args[1]].val,
1085                                          temps[args[2]].val);
1086                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1087                break;
1088            }
1089            goto do_default;
1090
1091        CASE_OP_32_64(clz):
1092        CASE_OP_32_64(ctz):
1093            if (temp_is_const(args[1])) {
1094                TCGArg v = temps[args[1]].val;
1095                if (v != 0) {
1096                    tmp = do_constant_folding(opc, v, 0);
1097                    tcg_opt_gen_movi(s, op, args, args[0], tmp);
1098                } else {
1099                    tcg_opt_gen_mov(s, op, args, args[0], args[2]);
1100                }
1101                break;
1102            }
1103            goto do_default;
1104
1105        CASE_OP_32_64(deposit):
1106            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1107                tmp = deposit64(temps[args[1]].val, args[3], args[4],
1108                                temps[args[2]].val);
1109                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1110                break;
1111            }
1112            goto do_default;
1113
1114        CASE_OP_32_64(extract):
1115            if (temp_is_const(args[1])) {
1116                tmp = extract64(temps[args[1]].val, args[2], args[3]);
1117                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1118                break;
1119            }
1120            goto do_default;
1121
1122        CASE_OP_32_64(sextract):
1123            if (temp_is_const(args[1])) {
1124                tmp = sextract64(temps[args[1]].val, args[2], args[3]);
1125                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1126                break;
1127            }
1128            goto do_default;
1129
1130        CASE_OP_32_64(setcond):
1131            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
1132            if (tmp != 2) {
1133                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1134                break;
1135            }
1136            goto do_default;
1137
1138        CASE_OP_32_64(brcond):
1139            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
1140            if (tmp != 2) {
1141                if (tmp) {
1142                    reset_all_temps(nb_temps);
1143                    op->opc = INDEX_op_br;
1144                    args[0] = args[3];
1145                } else {
1146                    tcg_op_remove(s, op);
1147                }
1148                break;
1149            }
1150            goto do_default;
1151
1152        CASE_OP_32_64(movcond):
1153            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
1154            if (tmp != 2) {
1155                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
1156                break;
1157            }
1158            if (temp_is_const(args[3]) && temp_is_const(args[4])) {
1159                tcg_target_ulong tv = temps[args[3]].val;
1160                tcg_target_ulong fv = temps[args[4]].val;
1161                TCGCond cond = args[5];
1162                if (fv == 1 && tv == 0) {
1163                    cond = tcg_invert_cond(cond);
1164                } else if (!(tv == 1 && fv == 0)) {
1165                    goto do_default;
1166                }
1167                args[3] = cond;
1168                op->opc = opc = (opc == INDEX_op_movcond_i32
1169                                 ? INDEX_op_setcond_i32
1170                                 : INDEX_op_setcond_i64);
1171                nb_iargs = 2;
1172            }
1173            goto do_default;
1174
1175        case INDEX_op_add2_i32:
1176        case INDEX_op_sub2_i32:
1177            if (temp_is_const(args[2]) && temp_is_const(args[3])
1178                && temp_is_const(args[4]) && temp_is_const(args[5])) {
1179                uint32_t al = temps[args[2]].val;
1180                uint32_t ah = temps[args[3]].val;
1181                uint32_t bl = temps[args[4]].val;
1182                uint32_t bh = temps[args[5]].val;
1183                uint64_t a = ((uint64_t)ah << 32) | al;
1184                uint64_t b = ((uint64_t)bh << 32) | bl;
1185                TCGArg rl, rh;
1186                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
1187                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1188
1189                if (opc == INDEX_op_add2_i32) {
1190                    a += b;
1191                } else {
1192                    a -= b;
1193                }
1194
1195                rl = args[0];
1196                rh = args[1];
1197                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
1198                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));
1199
1200                /* We've done all we need to do with the movi.  Skip it.  */
1201                oi_next = op2->next;
1202                break;
1203            }
1204            goto do_default;
1205
1206        case INDEX_op_mulu2_i32:
1207            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
1208                uint32_t a = temps[args[2]].val;
1209                uint32_t b = temps[args[3]].val;
1210                uint64_t r = (uint64_t)a * b;
1211                TCGArg rl, rh;
1212                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
1213                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1214
1215                rl = args[0];
1216                rh = args[1];
1217                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
1218                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));
1219
1220                /* We've done all we need to do with the movi.  Skip it.  */
1221                oi_next = op2->next;
1222                break;
1223            }
1224            goto do_default;
1225
1226        case INDEX_op_brcond2_i32:
1227            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
1228            if (tmp != 2) {
1229                if (tmp) {
1230            do_brcond_true:
1231                    reset_all_temps(nb_temps);
1232                    op->opc = INDEX_op_br;
1233                    args[0] = args[5];
1234                } else {
1235            do_brcond_false:
1236                    tcg_op_remove(s, op);
1237                }
1238            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
1239                       && temp_is_const(args[2]) && temps[args[2]].val == 0
1240                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
1241                /* Simplify LT/GE comparisons vs zero to a single compare
1242                   vs the high word of the input.  */
1243            do_brcond_high:
1244                reset_all_temps(nb_temps);
1245                op->opc = INDEX_op_brcond_i32;
1246                args[0] = args[1];
1247                args[1] = args[3];
1248                args[2] = args[4];
1249                args[3] = args[5];
1250            } else if (args[4] == TCG_COND_EQ) {
1251                /* Simplify EQ comparisons where one of the pairs
1252                   can be simplified.  */
1253                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1254                                               args[0], args[2], TCG_COND_EQ);
1255                if (tmp == 0) {
1256                    goto do_brcond_false;
1257                } else if (tmp == 1) {
1258                    goto do_brcond_high;
1259                }
1260                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1261                                               args[1], args[3], TCG_COND_EQ);
1262                if (tmp == 0) {
1263                    goto do_brcond_false;
1264                } else if (tmp != 1) {
1265                    goto do_default;
1266                }
1267            do_brcond_low:
1268                reset_all_temps(nb_temps);
1269                op->opc = INDEX_op_brcond_i32;
1270                args[1] = args[2];
1271                args[2] = args[4];
1272                args[3] = args[5];
1273            } else if (args[4] == TCG_COND_NE) {
1274                /* Simplify NE comparisons where one of the pairs
1275                   can be simplified.  */
1276                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1277                                               args[0], args[2], TCG_COND_NE);
1278                if (tmp == 0) {
1279                    goto do_brcond_high;
1280                } else if (tmp == 1) {
1281                    goto do_brcond_true;
1282                }
1283                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1284                                               args[1], args[3], TCG_COND_NE);
1285                if (tmp == 0) {
1286                    goto do_brcond_low;
1287                } else if (tmp == 1) {
1288                    goto do_brcond_true;
1289                }
1290                goto do_default;
1291            } else {
1292                goto do_default;
1293            }
1294            break;
1295
1296        case INDEX_op_setcond2_i32:
1297            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
1298            if (tmp != 2) {
1299            do_setcond_const:
1300                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1301            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
1302                       && temp_is_const(args[3]) && temps[args[3]].val == 0
1303                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
1304                /* Simplify LT/GE comparisons vs zero to a single compare
1305                   vs the high word of the input.  */
1306            do_setcond_high:
1307                reset_temp(args[0]);
1308                temps[args[0]].mask = 1;
1309                op->opc = INDEX_op_setcond_i32;
1310                args[1] = args[2];
1311                args[2] = args[4];
1312                args[3] = args[5];
1313            } else if (args[5] == TCG_COND_EQ) {
1314                /* Simplify EQ comparisons where one of the pairs
1315                   can be simplified.  */
1316                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1317                                               args[1], args[3], TCG_COND_EQ);
1318                if (tmp == 0) {
1319                    goto do_setcond_const;
1320                } else if (tmp == 1) {
1321                    goto do_setcond_high;
1322                }
1323                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1324                                               args[2], args[4], TCG_COND_EQ);
1325                if (tmp == 0) {
1326                    goto do_setcond_high;
1327                } else if (tmp != 1) {
1328                    goto do_default;
1329                }
1330            do_setcond_low:
1331                reset_temp(args[0]);
1332                temps[args[0]].mask = 1;
1333                op->opc = INDEX_op_setcond_i32;
1334                args[2] = args[3];
1335                args[3] = args[5];
1336            } else if (args[5] == TCG_COND_NE) {
1337                /* Simplify NE comparisons where one of the pairs
1338                   can be simplified.  */
1339                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1340                                               args[1], args[3], TCG_COND_NE);
1341                if (tmp == 0) {
1342                    goto do_setcond_high;
1343                } else if (tmp == 1) {
1344                    goto do_setcond_const;
1345                }
1346                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1347                                               args[2], args[4], TCG_COND_NE);
1348                if (tmp == 0) {
1349                    goto do_setcond_low;
1350                } else if (tmp == 1) {
1351                    goto do_setcond_const;
1352                }
1353                goto do_default;
1354            } else {
1355                goto do_default;
1356            }
1357            break;
1358
1359        case INDEX_op_call:
1360            if (!(args[nb_oargs + nb_iargs + 1]
1361                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1362                for (i = 0; i < nb_globals; i++) {
1363                    if (test_bit(i, temps_used.l)) {
1364                        reset_temp(i);
1365                    }
1366                }
1367            }
1368            goto do_reset_output;
1369
1370        default:
1371        do_default:
1372            /* Default case: we know nothing about operation (or were unable
1373               to compute the operation result) so no propagation is done.
1374               We trash everything if the operation is the end of a basic
1375               block, otherwise we only trash the output args.  "mask" is
1376               the non-zero bits mask for the first output arg.  */
1377            if (def->flags & TCG_OPF_BB_END) {
1378                reset_all_temps(nb_temps);
1379            } else {
1380        do_reset_output:
1381                for (i = 0; i < nb_oargs; i++) {
1382                    reset_temp(args[i]);
1383                    /* Save the corresponding known-zero bits mask for the
1384                       first output argument (only one supported so far). */
1385                    if (i == 0) {
1386                        temps[args[i]].mask = mask;
1387                    }
1388                }
1389            }
1390            break;
1391        }
1392
1393        /* Eliminate duplicate and redundant fence instructions.  */
1394        if (prev_mb_args) {
1395            switch (opc) {
1396            case INDEX_op_mb:
1397                /* Merge two barriers of the same type into one,
1398                 * or a weaker barrier into a stronger one,
1399                 * or two weaker barriers into a stronger one.
1400                 *   mb X; mb Y => mb X|Y
1401                 *   mb; strl => mb; st
1402                 *   ldaq; mb => ld; mb
1403                 *   ldaq; strl => ld; mb; st
1404                 * Other combinations are also merged into a strong
1405                 * barrier.  This is stricter than specified but for
1406                 * the purposes of TCG is better than not optimizing.
1407                 */
1408                prev_mb_args[0] |= args[0];
1409                tcg_op_remove(s, op);
1410                break;
1411
1412            default:
1413                /* Opcodes that end the block stop the optimization.  */
1414                if ((def->flags & TCG_OPF_BB_END) == 0) {
1415                    break;
1416                }
1417                /* fallthru */
1418            case INDEX_op_qemu_ld_i32:
1419            case INDEX_op_qemu_ld_i64:
1420            case INDEX_op_qemu_st_i32:
1421            case INDEX_op_qemu_st_i64:
1422            case INDEX_op_call:
1423                /* Opcodes that touch guest memory stop the optimization.  */
1424                prev_mb_args = NULL;
1425                break;
1426            }
1427        } else if (opc == INDEX_op_mb) {
1428            prev_mb_args = args;
1429        }
1430    }
1431}
1432