qemu/tcg/optimize.c
<<
>>
Prefs
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qemu-common.h"
  28#include "exec/cpu-common.h"
  29#include "tcg-op.h"
  30
  31#define CASE_OP_32_64(x)                        \
  32        glue(glue(case INDEX_op_, x), _i32):    \
  33        glue(glue(case INDEX_op_, x), _i64)
  34
  35struct tcg_temp_info {
  36    bool is_const;
  37    uint16_t prev_copy;
  38    uint16_t next_copy;
  39    tcg_target_ulong val;
  40    tcg_target_ulong mask;
  41};
  42
  43static struct tcg_temp_info temps[TCG_MAX_TEMPS];
  44static TCGTempSet temps_used;
  45
  46static inline bool temp_is_const(TCGArg arg)
  47{
  48    return temps[arg].is_const;
  49}
  50
  51static inline bool temp_is_copy(TCGArg arg)
  52{
  53    return temps[arg].next_copy != arg;
  54}
  55
  56/* Reset TEMP's state, possibly removing the temp for the list of copies.  */
  57static void reset_temp(TCGArg temp)
  58{
  59    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
  60    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
  61    temps[temp].next_copy = temp;
  62    temps[temp].prev_copy = temp;
  63    temps[temp].is_const = false;
  64    temps[temp].mask = -1;
  65}
  66
  67/* Reset all temporaries, given that there are NB_TEMPS of them.  */
  68static void reset_all_temps(int nb_temps)
  69{
  70    bitmap_zero(temps_used.l, nb_temps);
  71}
  72
  73/* Initialize and activate a temporary.  */
  74static void init_temp_info(TCGArg temp)
  75{
  76    if (!test_bit(temp, temps_used.l)) {
  77        temps[temp].next_copy = temp;
  78        temps[temp].prev_copy = temp;
  79        temps[temp].is_const = false;
  80        temps[temp].mask = -1;
  81        set_bit(temp, temps_used.l);
  82    }
  83}
  84
  85static int op_bits(TCGOpcode op)
  86{
  87    const TCGOpDef *def = &tcg_op_defs[op];
  88    return def->flags & TCG_OPF_64BIT ? 64 : 32;
  89}
  90
  91static TCGOpcode op_to_mov(TCGOpcode op)
  92{
  93    switch (op_bits(op)) {
  94    case 32:
  95        return INDEX_op_mov_i32;
  96    case 64:
  97        return INDEX_op_mov_i64;
  98    default:
  99        fprintf(stderr, "op_to_mov: unexpected return value of "
 100                "function op_bits.\n");
 101        tcg_abort();
 102    }
 103}
 104
 105static TCGOpcode op_to_movi(TCGOpcode op)
 106{
 107    switch (op_bits(op)) {
 108    case 32:
 109        return INDEX_op_movi_i32;
 110    case 64:
 111        return INDEX_op_movi_i64;
 112    default:
 113        fprintf(stderr, "op_to_movi: unexpected return value of "
 114                "function op_bits.\n");
 115        tcg_abort();
 116    }
 117}
 118
 119static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
 120{
 121    TCGArg i;
 122
 123    /* If this is already a global, we can't do better. */
 124    if (temp < s->nb_globals) {
 125        return temp;
 126    }
 127
 128    /* Search for a global first. */
 129    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
 130        if (i < s->nb_globals) {
 131            return i;
 132        }
 133    }
 134
 135    /* If it is a temp, search for a temp local. */
 136    if (!s->temps[temp].temp_local) {
 137        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
 138            if (s->temps[i].temp_local) {
 139                return i;
 140            }
 141        }
 142    }
 143
 144    /* Failure to find a better representation, return the same temp. */
 145    return temp;
 146}
 147
 148static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
 149{
 150    TCGArg i;
 151
 152    if (arg1 == arg2) {
 153        return true;
 154    }
 155
 156    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
 157        return false;
 158    }
 159
 160    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
 161        if (i == arg2) {
 162            return true;
 163        }
 164    }
 165
 166    return false;
 167}
 168
 169static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
 170                             TCGArg dst, TCGArg val)
 171{
 172    TCGOpcode new_op = op_to_movi(op->opc);
 173    tcg_target_ulong mask;
 174
 175    op->opc = new_op;
 176
 177    reset_temp(dst);
 178    temps[dst].is_const = true;
 179    temps[dst].val = val;
 180    mask = val;
 181    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
 182        /* High bits of the destination are now garbage.  */
 183        mask |= ~0xffffffffull;
 184    }
 185    temps[dst].mask = mask;
 186
 187    args[0] = dst;
 188    args[1] = val;
 189}
 190
 191static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
 192                            TCGArg dst, TCGArg src)
 193{
 194    if (temps_are_copies(dst, src)) {
 195        tcg_op_remove(s, op);
 196        return;
 197    }
 198
 199    TCGOpcode new_op = op_to_mov(op->opc);
 200    tcg_target_ulong mask;
 201
 202    op->opc = new_op;
 203
 204    reset_temp(dst);
 205    mask = temps[src].mask;
 206    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
 207        /* High bits of the destination are now garbage.  */
 208        mask |= ~0xffffffffull;
 209    }
 210    temps[dst].mask = mask;
 211
 212    if (s->temps[src].type == s->temps[dst].type) {
 213        temps[dst].next_copy = temps[src].next_copy;
 214        temps[dst].prev_copy = src;
 215        temps[temps[dst].next_copy].prev_copy = dst;
 216        temps[src].next_copy = dst;
 217        temps[dst].is_const = temps[src].is_const;
 218        temps[dst].val = temps[src].val;
 219    }
 220
 221    args[0] = dst;
 222    args[1] = src;
 223}
 224
 225static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
 226{
 227    uint64_t l64, h64;
 228
 229    switch (op) {
 230    CASE_OP_32_64(add):
 231        return x + y;
 232
 233    CASE_OP_32_64(sub):
 234        return x - y;
 235
 236    CASE_OP_32_64(mul):
 237        return x * y;
 238
 239    CASE_OP_32_64(and):
 240        return x & y;
 241
 242    CASE_OP_32_64(or):
 243        return x | y;
 244
 245    CASE_OP_32_64(xor):
 246        return x ^ y;
 247
 248    case INDEX_op_shl_i32:
 249        return (uint32_t)x << (y & 31);
 250
 251    case INDEX_op_shl_i64:
 252        return (uint64_t)x << (y & 63);
 253
 254    case INDEX_op_shr_i32:
 255        return (uint32_t)x >> (y & 31);
 256
 257    case INDEX_op_shr_i64:
 258        return (uint64_t)x >> (y & 63);
 259
 260    case INDEX_op_sar_i32:
 261        return (int32_t)x >> (y & 31);
 262
 263    case INDEX_op_sar_i64:
 264        return (int64_t)x >> (y & 63);
 265
 266    case INDEX_op_rotr_i32:
 267        return ror32(x, y & 31);
 268
 269    case INDEX_op_rotr_i64:
 270        return ror64(x, y & 63);
 271
 272    case INDEX_op_rotl_i32:
 273        return rol32(x, y & 31);
 274
 275    case INDEX_op_rotl_i64:
 276        return rol64(x, y & 63);
 277
 278    CASE_OP_32_64(not):
 279        return ~x;
 280
 281    CASE_OP_32_64(neg):
 282        return -x;
 283
 284    CASE_OP_32_64(andc):
 285        return x & ~y;
 286
 287    CASE_OP_32_64(orc):
 288        return x | ~y;
 289
 290    CASE_OP_32_64(eqv):
 291        return ~(x ^ y);
 292
 293    CASE_OP_32_64(nand):
 294        return ~(x & y);
 295
 296    CASE_OP_32_64(nor):
 297        return ~(x | y);
 298
 299    CASE_OP_32_64(ext8s):
 300        return (int8_t)x;
 301
 302    CASE_OP_32_64(ext16s):
 303        return (int16_t)x;
 304
 305    CASE_OP_32_64(ext8u):
 306        return (uint8_t)x;
 307
 308    CASE_OP_32_64(ext16u):
 309        return (uint16_t)x;
 310
 311    case INDEX_op_ext_i32_i64:
 312    case INDEX_op_ext32s_i64:
 313        return (int32_t)x;
 314
 315    case INDEX_op_extu_i32_i64:
 316    case INDEX_op_extrl_i64_i32:
 317    case INDEX_op_ext32u_i64:
 318        return (uint32_t)x;
 319
 320    case INDEX_op_extrh_i64_i32:
 321        return (uint64_t)x >> 32;
 322
 323    case INDEX_op_muluh_i32:
 324        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
 325    case INDEX_op_mulsh_i32:
 326        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
 327
 328    case INDEX_op_muluh_i64:
 329        mulu64(&l64, &h64, x, y);
 330        return h64;
 331    case INDEX_op_mulsh_i64:
 332        muls64(&l64, &h64, x, y);
 333        return h64;
 334
 335    case INDEX_op_div_i32:
 336        /* Avoid crashing on divide by zero, otherwise undefined.  */
 337        return (int32_t)x / ((int32_t)y ? : 1);
 338    case INDEX_op_divu_i32:
 339        return (uint32_t)x / ((uint32_t)y ? : 1);
 340    case INDEX_op_div_i64:
 341        return (int64_t)x / ((int64_t)y ? : 1);
 342    case INDEX_op_divu_i64:
 343        return (uint64_t)x / ((uint64_t)y ? : 1);
 344
 345    case INDEX_op_rem_i32:
 346        return (int32_t)x % ((int32_t)y ? : 1);
 347    case INDEX_op_remu_i32:
 348        return (uint32_t)x % ((uint32_t)y ? : 1);
 349    case INDEX_op_rem_i64:
 350        return (int64_t)x % ((int64_t)y ? : 1);
 351    case INDEX_op_remu_i64:
 352        return (uint64_t)x % ((uint64_t)y ? : 1);
 353
 354    default:
 355        fprintf(stderr,
 356                "Unrecognized operation %d in do_constant_folding.\n", op);
 357        tcg_abort();
 358    }
 359}
 360
 361static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
 362{
 363    TCGArg res = do_constant_folding_2(op, x, y);
 364    if (op_bits(op) == 32) {
 365        res = (int32_t)res;
 366    }
 367    return res;
 368}
 369
 370static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 371{
 372    switch (c) {
 373    case TCG_COND_EQ:
 374        return x == y;
 375    case TCG_COND_NE:
 376        return x != y;
 377    case TCG_COND_LT:
 378        return (int32_t)x < (int32_t)y;
 379    case TCG_COND_GE:
 380        return (int32_t)x >= (int32_t)y;
 381    case TCG_COND_LE:
 382        return (int32_t)x <= (int32_t)y;
 383    case TCG_COND_GT:
 384        return (int32_t)x > (int32_t)y;
 385    case TCG_COND_LTU:
 386        return x < y;
 387    case TCG_COND_GEU:
 388        return x >= y;
 389    case TCG_COND_LEU:
 390        return x <= y;
 391    case TCG_COND_GTU:
 392        return x > y;
 393    default:
 394        tcg_abort();
 395    }
 396}
 397
 398static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 399{
 400    switch (c) {
 401    case TCG_COND_EQ:
 402        return x == y;
 403    case TCG_COND_NE:
 404        return x != y;
 405    case TCG_COND_LT:
 406        return (int64_t)x < (int64_t)y;
 407    case TCG_COND_GE:
 408        return (int64_t)x >= (int64_t)y;
 409    case TCG_COND_LE:
 410        return (int64_t)x <= (int64_t)y;
 411    case TCG_COND_GT:
 412        return (int64_t)x > (int64_t)y;
 413    case TCG_COND_LTU:
 414        return x < y;
 415    case TCG_COND_GEU:
 416        return x >= y;
 417    case TCG_COND_LEU:
 418        return x <= y;
 419    case TCG_COND_GTU:
 420        return x > y;
 421    default:
 422        tcg_abort();
 423    }
 424}
 425
 426static bool do_constant_folding_cond_eq(TCGCond c)
 427{
 428    switch (c) {
 429    case TCG_COND_GT:
 430    case TCG_COND_LTU:
 431    case TCG_COND_LT:
 432    case TCG_COND_GTU:
 433    case TCG_COND_NE:
 434        return 0;
 435    case TCG_COND_GE:
 436    case TCG_COND_GEU:
 437    case TCG_COND_LE:
 438    case TCG_COND_LEU:
 439    case TCG_COND_EQ:
 440        return 1;
 441    default:
 442        tcg_abort();
 443    }
 444}
 445
 446/* Return 2 if the condition can't be simplified, and the result
 447   of the condition (0 or 1) if it can */
 448static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
 449                                       TCGArg y, TCGCond c)
 450{
 451    if (temp_is_const(x) && temp_is_const(y)) {
 452        switch (op_bits(op)) {
 453        case 32:
 454            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
 455        case 64:
 456            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
 457        default:
 458            tcg_abort();
 459        }
 460    } else if (temps_are_copies(x, y)) {
 461        return do_constant_folding_cond_eq(c);
 462    } else if (temp_is_const(y) && temps[y].val == 0) {
 463        switch (c) {
 464        case TCG_COND_LTU:
 465            return 0;
 466        case TCG_COND_GEU:
 467            return 1;
 468        default:
 469            return 2;
 470        }
 471    } else {
 472        return 2;
 473    }
 474}
 475
 476/* Return 2 if the condition can't be simplified, and the result
 477   of the condition (0 or 1) if it can */
 478static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 479{
 480    TCGArg al = p1[0], ah = p1[1];
 481    TCGArg bl = p2[0], bh = p2[1];
 482
 483    if (temp_is_const(bl) && temp_is_const(bh)) {
 484        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
 485
 486        if (temp_is_const(al) && temp_is_const(ah)) {
 487            uint64_t a;
 488            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
 489            return do_constant_folding_cond_64(a, b, c);
 490        }
 491        if (b == 0) {
 492            switch (c) {
 493            case TCG_COND_LTU:
 494                return 0;
 495            case TCG_COND_GEU:
 496                return 1;
 497            default:
 498                break;
 499            }
 500        }
 501    }
 502    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
 503        return do_constant_folding_cond_eq(c);
 504    }
 505    return 2;
 506}
 507
 508static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 509{
 510    TCGArg a1 = *p1, a2 = *p2;
 511    int sum = 0;
 512    sum += temp_is_const(a1);
 513    sum -= temp_is_const(a2);
 514
 515    /* Prefer the constant in second argument, and then the form
 516       op a, a, b, which is better handled on non-RISC hosts. */
 517    if (sum > 0 || (sum == 0 && dest == a2)) {
 518        *p1 = a2;
 519        *p2 = a1;
 520        return true;
 521    }
 522    return false;
 523}
 524
 525static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 526{
 527    int sum = 0;
 528    sum += temp_is_const(p1[0]);
 529    sum += temp_is_const(p1[1]);
 530    sum -= temp_is_const(p2[0]);
 531    sum -= temp_is_const(p2[1]);
 532    if (sum > 0) {
 533        TCGArg t;
 534        t = p1[0], p1[0] = p2[0], p2[0] = t;
 535        t = p1[1], p1[1] = p2[1], p2[1] = t;
 536        return true;
 537    }
 538    return false;
 539}
 540
 541/* Propagate constants and copies, fold constant expressions. */
 542void tcg_optimize(TCGContext *s)
 543{
 544    int oi, oi_next, nb_temps, nb_globals;
 545
 546    /* Array VALS has an element for each temp.
 547       If this temp holds a constant then its value is kept in VALS' element.
 548       If this temp is a copy of other ones then the other copies are
 549       available through the doubly linked circular list. */
 550
 551    nb_temps = s->nb_temps;
 552    nb_globals = s->nb_globals;
 553    reset_all_temps(nb_temps);
 554
 555    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
 556        tcg_target_ulong mask, partmask, affected;
 557        int nb_oargs, nb_iargs, i;
 558        TCGArg tmp;
 559
 560        TCGOp * const op = &s->gen_op_buf[oi];
 561        TCGArg * const args = &s->gen_opparam_buf[op->args];
 562        TCGOpcode opc = op->opc;
 563        const TCGOpDef *def = &tcg_op_defs[opc];
 564
 565        oi_next = op->next;
 566
 567        /* Count the arguments, and initialize the temps that are
 568           going to be used */
 569        if (opc == INDEX_op_call) {
 570            nb_oargs = op->callo;
 571            nb_iargs = op->calli;
 572            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 573                tmp = args[i];
 574                if (tmp != TCG_CALL_DUMMY_ARG) {
 575                    init_temp_info(tmp);
 576                }
 577            }
 578        } else {
 579            nb_oargs = def->nb_oargs;
 580            nb_iargs = def->nb_iargs;
 581            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 582                init_temp_info(args[i]);
 583            }
 584        }
 585
 586        /* Do copy propagation */
 587        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 588            if (temp_is_copy(args[i])) {
 589                args[i] = find_better_copy(s, args[i]);
 590            }
 591        }
 592
 593        /* For commutative operations make constant second argument */
 594        switch (opc) {
 595        CASE_OP_32_64(add):
 596        CASE_OP_32_64(mul):
 597        CASE_OP_32_64(and):
 598        CASE_OP_32_64(or):
 599        CASE_OP_32_64(xor):
 600        CASE_OP_32_64(eqv):
 601        CASE_OP_32_64(nand):
 602        CASE_OP_32_64(nor):
 603        CASE_OP_32_64(muluh):
 604        CASE_OP_32_64(mulsh):
 605            swap_commutative(args[0], &args[1], &args[2]);
 606            break;
 607        CASE_OP_32_64(brcond):
 608            if (swap_commutative(-1, &args[0], &args[1])) {
 609                args[2] = tcg_swap_cond(args[2]);
 610            }
 611            break;
 612        CASE_OP_32_64(setcond):
 613            if (swap_commutative(args[0], &args[1], &args[2])) {
 614                args[3] = tcg_swap_cond(args[3]);
 615            }
 616            break;
 617        CASE_OP_32_64(movcond):
 618            if (swap_commutative(-1, &args[1], &args[2])) {
 619                args[5] = tcg_swap_cond(args[5]);
 620            }
 621            /* For movcond, we canonicalize the "false" input reg to match
 622               the destination reg so that the tcg backend can implement
 623               a "move if true" operation.  */
 624            if (swap_commutative(args[0], &args[4], &args[3])) {
 625                args[5] = tcg_invert_cond(args[5]);
 626            }
 627            break;
 628        CASE_OP_32_64(add2):
 629            swap_commutative(args[0], &args[2], &args[4]);
 630            swap_commutative(args[1], &args[3], &args[5]);
 631            break;
 632        CASE_OP_32_64(mulu2):
 633        CASE_OP_32_64(muls2):
 634            swap_commutative(args[0], &args[2], &args[3]);
 635            break;
 636        case INDEX_op_brcond2_i32:
 637            if (swap_commutative2(&args[0], &args[2])) {
 638                args[4] = tcg_swap_cond(args[4]);
 639            }
 640            break;
 641        case INDEX_op_setcond2_i32:
 642            if (swap_commutative2(&args[1], &args[3])) {
 643                args[5] = tcg_swap_cond(args[5]);
 644            }
 645            break;
 646        default:
 647            break;
 648        }
 649
 650        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
 651           and "sub r, 0, a => neg r, a" case.  */
 652        switch (opc) {
 653        CASE_OP_32_64(shl):
 654        CASE_OP_32_64(shr):
 655        CASE_OP_32_64(sar):
 656        CASE_OP_32_64(rotl):
 657        CASE_OP_32_64(rotr):
 658            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
 659                tcg_opt_gen_movi(s, op, args, args[0], 0);
 660                continue;
 661            }
 662            break;
 663        CASE_OP_32_64(sub):
 664            {
 665                TCGOpcode neg_op;
 666                bool have_neg;
 667
 668                if (temp_is_const(args[2])) {
 669                    /* Proceed with possible constant folding. */
 670                    break;
 671                }
 672                if (opc == INDEX_op_sub_i32) {
 673                    neg_op = INDEX_op_neg_i32;
 674                    have_neg = TCG_TARGET_HAS_neg_i32;
 675                } else {
 676                    neg_op = INDEX_op_neg_i64;
 677                    have_neg = TCG_TARGET_HAS_neg_i64;
 678                }
 679                if (!have_neg) {
 680                    break;
 681                }
 682                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
 683                    op->opc = neg_op;
 684                    reset_temp(args[0]);
 685                    args[1] = args[2];
 686                    continue;
 687                }
 688            }
 689            break;
 690        CASE_OP_32_64(xor):
 691        CASE_OP_32_64(nand):
 692            if (!temp_is_const(args[1])
 693                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
 694                i = 1;
 695                goto try_not;
 696            }
 697            break;
 698        CASE_OP_32_64(nor):
 699            if (!temp_is_const(args[1])
 700                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
 701                i = 1;
 702                goto try_not;
 703            }
 704            break;
 705        CASE_OP_32_64(andc):
 706            if (!temp_is_const(args[2])
 707                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
 708                i = 2;
 709                goto try_not;
 710            }
 711            break;
 712        CASE_OP_32_64(orc):
 713        CASE_OP_32_64(eqv):
 714            if (!temp_is_const(args[2])
 715                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
 716                i = 2;
 717                goto try_not;
 718            }
 719            break;
 720        try_not:
 721            {
 722                TCGOpcode not_op;
 723                bool have_not;
 724
 725                if (def->flags & TCG_OPF_64BIT) {
 726                    not_op = INDEX_op_not_i64;
 727                    have_not = TCG_TARGET_HAS_not_i64;
 728                } else {
 729                    not_op = INDEX_op_not_i32;
 730                    have_not = TCG_TARGET_HAS_not_i32;
 731                }
 732                if (!have_not) {
 733                    break;
 734                }
 735                op->opc = not_op;
 736                reset_temp(args[0]);
 737                args[1] = args[i];
 738                continue;
 739            }
 740        default:
 741            break;
 742        }
 743
 744        /* Simplify expression for "op r, a, const => mov r, a" cases */
 745        switch (opc) {
 746        CASE_OP_32_64(add):
 747        CASE_OP_32_64(sub):
 748        CASE_OP_32_64(shl):
 749        CASE_OP_32_64(shr):
 750        CASE_OP_32_64(sar):
 751        CASE_OP_32_64(rotl):
 752        CASE_OP_32_64(rotr):
 753        CASE_OP_32_64(or):
 754        CASE_OP_32_64(xor):
 755        CASE_OP_32_64(andc):
 756            if (!temp_is_const(args[1])
 757                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
 758                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 759                continue;
 760            }
 761            break;
 762        CASE_OP_32_64(and):
 763        CASE_OP_32_64(orc):
 764        CASE_OP_32_64(eqv):
 765            if (!temp_is_const(args[1])
 766                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
 767                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 768                continue;
 769            }
 770            break;
 771        default:
 772            break;
 773        }
 774
 775        /* Simplify using known-zero bits. Currently only ops with a single
 776           output argument is supported. */
 777        mask = -1;
 778        affected = -1;
 779        switch (opc) {
 780        CASE_OP_32_64(ext8s):
 781            if ((temps[args[1]].mask & 0x80) != 0) {
 782                break;
 783            }
 784        CASE_OP_32_64(ext8u):
 785            mask = 0xff;
 786            goto and_const;
 787        CASE_OP_32_64(ext16s):
 788            if ((temps[args[1]].mask & 0x8000) != 0) {
 789                break;
 790            }
 791        CASE_OP_32_64(ext16u):
 792            mask = 0xffff;
 793            goto and_const;
 794        case INDEX_op_ext32s_i64:
 795            if ((temps[args[1]].mask & 0x80000000) != 0) {
 796                break;
 797            }
 798        case INDEX_op_ext32u_i64:
 799            mask = 0xffffffffU;
 800            goto and_const;
 801
 802        CASE_OP_32_64(and):
 803            mask = temps[args[2]].mask;
 804            if (temp_is_const(args[2])) {
 805        and_const:
 806                affected = temps[args[1]].mask & ~mask;
 807            }
 808            mask = temps[args[1]].mask & mask;
 809            break;
 810
 811        case INDEX_op_ext_i32_i64:
 812            if ((temps[args[1]].mask & 0x80000000) != 0) {
 813                break;
 814            }
 815        case INDEX_op_extu_i32_i64:
 816            /* We do not compute affected as it is a size changing op.  */
 817            mask = (uint32_t)temps[args[1]].mask;
 818            break;
 819
 820        CASE_OP_32_64(andc):
 821            /* Known-zeros does not imply known-ones.  Therefore unless
 822               args[2] is constant, we can't infer anything from it.  */
 823            if (temp_is_const(args[2])) {
 824                mask = ~temps[args[2]].mask;
 825                goto and_const;
 826            }
 827            /* But we certainly know nothing outside args[1] may be set. */
 828            mask = temps[args[1]].mask;
 829            break;
 830
 831        case INDEX_op_sar_i32:
 832            if (temp_is_const(args[2])) {
 833                tmp = temps[args[2]].val & 31;
 834                mask = (int32_t)temps[args[1]].mask >> tmp;
 835            }
 836            break;
 837        case INDEX_op_sar_i64:
 838            if (temp_is_const(args[2])) {
 839                tmp = temps[args[2]].val & 63;
 840                mask = (int64_t)temps[args[1]].mask >> tmp;
 841            }
 842            break;
 843
 844        case INDEX_op_shr_i32:
 845            if (temp_is_const(args[2])) {
 846                tmp = temps[args[2]].val & 31;
 847                mask = (uint32_t)temps[args[1]].mask >> tmp;
 848            }
 849            break;
 850        case INDEX_op_shr_i64:
 851            if (temp_is_const(args[2])) {
 852                tmp = temps[args[2]].val & 63;
 853                mask = (uint64_t)temps[args[1]].mask >> tmp;
 854            }
 855            break;
 856
 857        case INDEX_op_extrl_i64_i32:
 858            mask = (uint32_t)temps[args[1]].mask;
 859            break;
 860        case INDEX_op_extrh_i64_i32:
 861            mask = (uint64_t)temps[args[1]].mask >> 32;
 862            break;
 863
 864        CASE_OP_32_64(shl):
 865            if (temp_is_const(args[2])) {
 866                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
 867                mask = temps[args[1]].mask << tmp;
 868            }
 869            break;
 870
 871        CASE_OP_32_64(neg):
 872            /* Set to 1 all bits to the left of the rightmost.  */
 873            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
 874            break;
 875
 876        CASE_OP_32_64(deposit):
 877            mask = deposit64(temps[args[1]].mask, args[3], args[4],
 878                             temps[args[2]].mask);
 879            break;
 880
 881        CASE_OP_32_64(or):
 882        CASE_OP_32_64(xor):
 883            mask = temps[args[1]].mask | temps[args[2]].mask;
 884            break;
 885
 886        CASE_OP_32_64(setcond):
 887        case INDEX_op_setcond2_i32:
 888            mask = 1;
 889            break;
 890
 891        CASE_OP_32_64(movcond):
 892            mask = temps[args[3]].mask | temps[args[4]].mask;
 893            break;
 894
 895        CASE_OP_32_64(ld8u):
 896            mask = 0xff;
 897            break;
 898        CASE_OP_32_64(ld16u):
 899            mask = 0xffff;
 900            break;
 901        case INDEX_op_ld32u_i64:
 902            mask = 0xffffffffu;
 903            break;
 904
 905        CASE_OP_32_64(qemu_ld):
 906            {
 907                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
 908                TCGMemOp mop = get_memop(oi);
 909                if (!(mop & MO_SIGN)) {
 910                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
 911                }
 912            }
 913            break;
 914
 915        default:
 916            break;
 917        }
 918
 919        /* 32-bit ops generate 32-bit results.  For the result is zero test
 920           below, we can ignore high bits, but for further optimizations we
 921           need to record that the high bits contain garbage.  */
 922        partmask = mask;
 923        if (!(def->flags & TCG_OPF_64BIT)) {
 924            mask |= ~(tcg_target_ulong)0xffffffffu;
 925            partmask &= 0xffffffffu;
 926            affected &= 0xffffffffu;
 927        }
 928
 929        if (partmask == 0) {
 930            tcg_debug_assert(nb_oargs == 1);
 931            tcg_opt_gen_movi(s, op, args, args[0], 0);
 932            continue;
 933        }
 934        if (affected == 0) {
 935            tcg_debug_assert(nb_oargs == 1);
 936            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 937            continue;
 938        }
 939
 940        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
 941        switch (opc) {
 942        CASE_OP_32_64(and):
 943        CASE_OP_32_64(mul):
 944        CASE_OP_32_64(muluh):
 945        CASE_OP_32_64(mulsh):
 946            if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
 947                tcg_opt_gen_movi(s, op, args, args[0], 0);
 948                continue;
 949            }
 950            break;
 951        default:
 952            break;
 953        }
 954
 955        /* Simplify expression for "op r, a, a => mov r, a" cases */
 956        switch (opc) {
 957        CASE_OP_32_64(or):
 958        CASE_OP_32_64(and):
 959            if (temps_are_copies(args[1], args[2])) {
 960                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 961                continue;
 962            }
 963            break;
 964        default:
 965            break;
 966        }
 967
 968        /* Simplify expression for "op r, a, a => movi r, 0" cases */
 969        switch (opc) {
 970        CASE_OP_32_64(andc):
 971        CASE_OP_32_64(sub):
 972        CASE_OP_32_64(xor):
 973            if (temps_are_copies(args[1], args[2])) {
 974                tcg_opt_gen_movi(s, op, args, args[0], 0);
 975                continue;
 976            }
 977            break;
 978        default:
 979            break;
 980        }
 981
 982        /* Propagate constants through copy operations and do constant
 983           folding.  Constants will be substituted to arguments by register
 984           allocator where needed and possible.  Also detect copies. */
 985        switch (opc) {
 986        CASE_OP_32_64(mov):
 987            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 988            break;
 989        CASE_OP_32_64(movi):
 990            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
 991            break;
 992
 993        CASE_OP_32_64(not):
 994        CASE_OP_32_64(neg):
 995        CASE_OP_32_64(ext8s):
 996        CASE_OP_32_64(ext8u):
 997        CASE_OP_32_64(ext16s):
 998        CASE_OP_32_64(ext16u):
 999        case INDEX_op_ext32s_i64:
1000        case INDEX_op_ext32u_i64:
1001        case INDEX_op_ext_i32_i64:
1002        case INDEX_op_extu_i32_i64:
1003        case INDEX_op_extrl_i64_i32:
1004        case INDEX_op_extrh_i64_i32:
1005            if (temp_is_const(args[1])) {
1006                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
1007                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1008                break;
1009            }
1010            goto do_default;
1011
1012        CASE_OP_32_64(add):
1013        CASE_OP_32_64(sub):
1014        CASE_OP_32_64(mul):
1015        CASE_OP_32_64(or):
1016        CASE_OP_32_64(and):
1017        CASE_OP_32_64(xor):
1018        CASE_OP_32_64(shl):
1019        CASE_OP_32_64(shr):
1020        CASE_OP_32_64(sar):
1021        CASE_OP_32_64(rotl):
1022        CASE_OP_32_64(rotr):
1023        CASE_OP_32_64(andc):
1024        CASE_OP_32_64(orc):
1025        CASE_OP_32_64(eqv):
1026        CASE_OP_32_64(nand):
1027        CASE_OP_32_64(nor):
1028        CASE_OP_32_64(muluh):
1029        CASE_OP_32_64(mulsh):
1030        CASE_OP_32_64(div):
1031        CASE_OP_32_64(divu):
1032        CASE_OP_32_64(rem):
1033        CASE_OP_32_64(remu):
1034            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1035                tmp = do_constant_folding(opc, temps[args[1]].val,
1036                                          temps[args[2]].val);
1037                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1038                break;
1039            }
1040            goto do_default;
1041
1042        CASE_OP_32_64(deposit):
1043            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1044                tmp = deposit64(temps[args[1]].val, args[3], args[4],
1045                                temps[args[2]].val);
1046                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1047                break;
1048            }
1049            goto do_default;
1050
1051        CASE_OP_32_64(setcond):
1052            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
1053            if (tmp != 2) {
1054                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1055                break;
1056            }
1057            goto do_default;
1058
1059        CASE_OP_32_64(brcond):
1060            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
1061            if (tmp != 2) {
1062                if (tmp) {
1063                    reset_all_temps(nb_temps);
1064                    op->opc = INDEX_op_br;
1065                    args[0] = args[3];
1066                } else {
1067                    tcg_op_remove(s, op);
1068                }
1069                break;
1070            }
1071            goto do_default;
1072
1073        CASE_OP_32_64(movcond):
1074            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
1075            if (tmp != 2) {
1076                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
1077                break;
1078            }
1079            goto do_default;
1080
1081        case INDEX_op_add2_i32:
1082        case INDEX_op_sub2_i32:
1083            if (temp_is_const(args[2]) && temp_is_const(args[3])
1084                && temp_is_const(args[4]) && temp_is_const(args[5])) {
1085                uint32_t al = temps[args[2]].val;
1086                uint32_t ah = temps[args[3]].val;
1087                uint32_t bl = temps[args[4]].val;
1088                uint32_t bh = temps[args[5]].val;
1089                uint64_t a = ((uint64_t)ah << 32) | al;
1090                uint64_t b = ((uint64_t)bh << 32) | bl;
1091                TCGArg rl, rh;
1092                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
1093                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1094
1095                if (opc == INDEX_op_add2_i32) {
1096                    a += b;
1097                } else {
1098                    a -= b;
1099                }
1100
1101                rl = args[0];
1102                rh = args[1];
1103                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
1104                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));
1105
1106                /* We've done all we need to do with the movi.  Skip it.  */
1107                oi_next = op2->next;
1108                break;
1109            }
1110            goto do_default;
1111
1112        case INDEX_op_mulu2_i32:
1113            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
1114                uint32_t a = temps[args[2]].val;
1115                uint32_t b = temps[args[3]].val;
1116                uint64_t r = (uint64_t)a * b;
1117                TCGArg rl, rh;
1118                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
1119                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1120
1121                rl = args[0];
1122                rh = args[1];
1123                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
1124                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));
1125
1126                /* We've done all we need to do with the movi.  Skip it.  */
1127                oi_next = op2->next;
1128                break;
1129            }
1130            goto do_default;
1131
1132        case INDEX_op_brcond2_i32:
1133            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
1134            if (tmp != 2) {
1135                if (tmp) {
1136            do_brcond_true:
1137                    reset_all_temps(nb_temps);
1138                    op->opc = INDEX_op_br;
1139                    args[0] = args[5];
1140                } else {
1141            do_brcond_false:
1142                    tcg_op_remove(s, op);
1143                }
1144            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
1145                       && temp_is_const(args[2]) && temps[args[2]].val == 0
1146                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
1147                /* Simplify LT/GE comparisons vs zero to a single compare
1148                   vs the high word of the input.  */
1149            do_brcond_high:
1150                reset_all_temps(nb_temps);
1151                op->opc = INDEX_op_brcond_i32;
1152                args[0] = args[1];
1153                args[1] = args[3];
1154                args[2] = args[4];
1155                args[3] = args[5];
1156            } else if (args[4] == TCG_COND_EQ) {
1157                /* Simplify EQ comparisons where one of the pairs
1158                   can be simplified.  */
1159                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1160                                               args[0], args[2], TCG_COND_EQ);
1161                if (tmp == 0) {
1162                    goto do_brcond_false;
1163                } else if (tmp == 1) {
1164                    goto do_brcond_high;
1165                }
1166                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1167                                               args[1], args[3], TCG_COND_EQ);
1168                if (tmp == 0) {
1169                    goto do_brcond_false;
1170                } else if (tmp != 1) {
1171                    goto do_default;
1172                }
1173            do_brcond_low:
1174                reset_all_temps(nb_temps);
1175                op->opc = INDEX_op_brcond_i32;
1176                args[1] = args[2];
1177                args[2] = args[4];
1178                args[3] = args[5];
1179            } else if (args[4] == TCG_COND_NE) {
1180                /* Simplify NE comparisons where one of the pairs
1181                   can be simplified.  */
1182                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1183                                               args[0], args[2], TCG_COND_NE);
1184                if (tmp == 0) {
1185                    goto do_brcond_high;
1186                } else if (tmp == 1) {
1187                    goto do_brcond_true;
1188                }
1189                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1190                                               args[1], args[3], TCG_COND_NE);
1191                if (tmp == 0) {
1192                    goto do_brcond_low;
1193                } else if (tmp == 1) {
1194                    goto do_brcond_true;
1195                }
1196                goto do_default;
1197            } else {
1198                goto do_default;
1199            }
1200            break;
1201
1202        case INDEX_op_setcond2_i32:
1203            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
1204            if (tmp != 2) {
1205            do_setcond_const:
1206                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1207            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
1208                       && temp_is_const(args[3]) && temps[args[3]].val == 0
1209                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
1210                /* Simplify LT/GE comparisons vs zero to a single compare
1211                   vs the high word of the input.  */
1212            do_setcond_high:
1213                reset_temp(args[0]);
1214                temps[args[0]].mask = 1;
1215                op->opc = INDEX_op_setcond_i32;
1216                args[1] = args[2];
1217                args[2] = args[4];
1218                args[3] = args[5];
1219            } else if (args[5] == TCG_COND_EQ) {
1220                /* Simplify EQ comparisons where one of the pairs
1221                   can be simplified.  */
1222                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1223                                               args[1], args[3], TCG_COND_EQ);
1224                if (tmp == 0) {
1225                    goto do_setcond_const;
1226                } else if (tmp == 1) {
1227                    goto do_setcond_high;
1228                }
1229                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1230                                               args[2], args[4], TCG_COND_EQ);
1231                if (tmp == 0) {
1232                    goto do_setcond_high;
1233                } else if (tmp != 1) {
1234                    goto do_default;
1235                }
1236            do_setcond_low:
1237                reset_temp(args[0]);
1238                temps[args[0]].mask = 1;
1239                op->opc = INDEX_op_setcond_i32;
1240                args[2] = args[3];
1241                args[3] = args[5];
1242            } else if (args[5] == TCG_COND_NE) {
1243                /* Simplify NE comparisons where one of the pairs
1244                   can be simplified.  */
1245                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1246                                               args[1], args[3], TCG_COND_NE);
1247                if (tmp == 0) {
1248                    goto do_setcond_high;
1249                } else if (tmp == 1) {
1250                    goto do_setcond_const;
1251                }
1252                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1253                                               args[2], args[4], TCG_COND_NE);
1254                if (tmp == 0) {
1255                    goto do_setcond_low;
1256                } else if (tmp == 1) {
1257                    goto do_setcond_const;
1258                }
1259                goto do_default;
1260            } else {
1261                goto do_default;
1262            }
1263            break;
1264
1265        case INDEX_op_call:
1266            if (!(args[nb_oargs + nb_iargs + 1]
1267                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1268                for (i = 0; i < nb_globals; i++) {
1269                    if (test_bit(i, temps_used.l)) {
1270                        reset_temp(i);
1271                    }
1272                }
1273            }
1274            goto do_reset_output;
1275
1276        default:
1277        do_default:
1278            /* Default case: we know nothing about operation (or were unable
1279               to compute the operation result) so no propagation is done.
1280               We trash everything if the operation is the end of a basic
1281               block, otherwise we only trash the output args.  "mask" is
1282               the non-zero bits mask for the first output arg.  */
1283            if (def->flags & TCG_OPF_BB_END) {
1284                reset_all_temps(nb_temps);
1285            } else {
1286        do_reset_output:
1287                for (i = 0; i < nb_oargs; i++) {
1288                    reset_temp(args[i]);
1289                    /* Save the corresponding known-zero bits mask for the
1290                       first output argument (only one supported so far). */
1291                    if (i == 0) {
1292                        temps[args[i]].mask = mask;
1293                    }
1294                }
1295            }
1296            break;
1297        }
1298    }
1299}
1300