qemu/tcg/optimize.c
<<
>>
Prefs
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27
  28
  29#include "qemu-common.h"
  30#include "tcg-op.h"
  31
  32#define CASE_OP_32_64(x)                        \
  33        glue(glue(case INDEX_op_, x), _i32):    \
  34        glue(glue(case INDEX_op_, x), _i64)
  35
  36struct tcg_temp_info {
  37    bool is_const;
  38    uint16_t prev_copy;
  39    uint16_t next_copy;
  40    tcg_target_ulong val;
  41    tcg_target_ulong mask;
  42};
  43
  44static struct tcg_temp_info temps[TCG_MAX_TEMPS];
  45static TCGTempSet temps_used;
  46
  47static inline bool temp_is_const(TCGArg arg)
  48{
  49    return temps[arg].is_const;
  50}
  51
  52static inline bool temp_is_copy(TCGArg arg)
  53{
  54    return temps[arg].next_copy != arg;
  55}
  56
  57/* Reset TEMP's state, possibly removing the temp for the list of copies.  */
  58static void reset_temp(TCGArg temp)
  59{
  60    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
  61    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
  62    temps[temp].next_copy = temp;
  63    temps[temp].prev_copy = temp;
  64    temps[temp].is_const = false;
  65    temps[temp].mask = -1;
  66}
  67
  68/* Reset all temporaries, given that there are NB_TEMPS of them.  */
  69static void reset_all_temps(int nb_temps)
  70{
  71    bitmap_zero(temps_used.l, nb_temps);
  72}
  73
  74/* Initialize and activate a temporary.  */
  75static void init_temp_info(TCGArg temp)
  76{
  77    if (!test_bit(temp, temps_used.l)) {
  78        temps[temp].next_copy = temp;
  79        temps[temp].prev_copy = temp;
  80        temps[temp].is_const = false;
  81        temps[temp].mask = -1;
  82        set_bit(temp, temps_used.l);
  83    }
  84}
  85
  86static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
  87                                TCGOpcode opc, int nargs)
  88{
  89    int oi = s->gen_next_op_idx;
  90    int pi = s->gen_next_parm_idx;
  91    int prev = old_op->prev;
  92    int next = old_op - s->gen_op_buf;
  93    TCGOp *new_op;
  94
  95    tcg_debug_assert(oi < OPC_BUF_SIZE);
  96    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
  97    s->gen_next_op_idx = oi + 1;
  98    s->gen_next_parm_idx = pi + nargs;
  99
 100    new_op = &s->gen_op_buf[oi];
 101    *new_op = (TCGOp){
 102        .opc = opc,
 103        .args = pi,
 104        .prev = prev,
 105        .next = next
 106    };
 107    if (prev >= 0) {
 108        s->gen_op_buf[prev].next = oi;
 109    } else {
 110        s->gen_first_op_idx = oi;
 111    }
 112    old_op->prev = oi;
 113
 114    return new_op;
 115}
 116
 117static int op_bits(TCGOpcode op)
 118{
 119    const TCGOpDef *def = &tcg_op_defs[op];
 120    return def->flags & TCG_OPF_64BIT ? 64 : 32;
 121}
 122
 123static TCGOpcode op_to_mov(TCGOpcode op)
 124{
 125    switch (op_bits(op)) {
 126    case 32:
 127        return INDEX_op_mov_i32;
 128    case 64:
 129        return INDEX_op_mov_i64;
 130    default:
 131        fprintf(stderr, "op_to_mov: unexpected return value of "
 132                "function op_bits.\n");
 133        tcg_abort();
 134    }
 135}
 136
 137static TCGOpcode op_to_movi(TCGOpcode op)
 138{
 139    switch (op_bits(op)) {
 140    case 32:
 141        return INDEX_op_movi_i32;
 142    case 64:
 143        return INDEX_op_movi_i64;
 144    default:
 145        fprintf(stderr, "op_to_movi: unexpected return value of "
 146                "function op_bits.\n");
 147        tcg_abort();
 148    }
 149}
 150
 151static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
 152{
 153    TCGArg i;
 154
 155    /* If this is already a global, we can't do better. */
 156    if (temp < s->nb_globals) {
 157        return temp;
 158    }
 159
 160    /* Search for a global first. */
 161    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
 162        if (i < s->nb_globals) {
 163            return i;
 164        }
 165    }
 166
 167    /* If it is a temp, search for a temp local. */
 168    if (!s->temps[temp].temp_local) {
 169        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
 170            if (s->temps[i].temp_local) {
 171                return i;
 172            }
 173        }
 174    }
 175
 176    /* Failure to find a better representation, return the same temp. */
 177    return temp;
 178}
 179
 180static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
 181{
 182    TCGArg i;
 183
 184    if (arg1 == arg2) {
 185        return true;
 186    }
 187
 188    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
 189        return false;
 190    }
 191
 192    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
 193        if (i == arg2) {
 194            return true;
 195        }
 196    }
 197
 198    return false;
 199}
 200
 201static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
 202                             TCGArg dst, TCGArg val)
 203{
 204    TCGOpcode new_op = op_to_movi(op->opc);
 205    tcg_target_ulong mask;
 206
 207    op->opc = new_op;
 208
 209    reset_temp(dst);
 210    temps[dst].is_const = true;
 211    temps[dst].val = val;
 212    mask = val;
 213    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
 214        /* High bits of the destination are now garbage.  */
 215        mask |= ~0xffffffffull;
 216    }
 217    temps[dst].mask = mask;
 218
 219    args[0] = dst;
 220    args[1] = val;
 221}
 222
 223static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
 224                            TCGArg dst, TCGArg src)
 225{
 226    if (temps_are_copies(dst, src)) {
 227        tcg_op_remove(s, op);
 228        return;
 229    }
 230
 231    TCGOpcode new_op = op_to_mov(op->opc);
 232    tcg_target_ulong mask;
 233
 234    op->opc = new_op;
 235
 236    reset_temp(dst);
 237    mask = temps[src].mask;
 238    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
 239        /* High bits of the destination are now garbage.  */
 240        mask |= ~0xffffffffull;
 241    }
 242    temps[dst].mask = mask;
 243
 244    if (s->temps[src].type == s->temps[dst].type) {
 245        temps[dst].next_copy = temps[src].next_copy;
 246        temps[dst].prev_copy = src;
 247        temps[temps[dst].next_copy].prev_copy = dst;
 248        temps[src].next_copy = dst;
 249        temps[dst].is_const = temps[src].is_const;
 250        temps[dst].val = temps[src].val;
 251    }
 252
 253    args[0] = dst;
 254    args[1] = src;
 255}
 256
 257static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
 258{
 259    uint64_t l64, h64;
 260
 261    switch (op) {
 262    CASE_OP_32_64(add):
 263        return x + y;
 264
 265    CASE_OP_32_64(sub):
 266        return x - y;
 267
 268    CASE_OP_32_64(mul):
 269        return x * y;
 270
 271    CASE_OP_32_64(and):
 272        return x & y;
 273
 274    CASE_OP_32_64(or):
 275        return x | y;
 276
 277    CASE_OP_32_64(xor):
 278        return x ^ y;
 279
 280    case INDEX_op_shl_i32:
 281        return (uint32_t)x << (y & 31);
 282
 283    case INDEX_op_shl_i64:
 284        return (uint64_t)x << (y & 63);
 285
 286    case INDEX_op_shr_i32:
 287        return (uint32_t)x >> (y & 31);
 288
 289    case INDEX_op_shr_i64:
 290        return (uint64_t)x >> (y & 63);
 291
 292    case INDEX_op_sar_i32:
 293        return (int32_t)x >> (y & 31);
 294
 295    case INDEX_op_sar_i64:
 296        return (int64_t)x >> (y & 63);
 297
 298    case INDEX_op_rotr_i32:
 299        return ror32(x, y & 31);
 300
 301    case INDEX_op_rotr_i64:
 302        return ror64(x, y & 63);
 303
 304    case INDEX_op_rotl_i32:
 305        return rol32(x, y & 31);
 306
 307    case INDEX_op_rotl_i64:
 308        return rol64(x, y & 63);
 309
 310    CASE_OP_32_64(not):
 311        return ~x;
 312
 313    CASE_OP_32_64(neg):
 314        return -x;
 315
 316    CASE_OP_32_64(andc):
 317        return x & ~y;
 318
 319    CASE_OP_32_64(orc):
 320        return x | ~y;
 321
 322    CASE_OP_32_64(eqv):
 323        return ~(x ^ y);
 324
 325    CASE_OP_32_64(nand):
 326        return ~(x & y);
 327
 328    CASE_OP_32_64(nor):
 329        return ~(x | y);
 330
 331    CASE_OP_32_64(ext8s):
 332        return (int8_t)x;
 333
 334    CASE_OP_32_64(ext16s):
 335        return (int16_t)x;
 336
 337    CASE_OP_32_64(ext8u):
 338        return (uint8_t)x;
 339
 340    CASE_OP_32_64(ext16u):
 341        return (uint16_t)x;
 342
 343    case INDEX_op_ext_i32_i64:
 344    case INDEX_op_ext32s_i64:
 345        return (int32_t)x;
 346
 347    case INDEX_op_extu_i32_i64:
 348    case INDEX_op_extrl_i64_i32:
 349    case INDEX_op_ext32u_i64:
 350        return (uint32_t)x;
 351
 352    case INDEX_op_extrh_i64_i32:
 353        return (uint64_t)x >> 32;
 354
 355    case INDEX_op_muluh_i32:
 356        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
 357    case INDEX_op_mulsh_i32:
 358        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
 359
 360    case INDEX_op_muluh_i64:
 361        mulu64(&l64, &h64, x, y);
 362        return h64;
 363    case INDEX_op_mulsh_i64:
 364        muls64(&l64, &h64, x, y);
 365        return h64;
 366
 367    case INDEX_op_div_i32:
 368        /* Avoid crashing on divide by zero, otherwise undefined.  */
 369        return (int32_t)x / ((int32_t)y ? : 1);
 370    case INDEX_op_divu_i32:
 371        return (uint32_t)x / ((uint32_t)y ? : 1);
 372    case INDEX_op_div_i64:
 373        return (int64_t)x / ((int64_t)y ? : 1);
 374    case INDEX_op_divu_i64:
 375        return (uint64_t)x / ((uint64_t)y ? : 1);
 376
 377    case INDEX_op_rem_i32:
 378        return (int32_t)x % ((int32_t)y ? : 1);
 379    case INDEX_op_remu_i32:
 380        return (uint32_t)x % ((uint32_t)y ? : 1);
 381    case INDEX_op_rem_i64:
 382        return (int64_t)x % ((int64_t)y ? : 1);
 383    case INDEX_op_remu_i64:
 384        return (uint64_t)x % ((uint64_t)y ? : 1);
 385
 386    default:
 387        fprintf(stderr,
 388                "Unrecognized operation %d in do_constant_folding.\n", op);
 389        tcg_abort();
 390    }
 391}
 392
 393static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
 394{
 395    TCGArg res = do_constant_folding_2(op, x, y);
 396    if (op_bits(op) == 32) {
 397        res = (int32_t)res;
 398    }
 399    return res;
 400}
 401
 402static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 403{
 404    switch (c) {
 405    case TCG_COND_EQ:
 406        return x == y;
 407    case TCG_COND_NE:
 408        return x != y;
 409    case TCG_COND_LT:
 410        return (int32_t)x < (int32_t)y;
 411    case TCG_COND_GE:
 412        return (int32_t)x >= (int32_t)y;
 413    case TCG_COND_LE:
 414        return (int32_t)x <= (int32_t)y;
 415    case TCG_COND_GT:
 416        return (int32_t)x > (int32_t)y;
 417    case TCG_COND_LTU:
 418        return x < y;
 419    case TCG_COND_GEU:
 420        return x >= y;
 421    case TCG_COND_LEU:
 422        return x <= y;
 423    case TCG_COND_GTU:
 424        return x > y;
 425    default:
 426        tcg_abort();
 427    }
 428}
 429
 430static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 431{
 432    switch (c) {
 433    case TCG_COND_EQ:
 434        return x == y;
 435    case TCG_COND_NE:
 436        return x != y;
 437    case TCG_COND_LT:
 438        return (int64_t)x < (int64_t)y;
 439    case TCG_COND_GE:
 440        return (int64_t)x >= (int64_t)y;
 441    case TCG_COND_LE:
 442        return (int64_t)x <= (int64_t)y;
 443    case TCG_COND_GT:
 444        return (int64_t)x > (int64_t)y;
 445    case TCG_COND_LTU:
 446        return x < y;
 447    case TCG_COND_GEU:
 448        return x >= y;
 449    case TCG_COND_LEU:
 450        return x <= y;
 451    case TCG_COND_GTU:
 452        return x > y;
 453    default:
 454        tcg_abort();
 455    }
 456}
 457
 458static bool do_constant_folding_cond_eq(TCGCond c)
 459{
 460    switch (c) {
 461    case TCG_COND_GT:
 462    case TCG_COND_LTU:
 463    case TCG_COND_LT:
 464    case TCG_COND_GTU:
 465    case TCG_COND_NE:
 466        return 0;
 467    case TCG_COND_GE:
 468    case TCG_COND_GEU:
 469    case TCG_COND_LE:
 470    case TCG_COND_LEU:
 471    case TCG_COND_EQ:
 472        return 1;
 473    default:
 474        tcg_abort();
 475    }
 476}
 477
 478/* Return 2 if the condition can't be simplified, and the result
 479   of the condition (0 or 1) if it can */
 480static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
 481                                       TCGArg y, TCGCond c)
 482{
 483    if (temp_is_const(x) && temp_is_const(y)) {
 484        switch (op_bits(op)) {
 485        case 32:
 486            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
 487        case 64:
 488            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
 489        default:
 490            tcg_abort();
 491        }
 492    } else if (temps_are_copies(x, y)) {
 493        return do_constant_folding_cond_eq(c);
 494    } else if (temp_is_const(y) && temps[y].val == 0) {
 495        switch (c) {
 496        case TCG_COND_LTU:
 497            return 0;
 498        case TCG_COND_GEU:
 499            return 1;
 500        default:
 501            return 2;
 502        }
 503    } else {
 504        return 2;
 505    }
 506}
 507
 508/* Return 2 if the condition can't be simplified, and the result
 509   of the condition (0 or 1) if it can */
 510static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 511{
 512    TCGArg al = p1[0], ah = p1[1];
 513    TCGArg bl = p2[0], bh = p2[1];
 514
 515    if (temp_is_const(bl) && temp_is_const(bh)) {
 516        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
 517
 518        if (temp_is_const(al) && temp_is_const(ah)) {
 519            uint64_t a;
 520            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
 521            return do_constant_folding_cond_64(a, b, c);
 522        }
 523        if (b == 0) {
 524            switch (c) {
 525            case TCG_COND_LTU:
 526                return 0;
 527            case TCG_COND_GEU:
 528                return 1;
 529            default:
 530                break;
 531            }
 532        }
 533    }
 534    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
 535        return do_constant_folding_cond_eq(c);
 536    }
 537    return 2;
 538}
 539
 540static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 541{
 542    TCGArg a1 = *p1, a2 = *p2;
 543    int sum = 0;
 544    sum += temp_is_const(a1);
 545    sum -= temp_is_const(a2);
 546
 547    /* Prefer the constant in second argument, and then the form
 548       op a, a, b, which is better handled on non-RISC hosts. */
 549    if (sum > 0 || (sum == 0 && dest == a2)) {
 550        *p1 = a2;
 551        *p2 = a1;
 552        return true;
 553    }
 554    return false;
 555}
 556
 557static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 558{
 559    int sum = 0;
 560    sum += temp_is_const(p1[0]);
 561    sum += temp_is_const(p1[1]);
 562    sum -= temp_is_const(p2[0]);
 563    sum -= temp_is_const(p2[1]);
 564    if (sum > 0) {
 565        TCGArg t;
 566        t = p1[0], p1[0] = p2[0], p2[0] = t;
 567        t = p1[1], p1[1] = p2[1], p2[1] = t;
 568        return true;
 569    }
 570    return false;
 571}
 572
 573/* Propagate constants and copies, fold constant expressions. */
 574void tcg_optimize(TCGContext *s)
 575{
 576    int oi, oi_next, nb_temps, nb_globals;
 577
 578    /* Array VALS has an element for each temp.
 579       If this temp holds a constant then its value is kept in VALS' element.
 580       If this temp is a copy of other ones then the other copies are
 581       available through the doubly linked circular list. */
 582
 583    nb_temps = s->nb_temps;
 584    nb_globals = s->nb_globals;
 585    reset_all_temps(nb_temps);
 586
 587    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
 588        tcg_target_ulong mask, partmask, affected;
 589        int nb_oargs, nb_iargs, i;
 590        TCGArg tmp;
 591
 592        TCGOp * const op = &s->gen_op_buf[oi];
 593        TCGArg * const args = &s->gen_opparam_buf[op->args];
 594        TCGOpcode opc = op->opc;
 595        const TCGOpDef *def = &tcg_op_defs[opc];
 596
 597        oi_next = op->next;
 598
 599        /* Count the arguments, and initialize the temps that are
 600           going to be used */
 601        if (opc == INDEX_op_call) {
 602            nb_oargs = op->callo;
 603            nb_iargs = op->calli;
 604            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 605                tmp = args[i];
 606                if (tmp != TCG_CALL_DUMMY_ARG) {
 607                    init_temp_info(tmp);
 608                }
 609            }
 610        } else {
 611            nb_oargs = def->nb_oargs;
 612            nb_iargs = def->nb_iargs;
 613            for (i = 0; i < nb_oargs + nb_iargs; i++) {
 614                init_temp_info(args[i]);
 615            }
 616        }
 617
 618        /* Do copy propagation */
 619        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 620            if (temp_is_copy(args[i])) {
 621                args[i] = find_better_copy(s, args[i]);
 622            }
 623        }
 624
 625        /* For commutative operations make constant second argument */
 626        switch (opc) {
 627        CASE_OP_32_64(add):
 628        CASE_OP_32_64(mul):
 629        CASE_OP_32_64(and):
 630        CASE_OP_32_64(or):
 631        CASE_OP_32_64(xor):
 632        CASE_OP_32_64(eqv):
 633        CASE_OP_32_64(nand):
 634        CASE_OP_32_64(nor):
 635        CASE_OP_32_64(muluh):
 636        CASE_OP_32_64(mulsh):
 637            swap_commutative(args[0], &args[1], &args[2]);
 638            break;
 639        CASE_OP_32_64(brcond):
 640            if (swap_commutative(-1, &args[0], &args[1])) {
 641                args[2] = tcg_swap_cond(args[2]);
 642            }
 643            break;
 644        CASE_OP_32_64(setcond):
 645            if (swap_commutative(args[0], &args[1], &args[2])) {
 646                args[3] = tcg_swap_cond(args[3]);
 647            }
 648            break;
 649        CASE_OP_32_64(movcond):
 650            if (swap_commutative(-1, &args[1], &args[2])) {
 651                args[5] = tcg_swap_cond(args[5]);
 652            }
 653            /* For movcond, we canonicalize the "false" input reg to match
 654               the destination reg so that the tcg backend can implement
 655               a "move if true" operation.  */
 656            if (swap_commutative(args[0], &args[4], &args[3])) {
 657                args[5] = tcg_invert_cond(args[5]);
 658            }
 659            break;
 660        CASE_OP_32_64(add2):
 661            swap_commutative(args[0], &args[2], &args[4]);
 662            swap_commutative(args[1], &args[3], &args[5]);
 663            break;
 664        CASE_OP_32_64(mulu2):
 665        CASE_OP_32_64(muls2):
 666            swap_commutative(args[0], &args[2], &args[3]);
 667            break;
 668        case INDEX_op_brcond2_i32:
 669            if (swap_commutative2(&args[0], &args[2])) {
 670                args[4] = tcg_swap_cond(args[4]);
 671            }
 672            break;
 673        case INDEX_op_setcond2_i32:
 674            if (swap_commutative2(&args[1], &args[3])) {
 675                args[5] = tcg_swap_cond(args[5]);
 676            }
 677            break;
 678        default:
 679            break;
 680        }
 681
 682        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
 683           and "sub r, 0, a => neg r, a" case.  */
 684        switch (opc) {
 685        CASE_OP_32_64(shl):
 686        CASE_OP_32_64(shr):
 687        CASE_OP_32_64(sar):
 688        CASE_OP_32_64(rotl):
 689        CASE_OP_32_64(rotr):
 690            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
 691                tcg_opt_gen_movi(s, op, args, args[0], 0);
 692                continue;
 693            }
 694            break;
 695        CASE_OP_32_64(sub):
 696            {
 697                TCGOpcode neg_op;
 698                bool have_neg;
 699
 700                if (temp_is_const(args[2])) {
 701                    /* Proceed with possible constant folding. */
 702                    break;
 703                }
 704                if (opc == INDEX_op_sub_i32) {
 705                    neg_op = INDEX_op_neg_i32;
 706                    have_neg = TCG_TARGET_HAS_neg_i32;
 707                } else {
 708                    neg_op = INDEX_op_neg_i64;
 709                    have_neg = TCG_TARGET_HAS_neg_i64;
 710                }
 711                if (!have_neg) {
 712                    break;
 713                }
 714                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
 715                    op->opc = neg_op;
 716                    reset_temp(args[0]);
 717                    args[1] = args[2];
 718                    continue;
 719                }
 720            }
 721            break;
 722        CASE_OP_32_64(xor):
 723        CASE_OP_32_64(nand):
 724            if (!temp_is_const(args[1])
 725                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
 726                i = 1;
 727                goto try_not;
 728            }
 729            break;
 730        CASE_OP_32_64(nor):
 731            if (!temp_is_const(args[1])
 732                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
 733                i = 1;
 734                goto try_not;
 735            }
 736            break;
 737        CASE_OP_32_64(andc):
 738            if (!temp_is_const(args[2])
 739                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
 740                i = 2;
 741                goto try_not;
 742            }
 743            break;
 744        CASE_OP_32_64(orc):
 745        CASE_OP_32_64(eqv):
 746            if (!temp_is_const(args[2])
 747                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
 748                i = 2;
 749                goto try_not;
 750            }
 751            break;
 752        try_not:
 753            {
 754                TCGOpcode not_op;
 755                bool have_not;
 756
 757                if (def->flags & TCG_OPF_64BIT) {
 758                    not_op = INDEX_op_not_i64;
 759                    have_not = TCG_TARGET_HAS_not_i64;
 760                } else {
 761                    not_op = INDEX_op_not_i32;
 762                    have_not = TCG_TARGET_HAS_not_i32;
 763                }
 764                if (!have_not) {
 765                    break;
 766                }
 767                op->opc = not_op;
 768                reset_temp(args[0]);
 769                args[1] = args[i];
 770                continue;
 771            }
 772        default:
 773            break;
 774        }
 775
 776        /* Simplify expression for "op r, a, const => mov r, a" cases */
 777        switch (opc) {
 778        CASE_OP_32_64(add):
 779        CASE_OP_32_64(sub):
 780        CASE_OP_32_64(shl):
 781        CASE_OP_32_64(shr):
 782        CASE_OP_32_64(sar):
 783        CASE_OP_32_64(rotl):
 784        CASE_OP_32_64(rotr):
 785        CASE_OP_32_64(or):
 786        CASE_OP_32_64(xor):
 787        CASE_OP_32_64(andc):
 788            if (!temp_is_const(args[1])
 789                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
 790                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 791                continue;
 792            }
 793            break;
 794        CASE_OP_32_64(and):
 795        CASE_OP_32_64(orc):
 796        CASE_OP_32_64(eqv):
 797            if (!temp_is_const(args[1])
 798                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
 799                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 800                continue;
 801            }
 802            break;
 803        default:
 804            break;
 805        }
 806
 807        /* Simplify using known-zero bits. Currently only ops with a single
 808           output argument is supported. */
 809        mask = -1;
 810        affected = -1;
 811        switch (opc) {
 812        CASE_OP_32_64(ext8s):
 813            if ((temps[args[1]].mask & 0x80) != 0) {
 814                break;
 815            }
 816        CASE_OP_32_64(ext8u):
 817            mask = 0xff;
 818            goto and_const;
 819        CASE_OP_32_64(ext16s):
 820            if ((temps[args[1]].mask & 0x8000) != 0) {
 821                break;
 822            }
 823        CASE_OP_32_64(ext16u):
 824            mask = 0xffff;
 825            goto and_const;
 826        case INDEX_op_ext32s_i64:
 827            if ((temps[args[1]].mask & 0x80000000) != 0) {
 828                break;
 829            }
 830        case INDEX_op_ext32u_i64:
 831            mask = 0xffffffffU;
 832            goto and_const;
 833
 834        CASE_OP_32_64(and):
 835            mask = temps[args[2]].mask;
 836            if (temp_is_const(args[2])) {
 837        and_const:
 838                affected = temps[args[1]].mask & ~mask;
 839            }
 840            mask = temps[args[1]].mask & mask;
 841            break;
 842
 843        case INDEX_op_ext_i32_i64:
 844            if ((temps[args[1]].mask & 0x80000000) != 0) {
 845                break;
 846            }
 847        case INDEX_op_extu_i32_i64:
 848            /* We do not compute affected as it is a size changing op.  */
 849            mask = (uint32_t)temps[args[1]].mask;
 850            break;
 851
 852        CASE_OP_32_64(andc):
 853            /* Known-zeros does not imply known-ones.  Therefore unless
 854               args[2] is constant, we can't infer anything from it.  */
 855            if (temp_is_const(args[2])) {
 856                mask = ~temps[args[2]].mask;
 857                goto and_const;
 858            }
 859            /* But we certainly know nothing outside args[1] may be set. */
 860            mask = temps[args[1]].mask;
 861            break;
 862
 863        case INDEX_op_sar_i32:
 864            if (temp_is_const(args[2])) {
 865                tmp = temps[args[2]].val & 31;
 866                mask = (int32_t)temps[args[1]].mask >> tmp;
 867            }
 868            break;
 869        case INDEX_op_sar_i64:
 870            if (temp_is_const(args[2])) {
 871                tmp = temps[args[2]].val & 63;
 872                mask = (int64_t)temps[args[1]].mask >> tmp;
 873            }
 874            break;
 875
 876        case INDEX_op_shr_i32:
 877            if (temp_is_const(args[2])) {
 878                tmp = temps[args[2]].val & 31;
 879                mask = (uint32_t)temps[args[1]].mask >> tmp;
 880            }
 881            break;
 882        case INDEX_op_shr_i64:
 883            if (temp_is_const(args[2])) {
 884                tmp = temps[args[2]].val & 63;
 885                mask = (uint64_t)temps[args[1]].mask >> tmp;
 886            }
 887            break;
 888
 889        case INDEX_op_extrl_i64_i32:
 890            mask = (uint32_t)temps[args[1]].mask;
 891            break;
 892        case INDEX_op_extrh_i64_i32:
 893            mask = (uint64_t)temps[args[1]].mask >> 32;
 894            break;
 895
 896        CASE_OP_32_64(shl):
 897            if (temp_is_const(args[2])) {
 898                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
 899                mask = temps[args[1]].mask << tmp;
 900            }
 901            break;
 902
 903        CASE_OP_32_64(neg):
 904            /* Set to 1 all bits to the left of the rightmost.  */
 905            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
 906            break;
 907
 908        CASE_OP_32_64(deposit):
 909            mask = deposit64(temps[args[1]].mask, args[3], args[4],
 910                             temps[args[2]].mask);
 911            break;
 912
 913        CASE_OP_32_64(or):
 914        CASE_OP_32_64(xor):
 915            mask = temps[args[1]].mask | temps[args[2]].mask;
 916            break;
 917
 918        CASE_OP_32_64(setcond):
 919        case INDEX_op_setcond2_i32:
 920            mask = 1;
 921            break;
 922
 923        CASE_OP_32_64(movcond):
 924            mask = temps[args[3]].mask | temps[args[4]].mask;
 925            break;
 926
 927        CASE_OP_32_64(ld8u):
 928            mask = 0xff;
 929            break;
 930        CASE_OP_32_64(ld16u):
 931            mask = 0xffff;
 932            break;
 933        case INDEX_op_ld32u_i64:
 934            mask = 0xffffffffu;
 935            break;
 936
 937        CASE_OP_32_64(qemu_ld):
 938            {
 939                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
 940                TCGMemOp mop = get_memop(oi);
 941                if (!(mop & MO_SIGN)) {
 942                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
 943                }
 944            }
 945            break;
 946
 947        default:
 948            break;
 949        }
 950
 951        /* 32-bit ops generate 32-bit results.  For the result is zero test
 952           below, we can ignore high bits, but for further optimizations we
 953           need to record that the high bits contain garbage.  */
 954        partmask = mask;
 955        if (!(def->flags & TCG_OPF_64BIT)) {
 956            mask |= ~(tcg_target_ulong)0xffffffffu;
 957            partmask &= 0xffffffffu;
 958            affected &= 0xffffffffu;
 959        }
 960
 961        if (partmask == 0) {
 962            assert(nb_oargs == 1);
 963            tcg_opt_gen_movi(s, op, args, args[0], 0);
 964            continue;
 965        }
 966        if (affected == 0) {
 967            assert(nb_oargs == 1);
 968            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 969            continue;
 970        }
 971
 972        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
 973        switch (opc) {
 974        CASE_OP_32_64(and):
 975        CASE_OP_32_64(mul):
 976        CASE_OP_32_64(muluh):
 977        CASE_OP_32_64(mulsh):
 978            if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
 979                tcg_opt_gen_movi(s, op, args, args[0], 0);
 980                continue;
 981            }
 982            break;
 983        default:
 984            break;
 985        }
 986
 987        /* Simplify expression for "op r, a, a => mov r, a" cases */
 988        switch (opc) {
 989        CASE_OP_32_64(or):
 990        CASE_OP_32_64(and):
 991            if (temps_are_copies(args[1], args[2])) {
 992                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
 993                continue;
 994            }
 995            break;
 996        default:
 997            break;
 998        }
 999
1000        /* Simplify expression for "op r, a, a => movi r, 0" cases */
1001        switch (opc) {
1002        CASE_OP_32_64(andc):
1003        CASE_OP_32_64(sub):
1004        CASE_OP_32_64(xor):
1005            if (temps_are_copies(args[1], args[2])) {
1006                tcg_opt_gen_movi(s, op, args, args[0], 0);
1007                continue;
1008            }
1009            break;
1010        default:
1011            break;
1012        }
1013
1014        /* Propagate constants through copy operations and do constant
1015           folding.  Constants will be substituted to arguments by register
1016           allocator where needed and possible.  Also detect copies. */
1017        switch (opc) {
1018        CASE_OP_32_64(mov):
1019            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
1020            break;
1021        CASE_OP_32_64(movi):
1022            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
1023            break;
1024
1025        CASE_OP_32_64(not):
1026        CASE_OP_32_64(neg):
1027        CASE_OP_32_64(ext8s):
1028        CASE_OP_32_64(ext8u):
1029        CASE_OP_32_64(ext16s):
1030        CASE_OP_32_64(ext16u):
1031        case INDEX_op_ext32s_i64:
1032        case INDEX_op_ext32u_i64:
1033        case INDEX_op_ext_i32_i64:
1034        case INDEX_op_extu_i32_i64:
1035        case INDEX_op_extrl_i64_i32:
1036        case INDEX_op_extrh_i64_i32:
1037            if (temp_is_const(args[1])) {
1038                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
1039                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1040                break;
1041            }
1042            goto do_default;
1043
1044        CASE_OP_32_64(add):
1045        CASE_OP_32_64(sub):
1046        CASE_OP_32_64(mul):
1047        CASE_OP_32_64(or):
1048        CASE_OP_32_64(and):
1049        CASE_OP_32_64(xor):
1050        CASE_OP_32_64(shl):
1051        CASE_OP_32_64(shr):
1052        CASE_OP_32_64(sar):
1053        CASE_OP_32_64(rotl):
1054        CASE_OP_32_64(rotr):
1055        CASE_OP_32_64(andc):
1056        CASE_OP_32_64(orc):
1057        CASE_OP_32_64(eqv):
1058        CASE_OP_32_64(nand):
1059        CASE_OP_32_64(nor):
1060        CASE_OP_32_64(muluh):
1061        CASE_OP_32_64(mulsh):
1062        CASE_OP_32_64(div):
1063        CASE_OP_32_64(divu):
1064        CASE_OP_32_64(rem):
1065        CASE_OP_32_64(remu):
1066            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1067                tmp = do_constant_folding(opc, temps[args[1]].val,
1068                                          temps[args[2]].val);
1069                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1070                break;
1071            }
1072            goto do_default;
1073
1074        CASE_OP_32_64(deposit):
1075            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1076                tmp = deposit64(temps[args[1]].val, args[3], args[4],
1077                                temps[args[2]].val);
1078                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1079                break;
1080            }
1081            goto do_default;
1082
1083        CASE_OP_32_64(setcond):
1084            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
1085            if (tmp != 2) {
1086                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1087                break;
1088            }
1089            goto do_default;
1090
1091        CASE_OP_32_64(brcond):
1092            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
1093            if (tmp != 2) {
1094                if (tmp) {
1095                    reset_all_temps(nb_temps);
1096                    op->opc = INDEX_op_br;
1097                    args[0] = args[3];
1098                } else {
1099                    tcg_op_remove(s, op);
1100                }
1101                break;
1102            }
1103            goto do_default;
1104
1105        CASE_OP_32_64(movcond):
1106            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
1107            if (tmp != 2) {
1108                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
1109                break;
1110            }
1111            goto do_default;
1112
1113        case INDEX_op_add2_i32:
1114        case INDEX_op_sub2_i32:
1115            if (temp_is_const(args[2]) && temp_is_const(args[3])
1116                && temp_is_const(args[4]) && temp_is_const(args[5])) {
1117                uint32_t al = temps[args[2]].val;
1118                uint32_t ah = temps[args[3]].val;
1119                uint32_t bl = temps[args[4]].val;
1120                uint32_t bh = temps[args[5]].val;
1121                uint64_t a = ((uint64_t)ah << 32) | al;
1122                uint64_t b = ((uint64_t)bh << 32) | bl;
1123                TCGArg rl, rh;
1124                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1125                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1126
1127                if (opc == INDEX_op_add2_i32) {
1128                    a += b;
1129                } else {
1130                    a -= b;
1131                }
1132
1133                rl = args[0];
1134                rh = args[1];
1135                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
1136                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));
1137
1138                /* We've done all we need to do with the movi.  Skip it.  */
1139                oi_next = op2->next;
1140                break;
1141            }
1142            goto do_default;
1143
1144        case INDEX_op_mulu2_i32:
1145            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
1146                uint32_t a = temps[args[2]].val;
1147                uint32_t b = temps[args[3]].val;
1148                uint64_t r = (uint64_t)a * b;
1149                TCGArg rl, rh;
1150                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1151                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1152
1153                rl = args[0];
1154                rh = args[1];
1155                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
1156                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));
1157
1158                /* We've done all we need to do with the movi.  Skip it.  */
1159                oi_next = op2->next;
1160                break;
1161            }
1162            goto do_default;
1163
1164        case INDEX_op_brcond2_i32:
1165            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
1166            if (tmp != 2) {
1167                if (tmp) {
1168            do_brcond_true:
1169                    reset_all_temps(nb_temps);
1170                    op->opc = INDEX_op_br;
1171                    args[0] = args[5];
1172                } else {
1173            do_brcond_false:
1174                    tcg_op_remove(s, op);
1175                }
1176            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
1177                       && temp_is_const(args[2]) && temps[args[2]].val == 0
1178                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
1179                /* Simplify LT/GE comparisons vs zero to a single compare
1180                   vs the high word of the input.  */
1181            do_brcond_high:
1182                reset_all_temps(nb_temps);
1183                op->opc = INDEX_op_brcond_i32;
1184                args[0] = args[1];
1185                args[1] = args[3];
1186                args[2] = args[4];
1187                args[3] = args[5];
1188            } else if (args[4] == TCG_COND_EQ) {
1189                /* Simplify EQ comparisons where one of the pairs
1190                   can be simplified.  */
1191                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1192                                               args[0], args[2], TCG_COND_EQ);
1193                if (tmp == 0) {
1194                    goto do_brcond_false;
1195                } else if (tmp == 1) {
1196                    goto do_brcond_high;
1197                }
1198                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1199                                               args[1], args[3], TCG_COND_EQ);
1200                if (tmp == 0) {
1201                    goto do_brcond_false;
1202                } else if (tmp != 1) {
1203                    goto do_default;
1204                }
1205            do_brcond_low:
1206                reset_all_temps(nb_temps);
1207                op->opc = INDEX_op_brcond_i32;
1208                args[1] = args[2];
1209                args[2] = args[4];
1210                args[3] = args[5];
1211            } else if (args[4] == TCG_COND_NE) {
1212                /* Simplify NE comparisons where one of the pairs
1213                   can be simplified.  */
1214                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1215                                               args[0], args[2], TCG_COND_NE);
1216                if (tmp == 0) {
1217                    goto do_brcond_high;
1218                } else if (tmp == 1) {
1219                    goto do_brcond_true;
1220                }
1221                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1222                                               args[1], args[3], TCG_COND_NE);
1223                if (tmp == 0) {
1224                    goto do_brcond_low;
1225                } else if (tmp == 1) {
1226                    goto do_brcond_true;
1227                }
1228                goto do_default;
1229            } else {
1230                goto do_default;
1231            }
1232            break;
1233
1234        case INDEX_op_setcond2_i32:
1235            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
1236            if (tmp != 2) {
1237            do_setcond_const:
1238                tcg_opt_gen_movi(s, op, args, args[0], tmp);
1239            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
1240                       && temp_is_const(args[3]) && temps[args[3]].val == 0
1241                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
1242                /* Simplify LT/GE comparisons vs zero to a single compare
1243                   vs the high word of the input.  */
1244            do_setcond_high:
1245                reset_temp(args[0]);
1246                temps[args[0]].mask = 1;
1247                op->opc = INDEX_op_setcond_i32;
1248                args[1] = args[2];
1249                args[2] = args[4];
1250                args[3] = args[5];
1251            } else if (args[5] == TCG_COND_EQ) {
1252                /* Simplify EQ comparisons where one of the pairs
1253                   can be simplified.  */
1254                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1255                                               args[1], args[3], TCG_COND_EQ);
1256                if (tmp == 0) {
1257                    goto do_setcond_const;
1258                } else if (tmp == 1) {
1259                    goto do_setcond_high;
1260                }
1261                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1262                                               args[2], args[4], TCG_COND_EQ);
1263                if (tmp == 0) {
1264                    goto do_setcond_high;
1265                } else if (tmp != 1) {
1266                    goto do_default;
1267                }
1268            do_setcond_low:
1269                reset_temp(args[0]);
1270                temps[args[0]].mask = 1;
1271                op->opc = INDEX_op_setcond_i32;
1272                args[2] = args[3];
1273                args[3] = args[5];
1274            } else if (args[5] == TCG_COND_NE) {
1275                /* Simplify NE comparisons where one of the pairs
1276                   can be simplified.  */
1277                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1278                                               args[1], args[3], TCG_COND_NE);
1279                if (tmp == 0) {
1280                    goto do_setcond_high;
1281                } else if (tmp == 1) {
1282                    goto do_setcond_const;
1283                }
1284                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1285                                               args[2], args[4], TCG_COND_NE);
1286                if (tmp == 0) {
1287                    goto do_setcond_low;
1288                } else if (tmp == 1) {
1289                    goto do_setcond_const;
1290                }
1291                goto do_default;
1292            } else {
1293                goto do_default;
1294            }
1295            break;
1296
1297        case INDEX_op_call:
1298            if (!(args[nb_oargs + nb_iargs + 1]
1299                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1300                for (i = 0; i < nb_globals; i++) {
1301                    if (test_bit(i, temps_used.l)) {
1302                        reset_temp(i);
1303                    }
1304                }
1305            }
1306            goto do_reset_output;
1307
1308        default:
1309        do_default:
1310            /* Default case: we know nothing about operation (or were unable
1311               to compute the operation result) so no propagation is done.
1312               We trash everything if the operation is the end of a basic
1313               block, otherwise we only trash the output args.  "mask" is
1314               the non-zero bits mask for the first output arg.  */
1315            if (def->flags & TCG_OPF_BB_END) {
1316                reset_all_temps(nb_temps);
1317            } else {
1318        do_reset_output:
1319                for (i = 0; i < nb_oargs; i++) {
1320                    reset_temp(args[i]);
1321                    /* Save the corresponding known-zero bits mask for the
1322                       first output argument (only one supported so far). */
1323                    if (i == 0) {
1324                        temps[args[i]].mask = mask;
1325                    }
1326                }
1327            }
1328            break;
1329        }
1330    }
1331}
1332