qemu/tcg/optimize.c
<<
>>
Prefs
   1/*
   2 * Optimizations for Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2010 Samsung Electronics.
   5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "config.h"
  27
  28#include <stdlib.h>
  29#include <stdio.h>
  30
  31#include "qemu-common.h"
  32#include "tcg-op.h"
  33
  34#define CASE_OP_32_64(x)                        \
  35        glue(glue(case INDEX_op_, x), _i32):    \
  36        glue(glue(case INDEX_op_, x), _i64)
  37
  38typedef enum {
  39    TCG_TEMP_UNDEF = 0,
  40    TCG_TEMP_CONST,
  41    TCG_TEMP_COPY,
  42} tcg_temp_state;
  43
  44struct tcg_temp_info {
  45    tcg_temp_state state;
  46    uint16_t prev_copy;
  47    uint16_t next_copy;
  48    tcg_target_ulong val;
  49};
  50
  51static struct tcg_temp_info temps[TCG_MAX_TEMPS];
  52
  53/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
  54   the copy flag from the left temp.  */
  55static void reset_temp(TCGArg temp)
  56{
  57    if (temps[temp].state == TCG_TEMP_COPY) {
  58        if (temps[temp].prev_copy == temps[temp].next_copy) {
  59            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
  60        } else {
  61            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
  62            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
  63        }
  64    }
  65    temps[temp].state = TCG_TEMP_UNDEF;
  66}
  67
  68static int op_bits(TCGOpcode op)
  69{
  70    const TCGOpDef *def = &tcg_op_defs[op];
  71    return def->flags & TCG_OPF_64BIT ? 64 : 32;
  72}
  73
  74static TCGOpcode op_to_movi(TCGOpcode op)
  75{
  76    switch (op_bits(op)) {
  77    case 32:
  78        return INDEX_op_movi_i32;
  79    case 64:
  80        return INDEX_op_movi_i64;
  81    default:
  82        fprintf(stderr, "op_to_movi: unexpected return value of "
  83                "function op_bits.\n");
  84        tcg_abort();
  85    }
  86}
  87
  88static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
  89{
  90    TCGArg i;
  91
  92    /* If this is already a global, we can't do better. */
  93    if (temp < s->nb_globals) {
  94        return temp;
  95    }
  96
  97    /* Search for a global first. */
  98    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
  99        if (i < s->nb_globals) {
 100            return i;
 101        }
 102    }
 103
 104    /* If it is a temp, search for a temp local. */
 105    if (!s->temps[temp].temp_local) {
 106        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
 107            if (s->temps[i].temp_local) {
 108                return i;
 109            }
 110        }
 111    }
 112
 113    /* Failure to find a better representation, return the same temp. */
 114    return temp;
 115}
 116
 117static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
 118{
 119    TCGArg i;
 120
 121    if (arg1 == arg2) {
 122        return true;
 123    }
 124
 125    if (temps[arg1].state != TCG_TEMP_COPY
 126        || temps[arg2].state != TCG_TEMP_COPY) {
 127        return false;
 128    }
 129
 130    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
 131        if (i == arg2) {
 132            return true;
 133        }
 134    }
 135
 136    return false;
 137}
 138
 139static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
 140                            TCGArg dst, TCGArg src)
 141{
 142        reset_temp(dst);
 143        assert(temps[src].state != TCG_TEMP_CONST);
 144
 145        if (s->temps[src].type == s->temps[dst].type) {
 146            if (temps[src].state != TCG_TEMP_COPY) {
 147                temps[src].state = TCG_TEMP_COPY;
 148                temps[src].next_copy = src;
 149                temps[src].prev_copy = src;
 150            }
 151            temps[dst].state = TCG_TEMP_COPY;
 152            temps[dst].next_copy = temps[src].next_copy;
 153            temps[dst].prev_copy = src;
 154            temps[temps[dst].next_copy].prev_copy = dst;
 155            temps[src].next_copy = dst;
 156        }
 157
 158        gen_args[0] = dst;
 159        gen_args[1] = src;
 160}
 161
 162static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
 163{
 164        reset_temp(dst);
 165        temps[dst].state = TCG_TEMP_CONST;
 166        temps[dst].val = val;
 167        gen_args[0] = dst;
 168        gen_args[1] = val;
 169}
 170
 171static TCGOpcode op_to_mov(TCGOpcode op)
 172{
 173    switch (op_bits(op)) {
 174    case 32:
 175        return INDEX_op_mov_i32;
 176    case 64:
 177        return INDEX_op_mov_i64;
 178    default:
 179        fprintf(stderr, "op_to_mov: unexpected return value of "
 180                "function op_bits.\n");
 181        tcg_abort();
 182    }
 183}
 184
 185static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
 186{
 187    switch (op) {
 188    CASE_OP_32_64(add):
 189        return x + y;
 190
 191    CASE_OP_32_64(sub):
 192        return x - y;
 193
 194    CASE_OP_32_64(mul):
 195        return x * y;
 196
 197    CASE_OP_32_64(and):
 198        return x & y;
 199
 200    CASE_OP_32_64(or):
 201        return x | y;
 202
 203    CASE_OP_32_64(xor):
 204        return x ^ y;
 205
 206    case INDEX_op_shl_i32:
 207        return (uint32_t)x << (uint32_t)y;
 208
 209    case INDEX_op_shl_i64:
 210        return (uint64_t)x << (uint64_t)y;
 211
 212    case INDEX_op_shr_i32:
 213        return (uint32_t)x >> (uint32_t)y;
 214
 215    case INDEX_op_shr_i64:
 216        return (uint64_t)x >> (uint64_t)y;
 217
 218    case INDEX_op_sar_i32:
 219        return (int32_t)x >> (int32_t)y;
 220
 221    case INDEX_op_sar_i64:
 222        return (int64_t)x >> (int64_t)y;
 223
 224    case INDEX_op_rotr_i32:
 225        x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
 226        return x;
 227
 228    case INDEX_op_rotr_i64:
 229        x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
 230        return x;
 231
 232    case INDEX_op_rotl_i32:
 233        x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
 234        return x;
 235
 236    case INDEX_op_rotl_i64:
 237        x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
 238        return x;
 239
 240    CASE_OP_32_64(not):
 241        return ~x;
 242
 243    CASE_OP_32_64(neg):
 244        return -x;
 245
 246    CASE_OP_32_64(andc):
 247        return x & ~y;
 248
 249    CASE_OP_32_64(orc):
 250        return x | ~y;
 251
 252    CASE_OP_32_64(eqv):
 253        return ~(x ^ y);
 254
 255    CASE_OP_32_64(nand):
 256        return ~(x & y);
 257
 258    CASE_OP_32_64(nor):
 259        return ~(x | y);
 260
 261    CASE_OP_32_64(ext8s):
 262        return (int8_t)x;
 263
 264    CASE_OP_32_64(ext16s):
 265        return (int16_t)x;
 266
 267    CASE_OP_32_64(ext8u):
 268        return (uint8_t)x;
 269
 270    CASE_OP_32_64(ext16u):
 271        return (uint16_t)x;
 272
 273    case INDEX_op_ext32s_i64:
 274        return (int32_t)x;
 275
 276    case INDEX_op_ext32u_i64:
 277        return (uint32_t)x;
 278
 279    default:
 280        fprintf(stderr,
 281                "Unrecognized operation %d in do_constant_folding.\n", op);
 282        tcg_abort();
 283    }
 284}
 285
 286static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
 287{
 288    TCGArg res = do_constant_folding_2(op, x, y);
 289    if (op_bits(op) == 32) {
 290        res &= 0xffffffff;
 291    }
 292    return res;
 293}
 294
 295static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 296{
 297    switch (c) {
 298    case TCG_COND_EQ:
 299        return x == y;
 300    case TCG_COND_NE:
 301        return x != y;
 302    case TCG_COND_LT:
 303        return (int32_t)x < (int32_t)y;
 304    case TCG_COND_GE:
 305        return (int32_t)x >= (int32_t)y;
 306    case TCG_COND_LE:
 307        return (int32_t)x <= (int32_t)y;
 308    case TCG_COND_GT:
 309        return (int32_t)x > (int32_t)y;
 310    case TCG_COND_LTU:
 311        return x < y;
 312    case TCG_COND_GEU:
 313        return x >= y;
 314    case TCG_COND_LEU:
 315        return x <= y;
 316    case TCG_COND_GTU:
 317        return x > y;
 318    default:
 319        tcg_abort();
 320    }
 321}
 322
 323static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 324{
 325    switch (c) {
 326    case TCG_COND_EQ:
 327        return x == y;
 328    case TCG_COND_NE:
 329        return x != y;
 330    case TCG_COND_LT:
 331        return (int64_t)x < (int64_t)y;
 332    case TCG_COND_GE:
 333        return (int64_t)x >= (int64_t)y;
 334    case TCG_COND_LE:
 335        return (int64_t)x <= (int64_t)y;
 336    case TCG_COND_GT:
 337        return (int64_t)x > (int64_t)y;
 338    case TCG_COND_LTU:
 339        return x < y;
 340    case TCG_COND_GEU:
 341        return x >= y;
 342    case TCG_COND_LEU:
 343        return x <= y;
 344    case TCG_COND_GTU:
 345        return x > y;
 346    default:
 347        tcg_abort();
 348    }
 349}
 350
 351static bool do_constant_folding_cond_eq(TCGCond c)
 352{
 353    switch (c) {
 354    case TCG_COND_GT:
 355    case TCG_COND_LTU:
 356    case TCG_COND_LT:
 357    case TCG_COND_GTU:
 358    case TCG_COND_NE:
 359        return 0;
 360    case TCG_COND_GE:
 361    case TCG_COND_GEU:
 362    case TCG_COND_LE:
 363    case TCG_COND_LEU:
 364    case TCG_COND_EQ:
 365        return 1;
 366    default:
 367        tcg_abort();
 368    }
 369}
 370
 371/* Return 2 if the condition can't be simplified, and the result
 372   of the condition (0 or 1) if it can */
 373static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
 374                                       TCGArg y, TCGCond c)
 375{
 376    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
 377        switch (op_bits(op)) {
 378        case 32:
 379            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
 380        case 64:
 381            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
 382        default:
 383            tcg_abort();
 384        }
 385    } else if (temps_are_copies(x, y)) {
 386        return do_constant_folding_cond_eq(c);
 387    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
 388        switch (c) {
 389        case TCG_COND_LTU:
 390            return 0;
 391        case TCG_COND_GEU:
 392            return 1;
 393        default:
 394            return 2;
 395        }
 396    } else {
 397        return 2;
 398    }
 399}
 400
 401/* Return 2 if the condition can't be simplified, and the result
 402   of the condition (0 or 1) if it can */
 403static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 404{
 405    TCGArg al = p1[0], ah = p1[1];
 406    TCGArg bl = p2[0], bh = p2[1];
 407
 408    if (temps[bl].state == TCG_TEMP_CONST
 409        && temps[bh].state == TCG_TEMP_CONST) {
 410        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
 411
 412        if (temps[al].state == TCG_TEMP_CONST
 413            && temps[ah].state == TCG_TEMP_CONST) {
 414            uint64_t a;
 415            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
 416            return do_constant_folding_cond_64(a, b, c);
 417        }
 418        if (b == 0) {
 419            switch (c) {
 420            case TCG_COND_LTU:
 421                return 0;
 422            case TCG_COND_GEU:
 423                return 1;
 424            default:
 425                break;
 426            }
 427        }
 428    }
 429    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
 430        return do_constant_folding_cond_eq(c);
 431    }
 432    return 2;
 433}
 434
 435static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 436{
 437    TCGArg a1 = *p1, a2 = *p2;
 438    int sum = 0;
 439    sum += temps[a1].state == TCG_TEMP_CONST;
 440    sum -= temps[a2].state == TCG_TEMP_CONST;
 441
 442    /* Prefer the constant in second argument, and then the form
 443       op a, a, b, which is better handled on non-RISC hosts. */
 444    if (sum > 0 || (sum == 0 && dest == a2)) {
 445        *p1 = a2;
 446        *p2 = a1;
 447        return true;
 448    }
 449    return false;
 450}
 451
 452static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 453{
 454    int sum = 0;
 455    sum += temps[p1[0]].state == TCG_TEMP_CONST;
 456    sum += temps[p1[1]].state == TCG_TEMP_CONST;
 457    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
 458    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
 459    if (sum > 0) {
 460        TCGArg t;
 461        t = p1[0], p1[0] = p2[0], p2[0] = t;
 462        t = p1[1], p1[1] = p2[1], p2[1] = t;
 463        return true;
 464    }
 465    return false;
 466}
 467
 468/* Propagate constants and copies, fold constant expressions. */
 469static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 470                                    TCGArg *args, TCGOpDef *tcg_op_defs)
 471{
 472    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
 473    TCGOpcode op;
 474    const TCGOpDef *def;
 475    TCGArg *gen_args;
 476    TCGArg tmp;
 477
 478    /* Array VALS has an element for each temp.
 479       If this temp holds a constant then its value is kept in VALS' element.
 480       If this temp is a copy of other ones then the other copies are
 481       available through the doubly linked circular list. */
 482
 483    nb_temps = s->nb_temps;
 484    nb_globals = s->nb_globals;
 485    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
 486
 487    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
 488    gen_args = args;
 489    for (op_index = 0; op_index < nb_ops; op_index++) {
 490        op = s->gen_opc_buf[op_index];
 491        def = &tcg_op_defs[op];
 492        /* Do copy propagation */
 493        if (op == INDEX_op_call) {
 494            int nb_oargs = args[0] >> 16;
 495            int nb_iargs = args[0] & 0xffff;
 496            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
 497                if (temps[args[i]].state == TCG_TEMP_COPY) {
 498                    args[i] = find_better_copy(s, args[i]);
 499                }
 500            }
 501        } else {
 502            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
 503                if (temps[args[i]].state == TCG_TEMP_COPY) {
 504                    args[i] = find_better_copy(s, args[i]);
 505                }
 506            }
 507        }
 508
 509        /* For commutative operations make constant second argument */
 510        switch (op) {
 511        CASE_OP_32_64(add):
 512        CASE_OP_32_64(mul):
 513        CASE_OP_32_64(and):
 514        CASE_OP_32_64(or):
 515        CASE_OP_32_64(xor):
 516        CASE_OP_32_64(eqv):
 517        CASE_OP_32_64(nand):
 518        CASE_OP_32_64(nor):
 519            swap_commutative(args[0], &args[1], &args[2]);
 520            break;
 521        CASE_OP_32_64(brcond):
 522            if (swap_commutative(-1, &args[0], &args[1])) {
 523                args[2] = tcg_swap_cond(args[2]);
 524            }
 525            break;
 526        CASE_OP_32_64(setcond):
 527            if (swap_commutative(args[0], &args[1], &args[2])) {
 528                args[3] = tcg_swap_cond(args[3]);
 529            }
 530            break;
 531        CASE_OP_32_64(movcond):
 532            if (swap_commutative(-1, &args[1], &args[2])) {
 533                args[5] = tcg_swap_cond(args[5]);
 534            }
 535            /* For movcond, we canonicalize the "false" input reg to match
 536               the destination reg so that the tcg backend can implement
 537               a "move if true" operation.  */
 538            if (swap_commutative(args[0], &args[4], &args[3])) {
 539                args[5] = tcg_invert_cond(args[5]);
 540            }
 541            break;
 542        case INDEX_op_add2_i32:
 543            swap_commutative(args[0], &args[2], &args[4]);
 544            swap_commutative(args[1], &args[3], &args[5]);
 545            break;
 546        case INDEX_op_mulu2_i32:
 547            swap_commutative(args[0], &args[2], &args[3]);
 548            break;
 549        case INDEX_op_brcond2_i32:
 550            if (swap_commutative2(&args[0], &args[2])) {
 551                args[4] = tcg_swap_cond(args[4]);
 552            }
 553            break;
 554        case INDEX_op_setcond2_i32:
 555            if (swap_commutative2(&args[1], &args[3])) {
 556                args[5] = tcg_swap_cond(args[5]);
 557            }
 558            break;
 559        default:
 560            break;
 561        }
 562
 563        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
 564        switch (op) {
 565        CASE_OP_32_64(shl):
 566        CASE_OP_32_64(shr):
 567        CASE_OP_32_64(sar):
 568        CASE_OP_32_64(rotl):
 569        CASE_OP_32_64(rotr):
 570            if (temps[args[1]].state == TCG_TEMP_CONST
 571                && temps[args[1]].val == 0) {
 572                s->gen_opc_buf[op_index] = op_to_movi(op);
 573                tcg_opt_gen_movi(gen_args, args[0], 0);
 574                args += 3;
 575                gen_args += 2;
 576                continue;
 577            }
 578            break;
 579        default:
 580            break;
 581        }
 582
 583        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
 584        switch (op) {
 585        CASE_OP_32_64(add):
 586        CASE_OP_32_64(sub):
 587        CASE_OP_32_64(shl):
 588        CASE_OP_32_64(shr):
 589        CASE_OP_32_64(sar):
 590        CASE_OP_32_64(rotl):
 591        CASE_OP_32_64(rotr):
 592        CASE_OP_32_64(or):
 593        CASE_OP_32_64(xor):
 594            if (temps[args[1]].state == TCG_TEMP_CONST) {
 595                /* Proceed with possible constant folding. */
 596                break;
 597            }
 598            if (temps[args[2]].state == TCG_TEMP_CONST
 599                && temps[args[2]].val == 0) {
 600                if (temps_are_copies(args[0], args[1])) {
 601                    s->gen_opc_buf[op_index] = INDEX_op_nop;
 602                } else {
 603                    s->gen_opc_buf[op_index] = op_to_mov(op);
 604                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
 605                    gen_args += 2;
 606                }
 607                args += 3;
 608                continue;
 609            }
 610            break;
 611        default:
 612            break;
 613        }
 614
 615        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
 616        switch (op) {
 617        CASE_OP_32_64(and):
 618        CASE_OP_32_64(mul):
 619            if ((temps[args[2]].state == TCG_TEMP_CONST
 620                && temps[args[2]].val == 0)) {
 621                s->gen_opc_buf[op_index] = op_to_movi(op);
 622                tcg_opt_gen_movi(gen_args, args[0], 0);
 623                args += 3;
 624                gen_args += 2;
 625                continue;
 626            }
 627            break;
 628        default:
 629            break;
 630        }
 631
 632        /* Simplify expression for "op r, a, a => mov r, a" cases */
 633        switch (op) {
 634        CASE_OP_32_64(or):
 635        CASE_OP_32_64(and):
 636            if (temps_are_copies(args[1], args[2])) {
 637                if (temps_are_copies(args[0], args[1])) {
 638                    s->gen_opc_buf[op_index] = INDEX_op_nop;
 639                } else {
 640                    s->gen_opc_buf[op_index] = op_to_mov(op);
 641                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
 642                    gen_args += 2;
 643                }
 644                args += 3;
 645                continue;
 646            }
 647            break;
 648        default:
 649            break;
 650        }
 651
 652        /* Simplify expression for "op r, a, a => movi r, 0" cases */
 653        switch (op) {
 654        CASE_OP_32_64(sub):
 655        CASE_OP_32_64(xor):
 656            if (temps_are_copies(args[1], args[2])) {
 657                s->gen_opc_buf[op_index] = op_to_movi(op);
 658                tcg_opt_gen_movi(gen_args, args[0], 0);
 659                gen_args += 2;
 660                args += 3;
 661                continue;
 662            }
 663            break;
 664        default:
 665            break;
 666        }
 667
 668        /* Propagate constants through copy operations and do constant
 669           folding.  Constants will be substituted to arguments by register
 670           allocator where needed and possible.  Also detect copies. */
 671        switch (op) {
 672        CASE_OP_32_64(mov):
 673            if (temps_are_copies(args[0], args[1])) {
 674                args += 2;
 675                s->gen_opc_buf[op_index] = INDEX_op_nop;
 676                break;
 677            }
 678            if (temps[args[1]].state != TCG_TEMP_CONST) {
 679                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
 680                gen_args += 2;
 681                args += 2;
 682                break;
 683            }
 684            /* Source argument is constant.  Rewrite the operation and
 685               let movi case handle it. */
 686            op = op_to_movi(op);
 687            s->gen_opc_buf[op_index] = op;
 688            args[1] = temps[args[1]].val;
 689            /* fallthrough */
 690        CASE_OP_32_64(movi):
 691            tcg_opt_gen_movi(gen_args, args[0], args[1]);
 692            gen_args += 2;
 693            args += 2;
 694            break;
 695
 696        CASE_OP_32_64(not):
 697        CASE_OP_32_64(neg):
 698        CASE_OP_32_64(ext8s):
 699        CASE_OP_32_64(ext8u):
 700        CASE_OP_32_64(ext16s):
 701        CASE_OP_32_64(ext16u):
 702        case INDEX_op_ext32s_i64:
 703        case INDEX_op_ext32u_i64:
 704            if (temps[args[1]].state == TCG_TEMP_CONST) {
 705                s->gen_opc_buf[op_index] = op_to_movi(op);
 706                tmp = do_constant_folding(op, temps[args[1]].val, 0);
 707                tcg_opt_gen_movi(gen_args, args[0], tmp);
 708                gen_args += 2;
 709                args += 2;
 710                break;
 711            }
 712            goto do_default;
 713
 714        CASE_OP_32_64(add):
 715        CASE_OP_32_64(sub):
 716        CASE_OP_32_64(mul):
 717        CASE_OP_32_64(or):
 718        CASE_OP_32_64(and):
 719        CASE_OP_32_64(xor):
 720        CASE_OP_32_64(shl):
 721        CASE_OP_32_64(shr):
 722        CASE_OP_32_64(sar):
 723        CASE_OP_32_64(rotl):
 724        CASE_OP_32_64(rotr):
 725        CASE_OP_32_64(andc):
 726        CASE_OP_32_64(orc):
 727        CASE_OP_32_64(eqv):
 728        CASE_OP_32_64(nand):
 729        CASE_OP_32_64(nor):
 730            if (temps[args[1]].state == TCG_TEMP_CONST
 731                && temps[args[2]].state == TCG_TEMP_CONST) {
 732                s->gen_opc_buf[op_index] = op_to_movi(op);
 733                tmp = do_constant_folding(op, temps[args[1]].val,
 734                                          temps[args[2]].val);
 735                tcg_opt_gen_movi(gen_args, args[0], tmp);
 736                gen_args += 2;
 737                args += 3;
 738                break;
 739            }
 740            goto do_default;
 741
 742        CASE_OP_32_64(deposit):
 743            if (temps[args[1]].state == TCG_TEMP_CONST
 744                && temps[args[2]].state == TCG_TEMP_CONST) {
 745                s->gen_opc_buf[op_index] = op_to_movi(op);
 746                tmp = ((1ull << args[4]) - 1);
 747                tmp = (temps[args[1]].val & ~(tmp << args[3]))
 748                      | ((temps[args[2]].val & tmp) << args[3]);
 749                tcg_opt_gen_movi(gen_args, args[0], tmp);
 750                gen_args += 2;
 751                args += 5;
 752                break;
 753            }
 754            goto do_default;
 755
 756        CASE_OP_32_64(setcond):
 757            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
 758            if (tmp != 2) {
 759                s->gen_opc_buf[op_index] = op_to_movi(op);
 760                tcg_opt_gen_movi(gen_args, args[0], tmp);
 761                gen_args += 2;
 762                args += 4;
 763                break;
 764            }
 765            goto do_default;
 766
 767        CASE_OP_32_64(brcond):
 768            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
 769            if (tmp != 2) {
 770                if (tmp) {
 771                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
 772                    s->gen_opc_buf[op_index] = INDEX_op_br;
 773                    gen_args[0] = args[3];
 774                    gen_args += 1;
 775                } else {
 776                    s->gen_opc_buf[op_index] = INDEX_op_nop;
 777                }
 778                args += 4;
 779                break;
 780            }
 781            goto do_default;
 782
 783        CASE_OP_32_64(movcond):
 784            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
 785            if (tmp != 2) {
 786                if (temps_are_copies(args[0], args[4-tmp])) {
 787                    s->gen_opc_buf[op_index] = INDEX_op_nop;
 788                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
 789                    s->gen_opc_buf[op_index] = op_to_movi(op);
 790                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
 791                    gen_args += 2;
 792                } else {
 793                    s->gen_opc_buf[op_index] = op_to_mov(op);
 794                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
 795                    gen_args += 2;
 796                }
 797                args += 6;
 798                break;
 799            }
 800            goto do_default;
 801
 802        case INDEX_op_add2_i32:
 803        case INDEX_op_sub2_i32:
 804            if (temps[args[2]].state == TCG_TEMP_CONST
 805                && temps[args[3]].state == TCG_TEMP_CONST
 806                && temps[args[4]].state == TCG_TEMP_CONST
 807                && temps[args[5]].state == TCG_TEMP_CONST) {
 808                uint32_t al = temps[args[2]].val;
 809                uint32_t ah = temps[args[3]].val;
 810                uint32_t bl = temps[args[4]].val;
 811                uint32_t bh = temps[args[5]].val;
 812                uint64_t a = ((uint64_t)ah << 32) | al;
 813                uint64_t b = ((uint64_t)bh << 32) | bl;
 814                TCGArg rl, rh;
 815
 816                if (op == INDEX_op_add2_i32) {
 817                    a += b;
 818                } else {
 819                    a -= b;
 820                }
 821
 822                /* We emit the extra nop when we emit the add2/sub2.  */
 823                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
 824
 825                rl = args[0];
 826                rh = args[1];
 827                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
 828                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
 829                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
 830                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
 831                gen_args += 4;
 832                args += 6;
 833                break;
 834            }
 835            goto do_default;
 836
 837        case INDEX_op_mulu2_i32:
 838            if (temps[args[2]].state == TCG_TEMP_CONST
 839                && temps[args[3]].state == TCG_TEMP_CONST) {
 840                uint32_t a = temps[args[2]].val;
 841                uint32_t b = temps[args[3]].val;
 842                uint64_t r = (uint64_t)a * b;
 843                TCGArg rl, rh;
 844
 845                /* We emit the extra nop when we emit the mulu2.  */
 846                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
 847
 848                rl = args[0];
 849                rh = args[1];
 850                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
 851                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
 852                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
 853                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
 854                gen_args += 4;
 855                args += 4;
 856                break;
 857            }
 858            goto do_default;
 859
 860        case INDEX_op_brcond2_i32:
 861            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
 862            if (tmp != 2) {
 863                if (tmp) {
 864                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
 865                    s->gen_opc_buf[op_index] = INDEX_op_br;
 866                    gen_args[0] = args[5];
 867                    gen_args += 1;
 868                } else {
 869                    s->gen_opc_buf[op_index] = INDEX_op_nop;
 870                }
 871            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
 872                       && temps[args[2]].state == TCG_TEMP_CONST
 873                       && temps[args[3]].state == TCG_TEMP_CONST
 874                       && temps[args[2]].val == 0
 875                       && temps[args[3]].val == 0) {
 876                /* Simplify LT/GE comparisons vs zero to a single compare
 877                   vs the high word of the input.  */
 878                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
 879                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
 880                gen_args[0] = args[1];
 881                gen_args[1] = args[3];
 882                gen_args[2] = args[4];
 883                gen_args[3] = args[5];
 884                gen_args += 4;
 885            } else {
 886                goto do_default;
 887            }
 888            args += 6;
 889            break;
 890
 891        case INDEX_op_setcond2_i32:
 892            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
 893            if (tmp != 2) {
 894                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
 895                tcg_opt_gen_movi(gen_args, args[0], tmp);
 896                gen_args += 2;
 897            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
 898                       && temps[args[3]].state == TCG_TEMP_CONST
 899                       && temps[args[4]].state == TCG_TEMP_CONST
 900                       && temps[args[3]].val == 0
 901                       && temps[args[4]].val == 0) {
 902                /* Simplify LT/GE comparisons vs zero to a single compare
 903                   vs the high word of the input.  */
 904                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
 905                gen_args[0] = args[0];
 906                gen_args[1] = args[2];
 907                gen_args[2] = args[4];
 908                gen_args[3] = args[5];
 909                gen_args += 4;
 910            } else {
 911                goto do_default;
 912            }
 913            args += 6;
 914            break;
 915
 916        case INDEX_op_call:
 917            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
 918            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
 919                                            TCG_CALL_NO_WRITE_GLOBALS))) {
 920                for (i = 0; i < nb_globals; i++) {
 921                    reset_temp(i);
 922                }
 923            }
 924            for (i = 0; i < (args[0] >> 16); i++) {
 925                reset_temp(args[i + 1]);
 926            }
 927            i = nb_call_args + 3;
 928            while (i) {
 929                *gen_args = *args;
 930                args++;
 931                gen_args++;
 932                i--;
 933            }
 934            break;
 935
 936        default:
 937        do_default:
 938            /* Default case: we know nothing about operation (or were unable
 939               to compute the operation result) so no propagation is done.
 940               We trash everything if the operation is the end of a basic
 941               block, otherwise we only trash the output args.  */
 942            if (def->flags & TCG_OPF_BB_END) {
 943                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
 944            } else {
 945                for (i = 0; i < def->nb_oargs; i++) {
 946                    reset_temp(args[i]);
 947                }
 948            }
 949            for (i = 0; i < def->nb_args; i++) {
 950                gen_args[i] = args[i];
 951            }
 952            args += def->nb_args;
 953            gen_args += def->nb_args;
 954            break;
 955        }
 956    }
 957
 958    return gen_args;
 959}
 960
 961TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
 962        TCGArg *args, TCGOpDef *tcg_op_defs)
 963{
 964    TCGArg *res;
 965    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
 966    return res;
 967}
 968