/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"

#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
#ifdef CONFIG_TCG_INTERPRETER
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         ffi_cif *cif);
#else
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
#endif
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

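/*
 * Emit helpers: each tcg_outN() below appends an N-bit value to the
 * generated-code stream and advances s->code_ptr by the equivalent
 * number of insn units.  For example (a sketch; the unit size depends
 * on the host backend): with TCG_TARGET_INSN_UNIT_SIZE == 4, tcg_out64()
 * advances code_ptr by 8 / 4 == 2 units, while tcg_out32() stores a
 * single unit directly.  The tcg_patchN() variants rewrite an
 * already-emitted value in place, e.g. when resolving relocations.
 */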
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
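
/*
 * Typical label lifecycle (a sketch of how the helpers above combine):
 * gen_new_label() creates an unbound label; a backend branch to a
 * not-yet-bound label records a fixup with tcg_out_reloc(); the label
 * is bound to the current output position with tcg_out_label(); and
 * at the end of code generation tcg_resolve_relocs() walks every
 * recorded fixup, letting the per-target patch_reloc() rewrite the
 * branch with the final label address.
 */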

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
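
/*
 * Example of the pasting above (illustrative only): a backend's
 * C_O1_I2(r, r, ri) goes through C_PFX3(c_o1_i2_, r, r, ri) to form
 * the single identifier c_o1_i2_r_r_ri, which names one enumerator
 * below and one entry in constraint_sets[].
 */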

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
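
/*
 * tcg-target-con-set.h is included three times in this file, each time
 * with a different expansion of the C_* macros: once (just above) to
 * build the enum, once to build constraint_sets[], and once so that
 * tcg_target_op_def() can return the enumerators.  A sketch of one
 * line of that header and its first two expansions (C_O1_I2(r, r, ri)
 * is a representative entry; the real sets are per-backend):
 *
 *     C_O1_I2(r, r, ri)
 *       enum:  c_o1_i2_r_r_ri,
 *       array: { .args_ct_str = { "r", "r", "ri" } },
 */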

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
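
/*
 * Note the C_N1_I2 expansion above prepends "&" to the output
 * constraint: as in asm operand syntax, '&' marks the output as
 * needing a register distinct from all inputs (a "new" register),
 * which the constraint parser turns into the corresponding
 * early-clobber behavior during register allocation.
 */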

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
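
/*
 * Design note (summarizing the two paths above): allocations larger
 * than TCG_POOL_CHUNK_SIZE get a dedicated, separately-freed pool on
 * the pool_first_large list, while small allocations are bump-pointer
 * carved out of reusable chunks.  tcg_pool_reset() frees only the
 * large pools and rewinds the chunk list, so per-TB scratch memory
 * (labels, relocations, anything from tcg_malloc()) is reclaimed
 * cheaply between translations without touching the chunk allocations.
 */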

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static GHashTable *ffi_table;

static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
#endif

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    ffi_table = g_hash_table_new(NULL, NULL);
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        uint32_t typemask = all_helpers[i].typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        ffi_status status;
        int nargs;

        if (g_hash_table_lookup(ffi_table, hash)) {
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
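
        /*
         * Worked example (illustrative; the dh_typecode_* values come
         * from exec/helper-head.h): the mask packs one 3-bit type code
         * per slot, slot 0 being the return type and slot i + 1 being
         * argument i, i.e.
         *     typemask = ret | arg0 << 3 | arg1 << 6 | ...
         * Shifting out the return type and rounding the position of
         * the most significant non-zero bit up to a multiple of 3
         * therefore recovers the argument count.
         */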

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi[typemask & 7];
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            /* Use a dedicated index here: reusing 'i' would corrupt
               the outer loop over all_helpers.  */
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi[typecode];
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
    }
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }
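
    /*
     * For example (a sketch, not any particular backend): if the alloc
     * order is { s0, s1, s2, c0, c1 } with the c* registers
     * call-clobbered, the loop above stops at n == 3 and the indirect
     * order becomes { s2, s1, s0, c0, c1 }: the callee-saved prefix is
     * reversed and the remainder is copied unchanged.
     */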

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
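
/*
 * Resulting buffer layout (a sketch): code_gen_ptr is rounded up to an
 * icache line for the TranslationBlock struct, and the translated code
 * then starts on the next line boundary after it:
 *
 *     | ...prev code... | pad | TB struct | pad | host code ... |
 *                       ^tb               ^next == new code_gen_ptr
 */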

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(s->code_gen_ptr, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(s->code_gen_ptr, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
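
/*
 * Note on the 32-bit split above (a sketch of the resulting layout):
 * a 64-bit global on a 32-bit host becomes two consecutive I32 temps,
 * "<name>_0" (the first, low half) and "<name>_1" (the high half).
 * The bigendian adjustment keeps each half's mem_offset pointing at
 * the right word, e.g. on a big-endian host the low half lives at
 * offset + 4 and the high half at offset.
 */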

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
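
/*
 * Usage sketch: tcg_constant_internal() interns constants per type, so
 * repeated requests for the same value within one translation return
 * the same TEMP_CONST temp (the hash table is keyed on &ts->val and
 * cleared again by tcg_func_start()).  Frontends normally reach it via
 * the tcg_constant_i32()/tcg_constant_i64() wrappers, and such temps
 * need never be freed; tcg_temp_free_internal() ignores them.
 */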
1094
1095TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1096{
1097    val = dup_const(vece, val);
1098    return temp_tcgv_vec(tcg_constant_internal(type, val));
1099}
1100
1101TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1102{
1103    TCGTemp *t = tcgv_vec_temp(match);
1104
1105    tcg_debug_assert(t->temp_allocated != 0);
1106    return tcg_constant_vec(t->base_type, vece, val);
1107}
1108
1109TCGv_i32 tcg_const_i32(int32_t val)
1110{
1111    TCGv_i32 t0;
1112    t0 = tcg_temp_new_i32();
1113    tcg_gen_movi_i32(t0, val);
1114    return t0;
1115}
1116
1117TCGv_i64 tcg_const_i64(int64_t val)
1118{
1119    TCGv_i64 t0;
1120    t0 = tcg_temp_new_i64();
1121    tcg_gen_movi_i64(t0, val);
1122    return t0;
1123}
1124
1125TCGv_i32 tcg_const_local_i32(int32_t val)
1126{
1127    TCGv_i32 t0;
1128    t0 = tcg_temp_local_new_i32();
1129    tcg_gen_movi_i32(t0, val);
1130    return t0;
1131}
1132
1133TCGv_i64 tcg_const_local_i64(int64_t val)
1134{
1135    TCGv_i64 t0;
1136    t0 = tcg_temp_local_new_i64();
1137    tcg_gen_movi_i64(t0, val);
1138    return t0;
1139}
1140
1141#if defined(CONFIG_DEBUG_TCG)
1142void tcg_clear_temp_count(void)
1143{
1144    TCGContext *s = tcg_ctx;
1145    s->temps_in_use = 0;
1146}
1147
1148int tcg_check_temp_count(void)
1149{
1150    TCGContext *s = tcg_ctx;
1151    if (s->temps_in_use) {
1152        /* Clear the count so that we don't give another
1153         * warning immediately next time around.
1154         */
1155        s->temps_in_use = 0;
1156        return 1;
1157    }
1158    return 0;
1159}
1160#endif
1161
1162/* Return true if OP may appear in the opcode stream.
1163   Test the runtime variable that controls each opcode.  */
1164bool tcg_op_supported(TCGOpcode op)
1165{
1166    const bool have_vec
1167        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1168
1169    switch (op) {
1170    case INDEX_op_discard:
1171    case INDEX_op_set_label:
1172    case INDEX_op_call:
1173    case INDEX_op_br:
1174    case INDEX_op_mb:
1175    case INDEX_op_insn_start:
1176    case INDEX_op_exit_tb:
1177    case INDEX_op_goto_tb:
1178    case INDEX_op_goto_ptr:
1179    case INDEX_op_qemu_ld_i32:
1180    case INDEX_op_qemu_st_i32:
1181    case INDEX_op_qemu_ld_i64:
1182    case INDEX_op_qemu_st_i64:
1183        return true;
1184
1185    case INDEX_op_qemu_st8_i32:
1186        return TCG_TARGET_HAS_qemu_st8_i32;
1187
1188    case INDEX_op_mov_i32:
1189    case INDEX_op_setcond_i32:
1190    case INDEX_op_brcond_i32:
1191    case INDEX_op_ld8u_i32:
1192    case INDEX_op_ld8s_i32:
1193    case INDEX_op_ld16u_i32:
1194    case INDEX_op_ld16s_i32:
1195    case INDEX_op_ld_i32:
1196    case INDEX_op_st8_i32:
1197    case INDEX_op_st16_i32:
1198    case INDEX_op_st_i32:
1199    case INDEX_op_add_i32:
1200    case INDEX_op_sub_i32:
1201    case INDEX_op_mul_i32:
1202    case INDEX_op_and_i32:
1203    case INDEX_op_or_i32:
1204    case INDEX_op_xor_i32:
1205    case INDEX_op_shl_i32:
1206    case INDEX_op_shr_i32:
1207    case INDEX_op_sar_i32:
1208        return true;
1209
1210    case INDEX_op_movcond_i32:
1211        return TCG_TARGET_HAS_movcond_i32;
1212    case INDEX_op_div_i32:
1213    case INDEX_op_divu_i32:
1214        return TCG_TARGET_HAS_div_i32;
1215    case INDEX_op_rem_i32:
1216    case INDEX_op_remu_i32:
1217        return TCG_TARGET_HAS_rem_i32;
1218    case INDEX_op_div2_i32:
1219    case INDEX_op_divu2_i32:
1220        return TCG_TARGET_HAS_div2_i32;
1221    case INDEX_op_rotl_i32:
1222    case INDEX_op_rotr_i32:
1223        return TCG_TARGET_HAS_rot_i32;
1224    case INDEX_op_deposit_i32:
1225        return TCG_TARGET_HAS_deposit_i32;
1226    case INDEX_op_extract_i32:
1227        return TCG_TARGET_HAS_extract_i32;
1228    case INDEX_op_sextract_i32:
1229        return TCG_TARGET_HAS_sextract_i32;
1230    case INDEX_op_extract2_i32:
1231        return TCG_TARGET_HAS_extract2_i32;
1232    case INDEX_op_add2_i32:
1233        return TCG_TARGET_HAS_add2_i32;
1234    case INDEX_op_sub2_i32:
1235        return TCG_TARGET_HAS_sub2_i32;
1236    case INDEX_op_mulu2_i32:
1237        return TCG_TARGET_HAS_mulu2_i32;
1238    case INDEX_op_muls2_i32:
1239        return TCG_TARGET_HAS_muls2_i32;
1240    case INDEX_op_muluh_i32:
1241        return TCG_TARGET_HAS_muluh_i32;
1242    case INDEX_op_mulsh_i32:
1243        return TCG_TARGET_HAS_mulsh_i32;
1244    case INDEX_op_ext8s_i32:
1245        return TCG_TARGET_HAS_ext8s_i32;
1246    case INDEX_op_ext16s_i32:
1247        return TCG_TARGET_HAS_ext16s_i32;
1248    case INDEX_op_ext8u_i32:
1249        return TCG_TARGET_HAS_ext8u_i32;
1250    case INDEX_op_ext16u_i32:
1251        return TCG_TARGET_HAS_ext16u_i32;
1252    case INDEX_op_bswap16_i32:
1253        return TCG_TARGET_HAS_bswap16_i32;
1254    case INDEX_op_bswap32_i32:
1255        return TCG_TARGET_HAS_bswap32_i32;
1256    case INDEX_op_not_i32:
1257        return TCG_TARGET_HAS_not_i32;
1258    case INDEX_op_neg_i32:
1259        return TCG_TARGET_HAS_neg_i32;
1260    case INDEX_op_andc_i32:
1261        return TCG_TARGET_HAS_andc_i32;
1262    case INDEX_op_orc_i32:
1263        return TCG_TARGET_HAS_orc_i32;
1264    case INDEX_op_eqv_i32:
1265        return TCG_TARGET_HAS_eqv_i32;
1266    case INDEX_op_nand_i32:
1267        return TCG_TARGET_HAS_nand_i32;
1268    case INDEX_op_nor_i32:
1269        return TCG_TARGET_HAS_nor_i32;
1270    case INDEX_op_clz_i32:
1271        return TCG_TARGET_HAS_clz_i32;
1272    case INDEX_op_ctz_i32:
1273        return TCG_TARGET_HAS_ctz_i32;
1274    case INDEX_op_ctpop_i32:
1275        return TCG_TARGET_HAS_ctpop_i32;
1276
1277    case INDEX_op_brcond2_i32:
1278    case INDEX_op_setcond2_i32:
1279        return TCG_TARGET_REG_BITS == 32;
1280
1281    case INDEX_op_mov_i64:
1282    case INDEX_op_setcond_i64:
1283    case INDEX_op_brcond_i64:
1284    case INDEX_op_ld8u_i64:
1285    case INDEX_op_ld8s_i64:
1286    case INDEX_op_ld16u_i64:
1287    case INDEX_op_ld16s_i64:
1288    case INDEX_op_ld32u_i64:
1289    case INDEX_op_ld32s_i64:
1290    case INDEX_op_ld_i64:
1291    case INDEX_op_st8_i64:
1292    case INDEX_op_st16_i64:
1293    case INDEX_op_st32_i64:
1294    case INDEX_op_st_i64:
1295    case INDEX_op_add_i64:
1296    case INDEX_op_sub_i64:
1297    case INDEX_op_mul_i64:
1298    case INDEX_op_and_i64:
1299    case INDEX_op_or_i64:
1300    case INDEX_op_xor_i64:
1301    case INDEX_op_shl_i64:
1302    case INDEX_op_shr_i64:
1303    case INDEX_op_sar_i64:
1304    case INDEX_op_ext_i32_i64:
1305    case INDEX_op_extu_i32_i64:
1306        return TCG_TARGET_REG_BITS == 64;
1307
1308    case INDEX_op_movcond_i64:
1309        return TCG_TARGET_HAS_movcond_i64;
1310    case INDEX_op_div_i64:
1311    case INDEX_op_divu_i64:
1312        return TCG_TARGET_HAS_div_i64;
1313    case INDEX_op_rem_i64:
1314    case INDEX_op_remu_i64:
1315        return TCG_TARGET_HAS_rem_i64;
1316    case INDEX_op_div2_i64:
1317    case INDEX_op_divu2_i64:
1318        return TCG_TARGET_HAS_div2_i64;
1319    case INDEX_op_rotl_i64:
1320    case INDEX_op_rotr_i64:
1321        return TCG_TARGET_HAS_rot_i64;
1322    case INDEX_op_deposit_i64:
1323        return TCG_TARGET_HAS_deposit_i64;
1324    case INDEX_op_extract_i64:
1325        return TCG_TARGET_HAS_extract_i64;
1326    case INDEX_op_sextract_i64:
1327        return TCG_TARGET_HAS_sextract_i64;
1328    case INDEX_op_extract2_i64:
1329        return TCG_TARGET_HAS_extract2_i64;
1330    case INDEX_op_extrl_i64_i32:
1331        return TCG_TARGET_HAS_extrl_i64_i32;
1332    case INDEX_op_extrh_i64_i32:
1333        return TCG_TARGET_HAS_extrh_i64_i32;
1334    case INDEX_op_ext8s_i64:
1335        return TCG_TARGET_HAS_ext8s_i64;
1336    case INDEX_op_ext16s_i64:
1337        return TCG_TARGET_HAS_ext16s_i64;
1338    case INDEX_op_ext32s_i64:
1339        return TCG_TARGET_HAS_ext32s_i64;
1340    case INDEX_op_ext8u_i64:
1341        return TCG_TARGET_HAS_ext8u_i64;
1342    case INDEX_op_ext16u_i64:
1343        return TCG_TARGET_HAS_ext16u_i64;
1344    case INDEX_op_ext32u_i64:
1345        return TCG_TARGET_HAS_ext32u_i64;
1346    case INDEX_op_bswap16_i64:
1347        return TCG_TARGET_HAS_bswap16_i64;
1348    case INDEX_op_bswap32_i64:
1349        return TCG_TARGET_HAS_bswap32_i64;
1350    case INDEX_op_bswap64_i64:
1351        return TCG_TARGET_HAS_bswap64_i64;
1352    case INDEX_op_not_i64:
1353        return TCG_TARGET_HAS_not_i64;
1354    case INDEX_op_neg_i64:
1355        return TCG_TARGET_HAS_neg_i64;
1356    case INDEX_op_andc_i64:
1357        return TCG_TARGET_HAS_andc_i64;
1358    case INDEX_op_orc_i64:
1359        return TCG_TARGET_HAS_orc_i64;
1360    case INDEX_op_eqv_i64:
1361        return TCG_TARGET_HAS_eqv_i64;
1362    case INDEX_op_nand_i64:
1363        return TCG_TARGET_HAS_nand_i64;
1364    case INDEX_op_nor_i64:
1365        return TCG_TARGET_HAS_nor_i64;
1366    case INDEX_op_clz_i64:
1367        return TCG_TARGET_HAS_clz_i64;
1368    case INDEX_op_ctz_i64:
1369        return TCG_TARGET_HAS_ctz_i64;
1370    case INDEX_op_ctpop_i64:
1371        return TCG_TARGET_HAS_ctpop_i64;
1372    case INDEX_op_add2_i64:
1373        return TCG_TARGET_HAS_add2_i64;
1374    case INDEX_op_sub2_i64:
1375        return TCG_TARGET_HAS_sub2_i64;
1376    case INDEX_op_mulu2_i64:
1377        return TCG_TARGET_HAS_mulu2_i64;
1378    case INDEX_op_muls2_i64:
1379        return TCG_TARGET_HAS_muls2_i64;
1380    case INDEX_op_muluh_i64:
1381        return TCG_TARGET_HAS_muluh_i64;
1382    case INDEX_op_mulsh_i64:
1383        return TCG_TARGET_HAS_mulsh_i64;
1384
1385    case INDEX_op_mov_vec:
1386    case INDEX_op_dup_vec:
1387    case INDEX_op_dupm_vec:
1388    case INDEX_op_ld_vec:
1389    case INDEX_op_st_vec:
1390    case INDEX_op_add_vec:
1391    case INDEX_op_sub_vec:
1392    case INDEX_op_and_vec:
1393    case INDEX_op_or_vec:
1394    case INDEX_op_xor_vec:
1395    case INDEX_op_cmp_vec:
1396        return have_vec;
1397    case INDEX_op_dup2_vec:
1398        return have_vec && TCG_TARGET_REG_BITS == 32;
1399    case INDEX_op_not_vec:
1400        return have_vec && TCG_TARGET_HAS_not_vec;
1401    case INDEX_op_neg_vec:
1402        return have_vec && TCG_TARGET_HAS_neg_vec;
1403    case INDEX_op_abs_vec:
1404        return have_vec && TCG_TARGET_HAS_abs_vec;
1405    case INDEX_op_andc_vec:
1406        return have_vec && TCG_TARGET_HAS_andc_vec;
1407    case INDEX_op_orc_vec:
1408        return have_vec && TCG_TARGET_HAS_orc_vec;
1409    case INDEX_op_mul_vec:
1410        return have_vec && TCG_TARGET_HAS_mul_vec;
1411    case INDEX_op_shli_vec:
1412    case INDEX_op_shri_vec:
1413    case INDEX_op_sari_vec:
1414        return have_vec && TCG_TARGET_HAS_shi_vec;
1415    case INDEX_op_shls_vec:
1416    case INDEX_op_shrs_vec:
1417    case INDEX_op_sars_vec:
1418        return have_vec && TCG_TARGET_HAS_shs_vec;
1419    case INDEX_op_shlv_vec:
1420    case INDEX_op_shrv_vec:
1421    case INDEX_op_sarv_vec:
1422        return have_vec && TCG_TARGET_HAS_shv_vec;
1423    case INDEX_op_rotli_vec:
1424        return have_vec && TCG_TARGET_HAS_roti_vec;
1425    case INDEX_op_rotls_vec:
1426        return have_vec && TCG_TARGET_HAS_rots_vec;
1427    case INDEX_op_rotlv_vec:
1428    case INDEX_op_rotrv_vec:
1429        return have_vec && TCG_TARGET_HAS_rotv_vec;
1430    case INDEX_op_ssadd_vec:
1431    case INDEX_op_usadd_vec:
1432    case INDEX_op_sssub_vec:
1433    case INDEX_op_ussub_vec:
1434        return have_vec && TCG_TARGET_HAS_sat_vec;
1435    case INDEX_op_smin_vec:
1436    case INDEX_op_umin_vec:
1437    case INDEX_op_smax_vec:
1438    case INDEX_op_umax_vec:
1439        return have_vec && TCG_TARGET_HAS_minmax_vec;
1440    case INDEX_op_bitsel_vec:
1441        return have_vec && TCG_TARGET_HAS_bitsel_vec;
1442    case INDEX_op_cmpsel_vec:
1443        return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1444
1445    default:
1446        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1447        return true;
1448    }
1449}
1450
1451/* Note: we convert the 64 bit args to 32 bit and do some alignment
1452   and endian swap. Maybe it would be better to do the alignment
1453   and endian swap in tcg_reg_alloc_call(). */
1454void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1455{
1456    int i, real_args, nb_rets, pi;
1457    unsigned typemask;
1458    const TCGHelperInfo *info;
1459    TCGOp *op;
1460
1461    info = g_hash_table_lookup(helper_table, (gpointer)func);
1462    typemask = info->typemask;
1463
1464#ifdef CONFIG_PLUGIN
1465    /* detect non-plugin helpers */
1466    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1467        tcg_ctx->plugin_insn->calls_helpers = true;
1468    }
1469#endif
1470
1471#if defined(__sparc__) && !defined(__arch64__) \
1472    && !defined(CONFIG_TCG_INTERPRETER)
1473    /* We have 64-bit values in one register, but need to pass as two
1474       separate parameters.  Split them.  */
1475    int orig_typemask = typemask;
1476    int orig_nargs = nargs;
1477    TCGv_i64 retl, reth;
1478    TCGTemp *split_args[MAX_OPC_PARAM];
1479
1480    retl = NULL;
1481    reth = NULL;
1482    typemask = 0;
1483    for (i = real_args = 0; i < nargs; ++i) {
1484        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1485        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1486
1487        if (is_64bit) {
1488            TCGv_i64 orig = temp_tcgv_i64(args[i]);
1489            TCGv_i32 h = tcg_temp_new_i32();
1490            TCGv_i32 l = tcg_temp_new_i32();
1491            tcg_gen_extr_i64_i32(l, h, orig);
1492            split_args[real_args++] = tcgv_i32_temp(h);
1493            typemask |= dh_typecode_i32 << (real_args * 3);
1494            split_args[real_args++] = tcgv_i32_temp(l);
1495            typemask |= dh_typecode_i32 << (real_args * 3);
1496        } else {
1497            split_args[real_args++] = args[i];
1498            typemask |= argtype << (real_args * 3);
1499        }
1500    }
1501    nargs = real_args;
1502    args = split_args;
1503#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1504    for (i = 0; i < nargs; ++i) {
1505        int argtype = extract32(typemask, (i + 1) * 3, 3);
1506        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1507        bool is_signed = argtype & 1;
1508
1509        if (is_32bit) {
1510            TCGv_i64 temp = tcg_temp_new_i64();
1511            TCGv_i32 orig = temp_tcgv_i32(args[i]);
1512            if (is_signed) {
1513                tcg_gen_ext_i32_i64(temp, orig);
1514            } else {
1515                tcg_gen_extu_i32_i64(temp, orig);
1516            }
1517            args[i] = tcgv_i64_temp(temp);
1518        }
1519    }
1520#endif /* TCG_TARGET_EXTEND_ARGS */
1521
1522    op = tcg_emit_op(INDEX_op_call);
1523
1524    pi = 0;
1525    if (ret != NULL) {
1526#if defined(__sparc__) && !defined(__arch64__) \
1527    && !defined(CONFIG_TCG_INTERPRETER)
1528        if ((typemask & 6) == dh_typecode_i64) {
1529            /* The 32-bit ABI is going to return the 64-bit value in
1530               the %o0/%o1 register pair.  Prepare for this by using
1531               two return temporaries, and reassemble below.  */
1532            retl = tcg_temp_new_i64();
1533            reth = tcg_temp_new_i64();
1534            op->args[pi++] = tcgv_i64_arg(reth);
1535            op->args[pi++] = tcgv_i64_arg(retl);
1536            nb_rets = 2;
1537        } else {
1538            op->args[pi++] = temp_arg(ret);
1539            nb_rets = 1;
1540        }
1541#else
1542        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1543#ifdef HOST_WORDS_BIGENDIAN
1544            op->args[pi++] = temp_arg(ret + 1);
1545            op->args[pi++] = temp_arg(ret);
1546#else
1547            op->args[pi++] = temp_arg(ret);
1548            op->args[pi++] = temp_arg(ret + 1);
1549#endif
1550            nb_rets = 2;
1551        } else {
1552            op->args[pi++] = temp_arg(ret);
1553            nb_rets = 1;
1554        }
1555#endif
1556    } else {
1557        nb_rets = 0;
1558    }
1559    TCGOP_CALLO(op) = nb_rets;
1560
1561    real_args = 0;
1562    for (i = 0; i < nargs; i++) {
1563        int argtype = extract32(typemask, (i + 1) * 3, 3);
1564        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1565        bool want_align = false;
1566
1567#if defined(CONFIG_TCG_INTERPRETER)
1568        /*
1569         * Align all arguments, so that they land in predictable places
1570         * for passing off to ffi_call.
1571         */
1572        want_align = true;
1573#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1574        /* Some targets want aligned 64 bit args */
1575        want_align = is_64bit;
1576#endif
1577
1578        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1579            op->args[pi++] = TCG_CALL_DUMMY_ARG;
1580            real_args++;
1581        }
1582
1583        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1584            /*
1585             * If stack grows up, then we will be placing successive
1586             * arguments at lower addresses, which means we need to
1587             * reverse the order compared to how we would normally
1588             * treat either big or little-endian.  For those arguments
1589             * that will wind up in registers, this still works for
1590             * HPPA (the only current STACK_GROWSUP target) since the
1591             * argument registers are *also* allocated in decreasing
1592             * order.  If another such target is added, this logic may
1593             * have to get more complicated to differentiate between
1594             * stack arguments and register arguments.
1595             */
1596#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1597            op->args[pi++] = temp_arg(args[i] + 1);
1598            op->args[pi++] = temp_arg(args[i]);
1599#else
1600            op->args[pi++] = temp_arg(args[i]);
1601            op->args[pi++] = temp_arg(args[i] + 1);
1602#endif
1603            real_args += 2;
1604            continue;
1605        }
1606
1607        op->args[pi++] = temp_arg(args[i]);
1608        real_args++;
1609    }
1610    op->args[pi++] = (uintptr_t)func;
1611    op->args[pi++] = (uintptr_t)info;
1612    TCGOP_CALLI(op) = real_args;
1613
1614    /* Make sure the fields didn't overflow.  */
1615    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1616    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1617
1618#if defined(__sparc__) && !defined(__arch64__) \
1619    && !defined(CONFIG_TCG_INTERPRETER)
1620    /* Free all of the parts we allocated above.  */
1621    for (i = real_args = 0; i < orig_nargs; ++i) {
1622        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1623        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1624
1625        if (is_64bit) {
1626            tcg_temp_free_internal(args[real_args++]);
1627            tcg_temp_free_internal(args[real_args++]);
1628        } else {
1629            real_args++;
1630        }
1631    }
1632    if ((orig_typemask & 6) == dh_typecode_i64) {
1633        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1634           Note that describing these as TCGv_i64 eliminates an unnecessary
1635           zero-extension that tcg_gen_concat_i32_i64 would create.  */
1636        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1637        tcg_temp_free_i64(retl);
1638        tcg_temp_free_i64(reth);
1639    }
1640#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1641    for (i = 0; i < nargs; ++i) {
1642        int argtype = extract32(typemask, (i + 1) * 3, 3);
1643        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1644
1645        if (is_32bit) {
1646            tcg_temp_free_internal(args[i]);
1647        }
1648    }
1649#endif /* TCG_TARGET_EXTEND_ARGS */
1650}
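
    /*
     * Illustrative note, not part of the build: 'typemask' packs one 3-bit
     * dh_typecode per value, with the return type in bits [2:0] and
     * argument N in bits [3*(N+1), 3*(N+1)+2].  For a hypothetical helper
     * "i64 f(i32, i64)", the decodes used above would see:
     *
     *     extract32(typemask, 0, 3)    -> dh_typecode_i64   (return)
     *     extract32(typemask, 3, 3)    -> dh_typecode_i32   (arg 0)
     *     extract32(typemask, 6, 3)    -> dh_typecode_i64   (arg 1)
     *
     * The (argtype & ~1) comparisons work because the signed and unsigned
     * variants of each typecode differ only in bit 0.
     */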
1651
1652static void tcg_reg_alloc_start(TCGContext *s)
1653{
1654    int i, n;
1655
1656    for (i = 0, n = s->nb_temps; i < n; i++) {
1657        TCGTemp *ts = &s->temps[i];
1658        TCGTempVal val = TEMP_VAL_MEM;
1659
1660        switch (ts->kind) {
1661        case TEMP_CONST:
1662            val = TEMP_VAL_CONST;
1663            break;
1664        case TEMP_FIXED:
1665            val = TEMP_VAL_REG;
1666            break;
1667        case TEMP_GLOBAL:
1668            break;
1669        case TEMP_NORMAL:
1670            val = TEMP_VAL_DEAD;
1671            /* fall through */
1672        case TEMP_LOCAL:
1673            ts->mem_allocated = 0;
1674            break;
1675        default:
1676            g_assert_not_reached();
1677        }
1678        ts->val_type = val;
1679    }
1680
1681    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1682}
1683
1684static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1685                                 TCGTemp *ts)
1686{
1687    int idx = temp_idx(ts);
1688
1689    switch (ts->kind) {
1690    case TEMP_FIXED:
1691    case TEMP_GLOBAL:
1692        pstrcpy(buf, buf_size, ts->name);
1693        break;
1694    case TEMP_LOCAL:
1695        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1696        break;
1697    case TEMP_NORMAL:
1698        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1699        break;
1700    case TEMP_CONST:
1701        switch (ts->type) {
1702        case TCG_TYPE_I32:
1703            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1704            break;
1705#if TCG_TARGET_REG_BITS > 32
1706        case TCG_TYPE_I64:
1707            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1708            break;
1709#endif
1710        case TCG_TYPE_V64:
1711        case TCG_TYPE_V128:
1712        case TCG_TYPE_V256:
1713            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1714                     64 << (ts->type - TCG_TYPE_V64), ts->val);
1715            break;
1716        default:
1717            g_assert_not_reached();
1718        }
1719        break;
1720    }
1721    return buf;
1722}
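
    /*
     * Name formats produced above, for illustration: fixed and global
     * temps print their own name (e.g. "env"), locals print as "loc0",
     * normal temps as "tmp3", and constants as "$0x1f" or, for vector
     * constants, with a width tag such as "v128$0x0".
     */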
1723
1724static char *tcg_get_arg_str(TCGContext *s, char *buf,
1725                             int buf_size, TCGArg arg)
1726{
1727    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1728}
1729
1730static const char * const cond_name[] =
1731{
1732    [TCG_COND_NEVER] = "never",
1733    [TCG_COND_ALWAYS] = "always",
1734    [TCG_COND_EQ] = "eq",
1735    [TCG_COND_NE] = "ne",
1736    [TCG_COND_LT] = "lt",
1737    [TCG_COND_GE] = "ge",
1738    [TCG_COND_LE] = "le",
1739    [TCG_COND_GT] = "gt",
1740    [TCG_COND_LTU] = "ltu",
1741    [TCG_COND_GEU] = "geu",
1742    [TCG_COND_LEU] = "leu",
1743    [TCG_COND_GTU] = "gtu"
1744};
1745
1746static const char * const ldst_name[] =
1747{
1748    [MO_UB]   = "ub",
1749    [MO_SB]   = "sb",
1750    [MO_LEUW] = "leuw",
1751    [MO_LESW] = "lesw",
1752    [MO_LEUL] = "leul",
1753    [MO_LESL] = "lesl",
1754    [MO_LEQ]  = "leq",
1755    [MO_BEUW] = "beuw",
1756    [MO_BESW] = "besw",
1757    [MO_BEUL] = "beul",
1758    [MO_BESL] = "besl",
1759    [MO_BEQ]  = "beq",
1760};
1761
1762static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1763#ifdef TARGET_ALIGNED_ONLY
1764    [MO_UNALN >> MO_ASHIFT]    = "un+",
1765    [MO_ALIGN >> MO_ASHIFT]    = "",
1766#else
1767    [MO_UNALN >> MO_ASHIFT]    = "",
1768    [MO_ALIGN >> MO_ASHIFT]    = "al+",
1769#endif
1770    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1771    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1772    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1773    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1774    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1775    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1776};
1777
1778static const char bswap_flag_name[][6] = {
1779    [TCG_BSWAP_IZ] = "iz",
1780    [TCG_BSWAP_OZ] = "oz",
1781    [TCG_BSWAP_OS] = "os",
1782    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1783    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1784};
1785
1786static inline bool tcg_regset_single(TCGRegSet d)
1787{
1788    return (d & (d - 1)) == 0;
1789}
1790
1791static inline TCGReg tcg_regset_first(TCGRegSet d)
1792{
1793    if (TCG_TARGET_NB_REGS <= 32) {
1794        return ctz32(d);
1795    } else {
1796        return ctz64(d);
1797    }
1798}
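
    /*
     * Example, for illustration: a set containing only register 3 is the
     * value 1 << 3 = 0x8, and 0x8 & 0x7 == 0, so tcg_regset_single() is
     * true and tcg_regset_first() returns ctz32(0x8) == 3.  The empty set
     * also passes tcg_regset_single(), so callers must test for 0 first.
     */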
1799
1800static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1801{
1802    char buf[128];
1803    TCGOp *op;
1804
1805    QTAILQ_FOREACH(op, &s->ops, link) {
1806        int i, k, nb_oargs, nb_iargs, nb_cargs;
1807        const TCGOpDef *def;
1808        TCGOpcode c;
1809        int col = 0;
1810
1811        c = op->opc;
1812        def = &tcg_op_defs[c];
1813
1814        if (c == INDEX_op_insn_start) {
1815            nb_oargs = 0;
1816            col += qemu_log("\n ----");
1817
1818            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1819                target_ulong a;
1820#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1821                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1822#else
1823                a = op->args[i];
1824#endif
1825                col += qemu_log(" " TARGET_FMT_lx, a);
1826            }
1827        } else if (c == INDEX_op_call) {
1828            const TCGHelperInfo *info = tcg_call_info(op);
1829            void *func = tcg_call_func(op);
1830
1831            /* variable number of arguments */
1832            nb_oargs = TCGOP_CALLO(op);
1833            nb_iargs = TCGOP_CALLI(op);
1834            nb_cargs = def->nb_cargs;
1835
1836            col += qemu_log(" %s ", def->name);
1837
1838            /*
1839             * Print the function name from TCGHelperInfo, if available.
1840             * Note that plugins have a template function for the info,
1841             * but the actual function pointer comes from the plugin.
1842             */
1843            if (func == info->func) {
1844                col += qemu_log("%s", info->name);
1845            } else {
1846                col += qemu_log("plugin(%p)", func);
1847            }
1848
1849            col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1850            for (i = 0; i < nb_oargs; i++) {
1851                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1852                                                       op->args[i]));
1853            }
1854            for (i = 0; i < nb_iargs; i++) {
1855                TCGArg arg = op->args[nb_oargs + i];
1856                const char *t = "<dummy>";
1857                if (arg != TCG_CALL_DUMMY_ARG) {
1858                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1859                }
1860                col += qemu_log(",%s", t);
1861            }
1862        } else {
1863            col += qemu_log(" %s ", def->name);
1864
1865            nb_oargs = def->nb_oargs;
1866            nb_iargs = def->nb_iargs;
1867            nb_cargs = def->nb_cargs;
1868
1869            if (def->flags & TCG_OPF_VECTOR) {
1870                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1871                                8 << TCGOP_VECE(op));
1872            }
1873
1874            k = 0;
1875            for (i = 0; i < nb_oargs; i++) {
1876                if (k != 0) {
1877                    col += qemu_log(",");
1878                }
1879                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1880                                                      op->args[k++]));
1881            }
1882            for (i = 0; i < nb_iargs; i++) {
1883                if (k != 0) {
1884                    col += qemu_log(",");
1885                }
1886                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1887                                                      op->args[k++]));
1888            }
1889            switch (c) {
1890            case INDEX_op_brcond_i32:
1891            case INDEX_op_setcond_i32:
1892            case INDEX_op_movcond_i32:
1893            case INDEX_op_brcond2_i32:
1894            case INDEX_op_setcond2_i32:
1895            case INDEX_op_brcond_i64:
1896            case INDEX_op_setcond_i64:
1897            case INDEX_op_movcond_i64:
1898            case INDEX_op_cmp_vec:
1899            case INDEX_op_cmpsel_vec:
1900                if (op->args[k] < ARRAY_SIZE(cond_name)
1901                    && cond_name[op->args[k]]) {
1902                    col += qemu_log(",%s", cond_name[op->args[k++]]);
1903                } else {
1904                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1905                }
1906                i = 1;
1907                break;
1908            case INDEX_op_qemu_ld_i32:
1909            case INDEX_op_qemu_st_i32:
1910            case INDEX_op_qemu_st8_i32:
1911            case INDEX_op_qemu_ld_i64:
1912            case INDEX_op_qemu_st_i64:
1913                {
1914                    MemOpIdx oi = op->args[k++];
1915                    MemOp mop = get_memop(oi);
1916                    unsigned ix = get_mmuidx(oi);
1917
1918                    if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1919                        col += qemu_log(",$0x%x,%u", mop, ix);
1920                    } else {
1921                        const char *s_al, *s_op;
1922                        s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
1923                        s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
1924                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1925                    }
1926                    i = 1;
1927                }
1928                break;
1929            case INDEX_op_bswap16_i32:
1930            case INDEX_op_bswap16_i64:
1931            case INDEX_op_bswap32_i32:
1932            case INDEX_op_bswap32_i64:
1933            case INDEX_op_bswap64_i64:
1934                {
1935                    TCGArg flags = op->args[k];
1936                    const char *name = NULL;
1937
1938                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
1939                        name = bswap_flag_name[flags];
1940                    }
1941                    if (name) {
1942                        col += qemu_log(",%s", name);
1943                    } else {
1944                        col += qemu_log(",$0x%" TCG_PRIlx, flags);
1945                    }
1946                    i = k = 1;
1947                }
1948                break;
1949            default:
1950                i = 0;
1951                break;
1952            }
1953            switch (c) {
1954            case INDEX_op_set_label:
1955            case INDEX_op_br:
1956            case INDEX_op_brcond_i32:
1957            case INDEX_op_brcond_i64:
1958            case INDEX_op_brcond2_i32:
1959                col += qemu_log("%s$L%d", k ? "," : "",
1960                                arg_label(op->args[k])->id);
1961                i++, k++;
1962                break;
1963            default:
1964                break;
1965            }
1966            for (; i < nb_cargs; i++, k++) {
1967                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1968            }
1969        }
1970
1971        if (have_prefs || op->life) {
1972
1973            QemuLogFile *logfile;
1974
1975            rcu_read_lock();
1976            logfile = qatomic_rcu_read(&qemu_logfile);
1977            if (logfile) {
1978                for (; col < 40; ++col) {
1979                    putc(' ', logfile->fd);
1980                }
1981            }
1982            rcu_read_unlock();
1983        }
1984
1985        if (op->life) {
1986            unsigned life = op->life;
1987
1988            if (life & (SYNC_ARG * 3)) {
1989                qemu_log("  sync:");
1990                for (i = 0; i < 2; ++i) {
1991                    if (life & (SYNC_ARG << i)) {
1992                        qemu_log(" %d", i);
1993                    }
1994                }
1995            }
1996            life /= DEAD_ARG;
1997            if (life) {
1998                qemu_log("  dead:");
1999                for (i = 0; life; ++i, life >>= 1) {
2000                    if (life & 1) {
2001                        qemu_log(" %d", i);
2002                    }
2003                }
2004            }
2005        }
2006
2007        if (have_prefs) {
2008            for (i = 0; i < nb_oargs; ++i) {
2009                TCGRegSet set = op->output_pref[i];
2010
2011                if (i == 0) {
2012                    qemu_log("  pref=");
2013                } else {
2014                    qemu_log(",");
2015                }
2016                if (set == 0) {
2017                    qemu_log("none");
2018                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2019                    qemu_log("all");
2020#ifdef CONFIG_DEBUG_TCG
2021                } else if (tcg_regset_single(set)) {
2022                    TCGReg reg = tcg_regset_first(set);
2023                    qemu_log("%s", tcg_target_reg_names[reg]);
2024#endif
2025                } else if (TCG_TARGET_NB_REGS <= 32) {
2026                    qemu_log("%#x", (uint32_t)set);
2027                } else {
2028                    qemu_log("%#" PRIx64, (uint64_t)set);
2029                }
2030            }
2031        }
2032
2033        qemu_log("\n");
2034    }
2035}
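
    /*
     * A hypothetical fragment of the dump produced above (opcodes,
     * temporaries, addresses and life data invented for illustration):
     *
     *  ---- 000000000040116d
     *  mov_i32 tmp0,eax                        dead: 1
     *  add_i32 tmp0,tmp0,$0x1                  sync: 0
     *  brcond_i32 tmp0,$0x0,eq,$L1             dead: 0 1
     */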
2036
2037/* we give more priority to constraints with fewer registers */
2038static int get_constraint_priority(const TCGOpDef *def, int k)
2039{
2040    const TCGArgConstraint *arg_ct = &def->args_ct[k];
2041    int n;
2042
2043    if (arg_ct->oalias) {
2044        /* an alias is equivalent to a single register */
2045        n = 1;
2046    } else {
2047        n = ctpop64(arg_ct->regs);
2048    }
2049    return TCG_TARGET_NB_REGS - n + 1;
2050}
2051
2052/* sort from highest priority to lowest */
2053static void sort_constraints(TCGOpDef *def, int start, int n)
2054{
2055    int i, j;
2056    TCGArgConstraint *a = def->args_ct;
2057
2058    for (i = 0; i < n; i++) {
2059        a[start + i].sort_index = start + i;
2060    }
2061    if (n <= 1) {
2062        return;
2063    }
2064    for (i = 0; i < n - 1; i++) {
2065        for (j = i + 1; j < n; j++) {
2066            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2067            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2068            if (p1 < p2) {
2069                int tmp = a[start + i].sort_index;
2070                a[start + i].sort_index = a[start + j].sort_index;
2071                a[start + j].sort_index = tmp;
2072            }
2073        }
2074    }
2075}
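
    /*
     * Worked example, for illustration: on a host with 16 registers, an
     * operand tied to a single register scores 16 - 1 + 1 = 16, while one
     * accepting any register scores 16 - 16 + 1 = 1, so the O(n^2) sort
     * above places the most constrained operands first.
     */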
2076
2077static void process_op_defs(TCGContext *s)
2078{
2079    TCGOpcode op;
2080
2081    for (op = 0; op < NB_OPS; op++) {
2082        TCGOpDef *def = &tcg_op_defs[op];
2083        const TCGTargetOpDef *tdefs;
2084        int i, nb_args;
2085
2086        if (def->flags & TCG_OPF_NOT_PRESENT) {
2087            continue;
2088        }
2089
2090        nb_args = def->nb_iargs + def->nb_oargs;
2091        if (nb_args == 0) {
2092            continue;
2093        }
2094
2095        /*
2096         * Macro magic should make it impossible, but double-check that
2097         * the array index is in range.  Since the signedness of an enum
2098         * is implementation-defined, force the result to unsigned.
2099         */
2100        unsigned con_set = tcg_target_op_def(op);
2101        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2102        tdefs = &constraint_sets[con_set];
2103
2104        for (i = 0; i < nb_args; i++) {
2105            const char *ct_str = tdefs->args_ct_str[i];
2106            /* Incomplete TCGTargetOpDef entry. */
2107            tcg_debug_assert(ct_str != NULL);
2108
2109            while (*ct_str != '\0') {
2110                switch (*ct_str) {
2111                case '0' ... '9':
2112                    {
2113                        int oarg = *ct_str - '0';
2114                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2115                        tcg_debug_assert(oarg < def->nb_oargs);
2116                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
2117                        def->args_ct[i] = def->args_ct[oarg];
2118                        /* The output sets oalias.  */
2119                        def->args_ct[oarg].oalias = true;
2120                        def->args_ct[oarg].alias_index = i;
2121                        /* The input sets ialias. */
2122                        def->args_ct[i].ialias = true;
2123                        def->args_ct[i].alias_index = oarg;
2124                    }
2125                    ct_str++;
2126                    break;
2127                case '&':
2128                    def->args_ct[i].newreg = true;
2129                    ct_str++;
2130                    break;
2131                case 'i':
2132                    def->args_ct[i].ct |= TCG_CT_CONST;
2133                    ct_str++;
2134                    break;
2135
2136                /* Include all of the target-specific constraints. */
2137
2138#undef CONST
2139#define CONST(CASE, MASK) \
2140    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2141#define REGS(CASE, MASK) \
2142    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2143
2144#include "tcg-target-con-str.h"
2145
2146#undef REGS
2147#undef CONST
2148                default:
2149                    /* Typo in TCGTargetOpDef constraint. */
2150                    g_assert_not_reached();
2151                }
2152            }
2153        }
2154
2155        /* TCGTargetOpDef entry with too much information? */
2156        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2157
2158        /* sort the constraints (XXX: this is just a heuristic) */
2159        sort_constraints(def, 0, def->nb_oargs);
2160        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2161    }
2162}
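
    /*
     * For instance (an illustrative decode): a constraint set such as
     * { "r", "r", "ri" } marks the output and first input as register
     * operands and lets the second input also be a constant, while
     * { "r", "0", "ri" } additionally ties the first input to output 0
     * through the oalias/ialias links built above.
     */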
2163
2164void tcg_op_remove(TCGContext *s, TCGOp *op)
2165{
2166    TCGLabel *label;
2167
2168    switch (op->opc) {
2169    case INDEX_op_br:
2170        label = arg_label(op->args[0]);
2171        label->refs--;
2172        break;
2173    case INDEX_op_brcond_i32:
2174    case INDEX_op_brcond_i64:
2175        label = arg_label(op->args[3]);
2176        label->refs--;
2177        break;
2178    case INDEX_op_brcond2_i32:
2179        label = arg_label(op->args[5]);
2180        label->refs--;
2181        break;
2182    default:
2183        break;
2184    }
2185
2186    QTAILQ_REMOVE(&s->ops, op, link);
2187    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2188    s->nb_ops--;
2189
2190#ifdef CONFIG_PROFILER
2191    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2192#endif
2193}
2194
2195void tcg_remove_ops_after(TCGOp *op)
2196{
2197    TCGContext *s = tcg_ctx;
2198
2199    while (true) {
2200        TCGOp *last = tcg_last_op();
2201        if (last == op) {
2202            return;
2203        }
2204        tcg_op_remove(s, last);
2205    }
2206}
2207
2208static TCGOp *tcg_op_alloc(TCGOpcode opc)
2209{
2210    TCGContext *s = tcg_ctx;
2211    TCGOp *op;
2212
2213    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2214        op = tcg_malloc(sizeof(TCGOp));
2215    } else {
2216        op = QTAILQ_FIRST(&s->free_ops);
2217        QTAILQ_REMOVE(&s->free_ops, op, link);
2218    }
2219    memset(op, 0, offsetof(TCGOp, link));
2220    op->opc = opc;
2221    s->nb_ops++;
2222
2223    return op;
2224}
2225
2226TCGOp *tcg_emit_op(TCGOpcode opc)
2227{
2228    TCGOp *op = tcg_op_alloc(opc);
2229    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2230    return op;
2231}
2232
2233TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2234{
2235    TCGOp *new_op = tcg_op_alloc(opc);
2236    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2237    return new_op;
2238}
2239
2240TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2241{
2242    TCGOp *new_op = tcg_op_alloc(opc);
2243    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2244    return new_op;
2245}
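
    /*
     * Typical usage, as a sketch: rewrite an op by inserting a
     * replacement next to it and deleting the original, e.g.
     *
     *     TCGOp *new_op = tcg_op_insert_after(s, op, INDEX_op_mov_i32);
     *     new_op->args[0] = op->args[0];
     *     new_op->args[1] = op->args[1];
     *     tcg_op_remove(s, op);
     *
     * The removed op is pushed onto s->free_ops for reuse by
     * tcg_op_alloc().
     */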
2246
2247/* Reachability analysis: remove unreachable code.  */
2248static void reachable_code_pass(TCGContext *s)
2249{
2250    TCGOp *op, *op_next;
2251    bool dead = false;
2252
2253    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2254        bool remove = dead;
2255        TCGLabel *label;
2256
2257        switch (op->opc) {
2258        case INDEX_op_set_label:
2259            label = arg_label(op->args[0]);
2260            if (label->refs == 0) {
2261                /*
2262                 * While there is an occasional backward branch, virtually
2263                 * all branches generated by the translators are forward.
2264                 * Which means that, by the time we reach a label, all
2265                 * references to it have generally already been removed,
2266                 * and there is little to be gained by iterating.
2267                 */
2268                remove = true;
2269            } else {
2270                /* Once we see a label, insns become live again.  */
2271                dead = false;
2272                remove = false;
2273
2274                /*
2275                 * Optimization can fold conditional branches to unconditional.
2276                 * If we find a label with one reference which is preceded by
2277                 * an unconditional branch to it, remove both.  This needed to
2278                 * wait until the dead code in between them was removed.
2279                 */
2280                if (label->refs == 1) {
2281                    TCGOp *op_prev = QTAILQ_PREV(op, link);
2282                    if (op_prev->opc == INDEX_op_br &&
2283                        label == arg_label(op_prev->args[0])) {
2284                        tcg_op_remove(s, op_prev);
2285                        remove = true;
2286                    }
2287                }
2288            }
2289            break;
2290
2291        case INDEX_op_br:
2292        case INDEX_op_exit_tb:
2293        case INDEX_op_goto_ptr:
2294            /* Unconditional branches; everything following is dead.  */
2295            dead = true;
2296            break;
2297
2298        case INDEX_op_call:
2299            /* Notice noreturn helper calls, e.g. those raising exceptions.  */
2300            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2301                dead = true;
2302            }
2303            break;
2304
2305        case INDEX_op_insn_start:
2306            /* Never remove -- we need to keep these for unwind.  */
2307            remove = false;
2308            break;
2309
2310        default:
2311            break;
2312        }
2313
2314        if (remove) {
2315            tcg_op_remove(s, op);
2316        }
2317    }
2318}
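
    /*
     * Example of the eliminations above (illustrative):
     *
     *     br $L1
     *     <ops>              ... unreachable, removed while 'dead' is set
     *     set_label $L1      ... refs == 1 and preceded by br $L1, so
     *                            both the br and the label are removed
     */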
2319
2320#define TS_DEAD  1
2321#define TS_MEM   2
2322
2323#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2324#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
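
    /*
     * Recap of the state encoding used by the la_* helpers below, as an
     * illustrative summary: TS_DEAD means the value has no later use;
     * TS_MEM means its canonical memory slot must be kept up to date.
     * E.g. la_func_end() marks globals TS_DEAD | TS_MEM because a TB
     * exit must leave them in their memory slots.
     */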
2325
2326/* For liveness_pass_1, the register preferences for a given temp.  */
2327static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2328{
2329    return ts->state_ptr;
2330}
2331
2332/* For liveness_pass_1, reset the preferences for a given temp to the
2333 * maximal regset for its type.
2334 */
2335static inline void la_reset_pref(TCGTemp *ts)
2336{
2337    *la_temp_pref(ts)
2338        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2339}
2340
2341/* liveness analysis: end of function: all temps are dead, and globals
2342   should be in memory. */
2343static void la_func_end(TCGContext *s, int ng, int nt)
2344{
2345    int i;
2346
2347    for (i = 0; i < ng; ++i) {
2348        s->temps[i].state = TS_DEAD | TS_MEM;
2349        la_reset_pref(&s->temps[i]);
2350    }
2351    for (i = ng; i < nt; ++i) {
2352        s->temps[i].state = TS_DEAD;
2353        la_reset_pref(&s->temps[i]);
2354    }
2355}
2356
2357/* liveness analysis: end of basic block: all temps are dead, globals
2358   and local temps should be in memory. */
2359static void la_bb_end(TCGContext *s, int ng, int nt)
2360{
2361    int i;
2362
2363    for (i = 0; i < nt; ++i) {
2364        TCGTemp *ts = &s->temps[i];
2365        int state;
2366
2367        switch (ts->kind) {
2368        case TEMP_FIXED:
2369        case TEMP_GLOBAL:
2370        case TEMP_LOCAL:
2371            state = TS_DEAD | TS_MEM;
2372            break;
2373        case TEMP_NORMAL:
2374        case TEMP_CONST:
2375            state = TS_DEAD;
2376            break;
2377        default:
2378            g_assert_not_reached();
2379        }
2380        ts->state = state;
2381        la_reset_pref(ts);
2382    }
2383}
2384
2385/* liveness analysis: sync globals back to memory.  */
2386static void la_global_sync(TCGContext *s, int ng)
2387{
2388    int i;
2389
2390    for (i = 0; i < ng; ++i) {
2391        int state = s->temps[i].state;
2392        s->temps[i].state = state | TS_MEM;
2393        if (state == TS_DEAD) {
2394            /* If the global was previously dead, reset prefs.  */
2395            la_reset_pref(&s->temps[i]);
2396        }
2397    }
2398}
2399
2400/*
2401 * liveness analysis: conditional branch: all temps are dead,
2402 * globals and local temps should be synced.
2403 */
2404static void la_bb_sync(TCGContext *s, int ng, int nt)
2405{
2406    la_global_sync(s, ng);
2407
2408    for (int i = ng; i < nt; ++i) {
2409        TCGTemp *ts = &s->temps[i];
2410        int state;
2411
2412        switch (ts->kind) {
2413        case TEMP_LOCAL:
2414            state = ts->state;
2415            ts->state = state | TS_MEM;
2416            if (state != TS_DEAD) {
2417                continue;
2418            }
2419            break;
2420        case TEMP_NORMAL:
2421            s->temps[i].state = TS_DEAD;
2422            break;
2423        case TEMP_CONST:
2424            continue;
2425        default:
2426            g_assert_not_reached();
2427        }
2428        la_reset_pref(&s->temps[i]);
2429    }
2430}
2431
2432/* liveness analysis: sync globals back to memory and kill.  */
2433static void la_global_kill(TCGContext *s, int ng)
2434{
2435    int i;
2436
2437    for (i = 0; i < ng; i++) {
2438        s->temps[i].state = TS_DEAD | TS_MEM;
2439        la_reset_pref(&s->temps[i]);
2440    }
2441}
2442
2443/* liveness analysis: note live temporaries crossing calls.  */
2444static void la_cross_call(TCGContext *s, int nt)
2445{
2446    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2447    int i;
2448
2449    for (i = 0; i < nt; i++) {
2450        TCGTemp *ts = &s->temps[i];
2451        if (!(ts->state & TS_DEAD)) {
2452            TCGRegSet *pset = la_temp_pref(ts);
2453            TCGRegSet set = *pset;
2454
2455            set &= mask;
2456            /* If the combination is not possible, restart.  */
2457            if (set == 0) {
2458                set = tcg_target_available_regs[ts->type] & mask;
2459            }
2460            *pset = set;
2461        }
2462    }
2463}
2464
2465/* Liveness analysis: update the opc_arg_life array to tell if a
2466   given input argument is dead.  Instructions updating dead
2467   temporaries are removed. */
2468static void liveness_pass_1(TCGContext *s)
2469{
2470    int nb_globals = s->nb_globals;
2471    int nb_temps = s->nb_temps;
2472    TCGOp *op, *op_prev;
2473    TCGRegSet *prefs;
2474    int i;
2475
2476    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2477    for (i = 0; i < nb_temps; ++i) {
2478        s->temps[i].state_ptr = prefs + i;
2479    }
2480
2481    /* ??? Should be redundant with the exit_tb that ends the TB.  */
2482    la_func_end(s, nb_globals, nb_temps);
2483
2484    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2485        int nb_iargs, nb_oargs;
2486        TCGOpcode opc_new, opc_new2;
2487        bool have_opc_new2;
2488        TCGLifeData arg_life = 0;
2489        TCGTemp *ts;
2490        TCGOpcode opc = op->opc;
2491        const TCGOpDef *def = &tcg_op_defs[opc];
2492
2493        switch (opc) {
2494        case INDEX_op_call:
2495            {
2496                int call_flags;
2497                int nb_call_regs;
2498
2499                nb_oargs = TCGOP_CALLO(op);
2500                nb_iargs = TCGOP_CALLI(op);
2501                call_flags = tcg_call_flags(op);
2502
2503                /* pure functions can be removed if their result is unused */
2504                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2505                    for (i = 0; i < nb_oargs; i++) {
2506                        ts = arg_temp(op->args[i]);
2507                        if (ts->state != TS_DEAD) {
2508                            goto do_not_remove_call;
2509                        }
2510                    }
2511                    goto do_remove;
2512                }
2513            do_not_remove_call:
2514
2515                /* Output args are dead.  */
2516                for (i = 0; i < nb_oargs; i++) {
2517                    ts = arg_temp(op->args[i]);
2518                    if (ts->state & TS_DEAD) {
2519                        arg_life |= DEAD_ARG << i;
2520                    }
2521                    if (ts->state & TS_MEM) {
2522                        arg_life |= SYNC_ARG << i;
2523                    }
2524                    ts->state = TS_DEAD;
2525                    la_reset_pref(ts);
2526
2527                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2528                    op->output_pref[i] = 0;
2529                }
2530
2531                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2532                                    TCG_CALL_NO_READ_GLOBALS))) {
2533                    la_global_kill(s, nb_globals);
2534                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2535                    la_global_sync(s, nb_globals);
2536                }
2537
2538                /* Record arguments that die in this helper.  */
2539                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2540                    ts = arg_temp(op->args[i]);
2541                    if (ts && ts->state & TS_DEAD) {
2542                        arg_life |= DEAD_ARG << i;
2543                    }
2544                }
2545
2546                /* For all live registers, remove call-clobbered prefs.  */
2547                la_cross_call(s, nb_temps);
2548
2549                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2550
2551                /* Input arguments are live for preceding opcodes.  */
2552                for (i = 0; i < nb_iargs; i++) {
2553                    ts = arg_temp(op->args[i + nb_oargs]);
2554                    if (ts && ts->state & TS_DEAD) {
2555                        /* For those arguments that die, and will be allocated
2556                         * in registers, clear the register set for that arg,
2557                         * to be filled in below.  For args that will be on
2558                         * the stack, reset to any available reg.
2559                         */
2560                        *la_temp_pref(ts)
2561                            = (i < nb_call_regs ? 0 :
2562                               tcg_target_available_regs[ts->type]);
2563                        ts->state &= ~TS_DEAD;
2564                    }
2565                }
2566
2567                /* For each input argument, add its input register to prefs.
2568                   If a temp is used once, this produces a single set bit.  */
2569                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2570                    ts = arg_temp(op->args[i + nb_oargs]);
2571                    if (ts) {
2572                        tcg_regset_set_reg(*la_temp_pref(ts),
2573                                           tcg_target_call_iarg_regs[i]);
2574                    }
2575                }
2576            }
2577            break;
2578        case INDEX_op_insn_start:
2579            break;
2580        case INDEX_op_discard:
2581            /* mark the temporary as dead */
2582            ts = arg_temp(op->args[0]);
2583            ts->state = TS_DEAD;
2584            la_reset_pref(ts);
2585            break;
2586
2587        case INDEX_op_add2_i32:
2588            opc_new = INDEX_op_add_i32;
2589            goto do_addsub2;
2590        case INDEX_op_sub2_i32:
2591            opc_new = INDEX_op_sub_i32;
2592            goto do_addsub2;
2593        case INDEX_op_add2_i64:
2594            opc_new = INDEX_op_add_i64;
2595            goto do_addsub2;
2596        case INDEX_op_sub2_i64:
2597            opc_new = INDEX_op_sub_i64;
2598        do_addsub2:
2599            nb_iargs = 4;
2600            nb_oargs = 2;
2601            /* Test if the high part of the operation is dead, but not
2602               the low part.  The result can be optimized to a simple
2603               add or sub.  This happens often for an x86_64 guest
2604               when the CPU mode is set to 32-bit.  */
2605            if (arg_temp(op->args[1])->state == TS_DEAD) {
2606                if (arg_temp(op->args[0])->state == TS_DEAD) {
2607                    goto do_remove;
2608                }
2609                /* Replace the opcode and adjust the args in place,
2610                   leaving 3 unused args at the end.  */
2611                op->opc = opc = opc_new;
2612                op->args[1] = op->args[2];
2613                op->args[2] = op->args[4];
2614                /* Fall through and mark the single-word operation live.  */
2615                nb_iargs = 2;
2616                nb_oargs = 1;
2617            }
2618            goto do_not_remove;
2619
2620        case INDEX_op_mulu2_i32:
2621            opc_new = INDEX_op_mul_i32;
2622            opc_new2 = INDEX_op_muluh_i32;
2623            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2624            goto do_mul2;
2625        case INDEX_op_muls2_i32:
2626            opc_new = INDEX_op_mul_i32;
2627            opc_new2 = INDEX_op_mulsh_i32;
2628            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2629            goto do_mul2;
2630        case INDEX_op_mulu2_i64:
2631            opc_new = INDEX_op_mul_i64;
2632            opc_new2 = INDEX_op_muluh_i64;
2633            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2634            goto do_mul2;
2635        case INDEX_op_muls2_i64:
2636            opc_new = INDEX_op_mul_i64;
2637            opc_new2 = INDEX_op_mulsh_i64;
2638            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2639            goto do_mul2;
2640        do_mul2:
2641            nb_iargs = 2;
2642            nb_oargs = 2;
2643            if (arg_temp(op->args[1])->state == TS_DEAD) {
2644                if (arg_temp(op->args[0])->state == TS_DEAD) {
2645                    /* Both parts of the operation are dead.  */
2646                    goto do_remove;
2647                }
2648                /* The high part of the operation is dead; generate the low. */
2649                op->opc = opc = opc_new;
2650                op->args[1] = op->args[2];
2651                op->args[2] = op->args[3];
2652            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2653                /* The low part of the operation is dead; generate the high. */
2654                op->opc = opc = opc_new2;
2655                op->args[0] = op->args[1];
2656                op->args[1] = op->args[2];
2657                op->args[2] = op->args[3];
2658            } else {
2659                goto do_not_remove;
2660            }
2661            /* Mark the single-word operation live.  */
2662            nb_oargs = 1;
2663            goto do_not_remove;
2664
2665        default:
2666            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2667            nb_iargs = def->nb_iargs;
2668            nb_oargs = def->nb_oargs;
2669
2670            /* Test if the operation can be removed because all
2671               its outputs are dead.  We assume that nb_oargs == 0
2672               implies side effects.  */
2673            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2674                for (i = 0; i < nb_oargs; i++) {
2675                    if (arg_temp(op->args[i])->state != TS_DEAD) {
2676                        goto do_not_remove;
2677                    }
2678                }
2679                goto do_remove;
2680            }
2681            goto do_not_remove;
2682
2683        do_remove:
2684            tcg_op_remove(s, op);
2685            break;
2686
2687        do_not_remove:
2688            for (i = 0; i < nb_oargs; i++) {
2689                ts = arg_temp(op->args[i]);
2690
2691                /* Remember the preference of the uses that followed.  */
2692                op->output_pref[i] = *la_temp_pref(ts);
2693
2694                /* Output args are dead.  */
2695                if (ts->state & TS_DEAD) {
2696                    arg_life |= DEAD_ARG << i;
2697                }
2698                if (ts->state & TS_MEM) {
2699                    arg_life |= SYNC_ARG << i;
2700                }
2701                ts->state = TS_DEAD;
2702                la_reset_pref(ts);
2703            }
2704
2705            /* If end of basic block, update.  */
2706            if (def->flags & TCG_OPF_BB_EXIT) {
2707                la_func_end(s, nb_globals, nb_temps);
2708            } else if (def->flags & TCG_OPF_COND_BRANCH) {
2709                la_bb_sync(s, nb_globals, nb_temps);
2710            } else if (def->flags & TCG_OPF_BB_END) {
2711                la_bb_end(s, nb_globals, nb_temps);
2712            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2713                la_global_sync(s, nb_globals);
2714                if (def->flags & TCG_OPF_CALL_CLOBBER) {
2715                    la_cross_call(s, nb_temps);
2716                }
2717            }
2718
2719            /* Record arguments that die in this opcode.  */
2720            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2721                ts = arg_temp(op->args[i]);
2722                if (ts->state & TS_DEAD) {
2723                    arg_life |= DEAD_ARG << i;
2724                }
2725            }
2726
2727            /* Input arguments are live for preceding opcodes.  */
2728            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2729                ts = arg_temp(op->args[i]);
2730                if (ts->state & TS_DEAD) {
2731                    /* For operands that were dead, initially allow
2732                       all regs for the type.  */
2733                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2734                    ts->state &= ~TS_DEAD;
2735                }
2736            }
2737
2738            /* Incorporate constraints for this operand.  */
2739            switch (opc) {
2740            case INDEX_op_mov_i32:
2741            case INDEX_op_mov_i64:
2742                /* Note that these are TCG_OPF_NOT_PRESENT and do not
2743                   have proper constraints.  That said, special case
2744                   moves to propagate preferences backward.  */
2745                if (IS_DEAD_ARG(1)) {
2746                    *la_temp_pref(arg_temp(op->args[0]))
2747                        = *la_temp_pref(arg_temp(op->args[1]));
2748                }
2749                break;
2750
2751            default:
2752                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2753                    const TCGArgConstraint *ct = &def->args_ct[i];
2754                    TCGRegSet set, *pset;
2755
2756                    ts = arg_temp(op->args[i]);
2757                    pset = la_temp_pref(ts);
2758                    set = *pset;
2759
2760                    set &= ct->regs;
2761                    if (ct->ialias) {
2762                        set &= op->output_pref[ct->alias_index];
2763                    }
2764                    /* If the combination is not possible, restart.  */
2765                    if (set == 0) {
2766                        set = ct->regs;
2767                    }
2768                    *pset = set;
2769                }
2770                break;
2771            }
2772            break;
2773        }
2774        op->life = arg_life;
2775    }
2776}
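
    /*
     * Example of the add2/sub2 narrowing performed above (illustrative):
     *
     *     t_lo,t_hi = add2_i32 a_lo,a_hi,b_lo,b_hi
     *
     * with t_hi dead but t_lo live is rewritten in place to
     *
     *     t_lo = add_i32 a_lo,b_lo
     *
     * the common case when the high half of 64-bit guest arithmetic
     * is never consumed.
     */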
2777
2778/* Liveness analysis: Convert indirect regs to direct temporaries.  */
2779static bool liveness_pass_2(TCGContext *s)
2780{
2781    int nb_globals = s->nb_globals;
2782    int nb_temps, i;
2783    bool changes = false;
2784    TCGOp *op, *op_next;
2785
2786    /* Create a temporary for each indirect global.  */
2787    for (i = 0; i < nb_globals; ++i) {
2788        TCGTemp *its = &s->temps[i];
2789        if (its->indirect_reg) {
2790            TCGTemp *dts = tcg_temp_alloc(s);
2791            dts->type = its->type;
2792            dts->base_type = its->base_type;
2793            its->state_ptr = dts;
2794        } else {
2795            its->state_ptr = NULL;
2796        }
2797        /* All globals begin dead.  */
2798        its->state = TS_DEAD;
2799    }
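        /* 'i' continues from nb_globals; the remaining temps also begin dead. */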
2800    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2801        TCGTemp *its = &s->temps[i];
2802        its->state_ptr = NULL;
2803        its->state = TS_DEAD;
2804    }
2805
2806    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2807        TCGOpcode opc = op->opc;
2808        const TCGOpDef *def = &tcg_op_defs[opc];
2809        TCGLifeData arg_life = op->life;
2810        int nb_iargs, nb_oargs, call_flags;
2811        TCGTemp *arg_ts, *dir_ts;
2812
2813        if (opc == INDEX_op_call) {
2814            nb_oargs = TCGOP_CALLO(op);
2815            nb_iargs = TCGOP_CALLI(op);
2816            call_flags = tcg_call_flags(op);
2817        } else {
2818            nb_iargs = def->nb_iargs;
2819            nb_oargs = def->nb_oargs;
2820
2821            /* Set flags similar to how calls require.  */
2822            if (def->flags & TCG_OPF_COND_BRANCH) {
2823                /* Like reading globals: sync_globals */
2824                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2825            } else if (def->flags & TCG_OPF_BB_END) {
2826                /* Like writing globals: save_globals */
2827                call_flags = 0;
2828            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2829                /* Like reading globals: sync_globals */
2830                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2831            } else {
2832                /* No effect on globals.  */
2833                call_flags = (TCG_CALL_NO_READ_GLOBALS |
2834                              TCG_CALL_NO_WRITE_GLOBALS);
2835            }
2836        }
2837
2838        /* Make sure that input arguments are available.  */
2839        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2840            arg_ts = arg_temp(op->args[i]);
2841            if (arg_ts) {
2842                dir_ts = arg_ts->state_ptr;
2843                if (dir_ts && arg_ts->state == TS_DEAD) {
2844                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2845                                      ? INDEX_op_ld_i32
2846                                      : INDEX_op_ld_i64);
2847                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2848
2849                    lop->args[0] = temp_arg(dir_ts);
2850                    lop->args[1] = temp_arg(arg_ts->mem_base);
2851                    lop->args[2] = arg_ts->mem_offset;
2852
2853                    /* Loaded, but synced with memory.  */
2854                    arg_ts->state = TS_MEM;
2855                }
2856            }
2857        }
2858
2859        /* Perform input replacement, and mark inputs that became dead.
2860           No action is required except keeping temp_state up to date
2861           so that we reload when needed.  */
2862        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2863            arg_ts = arg_temp(op->args[i]);
2864            if (arg_ts) {
2865                dir_ts = arg_ts->state_ptr;
2866                if (dir_ts) {
2867                    op->args[i] = temp_arg(dir_ts);
2868                    changes = true;
2869                    if (IS_DEAD_ARG(i)) {
2870                        arg_ts->state = TS_DEAD;
2871                    }
2872                }
2873            }
2874        }
2875
2876        /* Liveness analysis should ensure that the following are
2877           all correct, for call sites and basic block end points.  */
2878        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2879            /* Nothing to do */
2880        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2881            for (i = 0; i < nb_globals; ++i) {
2882                /* Liveness should see that globals are synced back,
2883                   that is, either TS_DEAD or TS_MEM.  */
2884                arg_ts = &s->temps[i];
2885                tcg_debug_assert(arg_ts->state_ptr == 0
2886                                 || arg_ts->state != 0);
2887            }
2888        } else {
2889            for (i = 0; i < nb_globals; ++i) {
2890                /* Liveness should see that globals are saved back,
2891                   that is, TS_DEAD, waiting to be reloaded.  */
2892                arg_ts = &s->temps[i];
2893                tcg_debug_assert(arg_ts->state_ptr == 0
2894                                 || arg_ts->state == TS_DEAD);
2895            }
2896        }
2897
2898        /* Outputs become available.  */
2899        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2900            arg_ts = arg_temp(op->args[0]);
2901            dir_ts = arg_ts->state_ptr;
2902            if (dir_ts) {
2903                op->args[0] = temp_arg(dir_ts);
2904                changes = true;
2905
2906                /* The output is now live and modified.  */
2907                arg_ts->state = 0;
2908
2909                if (NEED_SYNC_ARG(0)) {
2910                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2911                                      ? INDEX_op_st_i32
2912                                      : INDEX_op_st_i64);
2913                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2914                    TCGTemp *out_ts = dir_ts;
2915
2916                    if (IS_DEAD_ARG(0)) {
2917                        out_ts = arg_temp(op->args[1]);
2918                        arg_ts->state = TS_DEAD;
2919                        tcg_op_remove(s, op);
2920                    } else {
2921                        arg_ts->state = TS_MEM;
2922                    }
2923
2924                    sop->args[0] = temp_arg(out_ts);
2925                    sop->args[1] = temp_arg(arg_ts->mem_base);
2926                    sop->args[2] = arg_ts->mem_offset;
2927                } else {
2928                    tcg_debug_assert(!IS_DEAD_ARG(0));
2929                }
2930            }
2931        } else {
2932            for (i = 0; i < nb_oargs; i++) {
2933                arg_ts = arg_temp(op->args[i]);
2934                dir_ts = arg_ts->state_ptr;
2935                if (!dir_ts) {
2936                    continue;
2937                }
2938                op->args[i] = temp_arg(dir_ts);
2939                changes = true;
2940
2941                /* The output is now live and modified.  */
2942                arg_ts->state = 0;
2943
2944                /* Sync outputs upon their last write.  */
2945                if (NEED_SYNC_ARG(i)) {
2946                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2947                                      ? INDEX_op_st_i32
2948                                      : INDEX_op_st_i64);
2949                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2950
2951                    sop->args[0] = temp_arg(dir_ts);
2952                    sop->args[1] = temp_arg(arg_ts->mem_base);
2953                    sop->args[2] = arg_ts->mem_offset;
2954
2955                    arg_ts->state = TS_MEM;
2956                }
2957                /* Drop outputs that are dead.  */
2958                if (IS_DEAD_ARG(i)) {
2959                    arg_ts->state = TS_DEAD;
2960                }
2961            }
2962        }
2963    }
2964
2965    return changes;
2966}
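
    /*
     * Sketch of the pass's effect (offsets and names invented for
     * illustration): for an indirect global whose mem_base is 'env',
     *
     *     tmp2 = ld_i32 env,$0x10      ... inserted before the first use
     *     <ops now referencing tmp2>
     *     st_i32 tmp2,env,$0x10        ... inserted after the last write
     *
     * so the value lives in a plain temp between the load and the store.
     */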
2967
2968#ifdef CONFIG_DEBUG_TCG
2969static void dump_regs(TCGContext *s)
2970{
2971    TCGTemp *ts;
2972    int i;
2973    char buf[64];
2974
2975    for (i = 0; i < s->nb_temps; i++) {
2976        ts = &s->temps[i];
2977        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2978        switch (ts->val_type) {
2979        case TEMP_VAL_REG:
2980            printf("%s", tcg_target_reg_names[ts->reg]);
2981            break;
2982        case TEMP_VAL_MEM:
2983            printf("%d(%s)", (int)ts->mem_offset,
2984                   tcg_target_reg_names[ts->mem_base->reg]);
2985            break;
2986        case TEMP_VAL_CONST:
2987            printf("$0x%" PRIx64, ts->val);
2988            break;
2989        case TEMP_VAL_DEAD:
2990            printf("D");
2991            break;
2992        default:
2993            printf("???");
2994            break;
2995        }
2996        printf("\n");
2997    }
2998
2999    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3000        if (s->reg_to_temp[i] != NULL) {
3001            printf("%s: %s\n",
3002                   tcg_target_reg_names[i],
3003                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3004        }
3005    }
3006}
3007
3008static void check_regs(TCGContext *s)
3009{
3010    int reg;
3011    int k;
3012    TCGTemp *ts;
3013    char buf[64];
3014
3015    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3016        ts = s->reg_to_temp[reg];
3017        if (ts != NULL) {
3018            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3019                printf("Inconsistency for register %s:\n",
3020                       tcg_target_reg_names[reg]);
3021                goto fail;
3022            }
3023        }
3024    }
3025    for (k = 0; k < s->nb_temps; k++) {
3026        ts = &s->temps[k];
3027        if (ts->val_type == TEMP_VAL_REG
3028            && ts->kind != TEMP_FIXED
3029            && s->reg_to_temp[ts->reg] != ts) {
3030            printf("Inconsistency for temp %s:\n",
3031                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3032        fail:
3033            printf("reg state:\n");
3034            dump_regs(s);
3035            tcg_abort();
3036        }
3037    }
3038}
3039#endif
3040
3041static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3042{
3043    intptr_t off, size, align;
3044
3045    switch (ts->type) {
3046    case TCG_TYPE_I32:
3047        size = align = 4;
3048        break;
3049    case TCG_TYPE_I64:
3050    case TCG_TYPE_V64:
3051        size = align = 8;
3052        break;
3053    case TCG_TYPE_V128:
3054        size = align = 16;
3055        break;
3056    case TCG_TYPE_V256:
3057        /* Note that we do not require aligned storage for V256. */
3058        size = 32, align = 16;
3059        break;
3060    default:
3061        g_assert_not_reached();
3062    }
3063
3064    /*
3065     * Assume the stack is sufficiently aligned.
3066     * This affects e.g. ARM NEON, where we have 8-byte stack alignment
3067     * and do not require 16-byte vector alignment.  This seems slightly
3068     * easier than fully parameterizing the above switch statement.
3069     */
3070    align = MIN(TCG_TARGET_STACK_ALIGN, align);
3071    off = ROUND_UP(s->current_frame_offset, align);
3072
3073    /* If we've exhausted the stack frame, restart with a smaller TB. */
3074    if (off + size > s->frame_end) {
3075        tcg_raise_tb_overflow(s);
3076    }
3077    s->current_frame_offset = off + size;
3078
3079    ts->mem_offset = off;
3080#if defined(__sparc__)
3081    ts->mem_offset += TCG_TARGET_STACK_BIAS;
3082#endif
3083    ts->mem_base = s->frame_temp;
3084    ts->mem_allocated = 1;
3085}
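
    /*
     * Worked example, for illustration: with current_frame_offset == 0x14,
     * a TCG_TYPE_I64 temp has size == align == 8, so off becomes
     * ROUND_UP(0x14, 8) == 0x18, the temp occupies [0x18, 0x20), and
     * current_frame_offset advances to 0x20.
     */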
3086
3087static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3088
3089/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3090   mark it free; otherwise mark it dead.  */
3091static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3092{
3093    TCGTempVal new_type;
3094
3095    switch (ts->kind) {
3096    case TEMP_FIXED:
3097        return;
3098    case TEMP_GLOBAL:
3099    case TEMP_LOCAL:
3100        new_type = TEMP_VAL_MEM;
3101        break;
3102    case TEMP_NORMAL:
3103        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3104        break;
3105    case TEMP_CONST:
3106        new_type = TEMP_VAL_CONST;
3107        break;
3108    default:
3109        g_assert_not_reached();
3110    }
3111    if (ts->val_type == TEMP_VAL_REG) {
3112        s->reg_to_temp[ts->reg] = NULL;
3113    }
3114    ts->val_type = new_type;
3115}
3116
3117/* Mark a temporary as dead.  */
3118static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3119{
3120    temp_free_or_dead(s, ts, 1);
3121}
3122
3123/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3124   register needs to be allocated to store a constant.  If 'free_or_dead'
3125   is non-zero, subsequently release the temporary; if it is positive, the
3126   temp is dead; if it is negative, the temp is free.  */
3127static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3128                      TCGRegSet preferred_regs, int free_or_dead)
3129{
3130    if (!temp_readonly(ts) && !ts->mem_coherent) {
3131        if (!ts->mem_allocated) {
3132            temp_allocate_frame(s, ts);
3133        }
3134        switch (ts->val_type) {
3135        case TEMP_VAL_CONST:
3136            /* If we're going to free the temp immediately, then we won't
3137               require it later in a register, so attempt to store the
3138               constant to memory directly.  */
3139            if (free_or_dead
3140                && tcg_out_sti(s, ts->type, ts->val,
3141                               ts->mem_base->reg, ts->mem_offset)) {
3142                break;
3143            }
3144            temp_load(s, ts, tcg_target_available_regs[ts->type],
3145                      allocated_regs, preferred_regs);
3146            /* fallthrough */
3147
3148        case TEMP_VAL_REG:
3149            tcg_out_st(s, ts->type, ts->reg,
3150                       ts->mem_base->reg, ts->mem_offset);
3151            break;
3152
3153        case TEMP_VAL_MEM:
3154            break;
3155
3156        case TEMP_VAL_DEAD:
3157        default:
3158            tcg_abort();
3159        }
3160        ts->mem_coherent = 1;
3161    }
3162    if (free_or_dead) {
3163        temp_free_or_dead(s, ts, free_or_dead);
3164    }
3165}
3166
3167/* free register 'reg' by spilling the corresponding temporary if necessary */
3168static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3169{
3170    TCGTemp *ts = s->reg_to_temp[reg];
3171    if (ts != NULL) {
3172        temp_sync(s, ts, allocated_regs, 0, -1);
3173    }
3174}
3175
3176/**
3177 * tcg_reg_alloc:
3178 * @required_regs: Set of registers in which we must allocate.
3179 * @allocated_regs: Set of registers which must be avoided.
3180 * @preferred_regs: Set of registers we should prefer.
3181 * @rev: True if we search the registers in "indirect" order.
3182 *
3183 * The allocated register must be in @required_regs & ~@allocated_regs,
3184 * but if we can put it in @preferred_regs we may save a move later.
3185 */
3186static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3187                            TCGRegSet allocated_regs,
3188                            TCGRegSet preferred_regs, bool rev)
3189{
3190    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3191    TCGRegSet reg_ct[2];
3192    const int *order;
3193
3194    reg_ct[1] = required_regs & ~allocated_regs;
3195    tcg_debug_assert(reg_ct[1] != 0);
3196    reg_ct[0] = reg_ct[1] & preferred_regs;
3197
3198    /* Skip the preferred_regs option if it cannot be satisfied,
3199       or if the preference made no difference.  */
3200    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3201
3202    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3203
3204    /* Try free registers, preferences first.  */
3205    for (j = f; j < 2; j++) {
3206        TCGRegSet set = reg_ct[j];
3207
3208        if (tcg_regset_single(set)) {
3209            /* One register in the set.  */
3210            TCGReg reg = tcg_regset_first(set);
3211            if (s->reg_to_temp[reg] == NULL) {
3212                return reg;
3213            }
3214        } else {
3215            for (i = 0; i < n; i++) {
3216                TCGReg reg = order[i];
3217                if (s->reg_to_temp[reg] == NULL &&
3218                    tcg_regset_test_reg(set, reg)) {
3219                    return reg;
3220                }
3221            }
3222        }
3223    }
3224
3225    /* We must spill something.  */
3226    for (j = f; j < 2; j++) {
3227        TCGRegSet set = reg_ct[j];
3228
3229        if (tcg_regset_single(set)) {
3230            /* One register in the set.  */
3231            TCGReg reg = tcg_regset_first(set);
3232            tcg_reg_free(s, reg, allocated_regs);
3233            return reg;
3234        } else {
3235            for (i = 0; i < n; i++) {
3236                TCGReg reg = order[i];
3237                if (tcg_regset_test_reg(set, reg)) {
3238                    tcg_reg_free(s, reg, allocated_regs);
3239                    return reg;
3240                }
3241            }
3242        }
3243    }
3244
3245    tcg_abort();
3246}
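
/*
 * Illustrative walk-through of the two passes above (register names are
 * hypothetical, not any real target's allocation order): suppose
 * required_regs = {r0,r1,r2}, allocated_regs = {r0} and
 * preferred_regs = {r1}.  Then reg_ct[1] = {r1,r2}, reg_ct[0] = {r1}
 * and f = 0, so the free-register pass scans {r1} first and then widens
 * to {r1,r2}; only if every candidate is occupied does the second pass
 * spill, again trying the preferred subset first.
 */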
3247
3248/* Make sure the temporary is in a register.  If needed, allocate the register
3249   from DESIRED while avoiding ALLOCATED.  */
3250static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3251                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3252{
3253    TCGReg reg;
3254
3255    switch (ts->val_type) {
3256    case TEMP_VAL_REG:
3257        return;
3258    case TEMP_VAL_CONST:
3259        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3260                            preferred_regs, ts->indirect_base);
3261        if (ts->type <= TCG_TYPE_I64) {
3262            tcg_out_movi(s, ts->type, reg, ts->val);
3263        } else {
3264            uint64_t val = ts->val;
3265            MemOp vece = MO_64;
3266
3267            /*
3268             * Find the minimal vector element that matches the constant.
3269             * The targets will, in general, have to do this search anyway,
3270             * so do it generically here.
3271             */
3272            if (val == dup_const(MO_8, val)) {
3273                vece = MO_8;
3274            } else if (val == dup_const(MO_16, val)) {
3275                vece = MO_16;
3276            } else if (val == dup_const(MO_32, val)) {
3277                vece = MO_32;
3278            }
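            /*
             * E.g. val == 0xeeeeeeeeeeeeeeee matches dup_const(MO_8, val)
             * (every byte equal) and selects MO_8, while a value such as
             * 0x0001000100010001 fails at MO_8 but matches at MO_16.
             */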
3279
3280            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3281        }
3282        ts->mem_coherent = 0;
3283        break;
3284    case TEMP_VAL_MEM:
3285        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3286                            preferred_regs, ts->indirect_base);
3287        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3288        ts->mem_coherent = 1;
3289        break;
3290    case TEMP_VAL_DEAD:
3291    default:
3292        tcg_abort();
3293    }
3294    ts->reg = reg;
3295    ts->val_type = TEMP_VAL_REG;
3296    s->reg_to_temp[reg] = ts;
3297}
3298
3299/* Save a temporary to memory. 'allocated_regs' is used in case a
3300   temporary register needs to be allocated to store a constant.  */
3301static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3302{
3303    /* The liveness analysis already ensures that globals are back
3304       in memory. Keep a tcg_debug_assert for safety. */
3305    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3306}
3307
3308/* save globals to their canonical location and assume they can be
3309   modified by the following code. 'allocated_regs' is used in case a
3310   temporary register needs to be allocated to store a constant. */
3311static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3312{
3313    int i, n;
3314
3315    for (i = 0, n = s->nb_globals; i < n; i++) {
3316        temp_save(s, &s->temps[i], allocated_regs);
3317    }
3318}
3319
3320/* sync globals to their canonical location and assume they can be
3321   read by the following code. 'allocated_regs' is used in case a
3322   temporary register needs to be allocated to store a constant. */
3323static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3324{
3325    int i, n;
3326
3327    for (i = 0, n = s->nb_globals; i < n; i++) {
3328        TCGTemp *ts = &s->temps[i];
3329        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3330                         || ts->kind == TEMP_FIXED
3331                         || ts->mem_coherent);
3332    }
3333}
3334
3335/* at the end of a basic block, we assume all temporaries are dead and
3336   all globals are stored at their canonical location. */
3337static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3338{
3339    int i;
3340
3341    for (i = s->nb_globals; i < s->nb_temps; i++) {
3342        TCGTemp *ts = &s->temps[i];
3343
3344        switch (ts->kind) {
3345        case TEMP_LOCAL:
3346            temp_save(s, ts, allocated_regs);
3347            break;
3348        case TEMP_NORMAL:
3349            /* The liveness analysis already ensures that temps are dead.
3350               Keep a tcg_debug_assert for safety. */
3351            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3352            break;
3353        case TEMP_CONST:
3354            /* Similarly, we should have freed any allocated register. */
3355            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3356            break;
3357        default:
3358            g_assert_not_reached();
3359        }
3360    }
3361
3362    save_globals(s, allocated_regs);
3363}
3364
3365/*
3366 * At a conditional branch, we assume all temporaries are dead and
3367 * all globals and local temps are synced to their location.
3368 */
3369static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3370{
3371    sync_globals(s, allocated_regs);
3372
3373    for (int i = s->nb_globals; i < s->nb_temps; i++) {
3374        TCGTemp *ts = &s->temps[i];
3375        /*
3376         * The liveness analysis already ensures that temps are dead.
3377         * Keep tcg_debug_asserts for safety.
3378         */
3379        switch (ts->kind) {
3380        case TEMP_LOCAL:
3381            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3382            break;
3383        case TEMP_NORMAL:
3384            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3385            break;
3386        case TEMP_CONST:
3387            break;
3388        default:
3389            g_assert_not_reached();
3390        }
3391    }
3392}
3393
3394/*
3395 * Specialized code generation for INDEX_op_mov_* with a constant.
3396 */
3397static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3398                                  tcg_target_ulong val, TCGLifeData arg_life,
3399                                  TCGRegSet preferred_regs)
3400{
3401    /* ENV should not be modified.  */
3402    tcg_debug_assert(!temp_readonly(ots));
3403
3404    /* The movi is not explicitly generated here.  */
3405    if (ots->val_type == TEMP_VAL_REG) {
3406        s->reg_to_temp[ots->reg] = NULL;
3407    }
3408    ots->val_type = TEMP_VAL_CONST;
3409    ots->val = val;
3410    ots->mem_coherent = 0;
3411    if (NEED_SYNC_ARG(0)) {
3412        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3413    } else if (IS_DEAD_ARG(0)) {
3414        temp_dead(s, ots);
3415    }
3416}
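
/*
 * Note how lazy this is: a mov from a constant emits no host code by
 * itself.  The temp merely records TEMP_VAL_CONST, and the value is
 * materialized later, either by temp_sync() storing or loading it, or
 * by a later op that can accept the constant as an immediate operand
 * (see the tcg_target_const_match check in tcg_reg_alloc_op).
 */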
3417
3418/*
3419 * Specialized code generation for INDEX_op_mov_*.
3420 */
3421static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3422{
3423    const TCGLifeData arg_life = op->life;
3424    TCGRegSet allocated_regs, preferred_regs;
3425    TCGTemp *ts, *ots;
3426    TCGType otype, itype;
3427
3428    allocated_regs = s->reserved_regs;
3429    preferred_regs = op->output_pref[0];
3430    ots = arg_temp(op->args[0]);
3431    ts = arg_temp(op->args[1]);
3432
3433    /* ENV should not be modified.  */
3434    tcg_debug_assert(!temp_readonly(ots));
3435
3436    /* Note that otype != itype for no-op truncation.  */
3437    otype = ots->type;
3438    itype = ts->type;
3439
3440    if (ts->val_type == TEMP_VAL_CONST) {
3441        /* propagate constant or generate sti */
3442        tcg_target_ulong val = ts->val;
3443        if (IS_DEAD_ARG(1)) {
3444            temp_dead(s, ts);
3445        }
3446        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3447        return;
3448    }
3449
3450    /* If the source value is in memory we're going to be forced
3451       to have it in a register in order to perform the copy.  Copy
3452       the SOURCE value into its own register first, so that we
3453       don't have to reload SOURCE the next time it is used. */
3454    if (ts->val_type == TEMP_VAL_MEM) {
3455        temp_load(s, ts, tcg_target_available_regs[itype],
3456                  allocated_regs, preferred_regs);
3457    }
3458
3459    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3460    if (IS_DEAD_ARG(0)) {
3461        /* mov to a non-saved dead register makes no sense (even with
3462           liveness analysis disabled). */
3463        tcg_debug_assert(NEED_SYNC_ARG(0));
3464        if (!ots->mem_allocated) {
3465            temp_allocate_frame(s, ots);
3466        }
3467        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3468        if (IS_DEAD_ARG(1)) {
3469            temp_dead(s, ts);
3470        }
3471        temp_dead(s, ots);
3472    } else {
3473        if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3474            /* the mov can be suppressed */
3475            if (ots->val_type == TEMP_VAL_REG) {
3476                s->reg_to_temp[ots->reg] = NULL;
3477            }
3478            ots->reg = ts->reg;
3479            temp_dead(s, ts);
3480        } else {
3481            if (ots->val_type != TEMP_VAL_REG) {
3482                /* When allocating a new register, make sure to not spill the
3483                   input one. */
3484                tcg_regset_set_reg(allocated_regs, ts->reg);
3485                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3486                                         allocated_regs, preferred_regs,
3487                                         ots->indirect_base);
3488            }
3489            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3490                /*
3491                 * Cross register class move not supported.
3492                 * Store the source register into the destination slot
3493                 * and leave the destination temp as TEMP_VAL_MEM.
3494                 */
3495                assert(!temp_readonly(ots));
3496                if (!ots->mem_allocated) {
3497                    temp_allocate_frame(s, ots);
3498                }
3499                tcg_out_st(s, ts->type, ts->reg,
3500                           ots->mem_base->reg, ots->mem_offset);
3501                ots->mem_coherent = 1;
3502                temp_free_or_dead(s, ots, -1);
3503                return;
3504            }
3505        }
3506        ots->val_type = TEMP_VAL_REG;
3507        ots->mem_coherent = 0;
3508        s->reg_to_temp[ots->reg] = ots;
3509        if (NEED_SYNC_ARG(0)) {
3510            temp_sync(s, ots, allocated_regs, 0, 0);
3511        }
3512    }
3513}
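
/*
 * Example of the register-renaming fast path above: for a mov where the
 * input temp dies at this op and is not a fixed register, no host
 * instruction is emitted at all; the output temp simply takes ownership
 * of the input's register and the input is marked dead.
 */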
3514
3515/*
3516 * Specialized code generation for INDEX_op_dup_vec.
3517 */
3518static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3519{
3520    const TCGLifeData arg_life = op->life;
3521    TCGRegSet dup_out_regs, dup_in_regs;
3522    TCGTemp *its, *ots;
3523    TCGType itype, vtype;
3524    intptr_t endian_fixup;
3525    unsigned vece;
3526    bool ok;
3527
3528    ots = arg_temp(op->args[0]);
3529    its = arg_temp(op->args[1]);
3530
3531    /* ENV should not be modified.  */
3532    tcg_debug_assert(!temp_readonly(ots));
3533
3534    itype = its->type;
3535    vece = TCGOP_VECE(op);
3536    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3537
3538    if (its->val_type == TEMP_VAL_CONST) {
3539        /* Propagate constant via movi -> dupi.  */
3540        tcg_target_ulong val = its->val;
3541        if (IS_DEAD_ARG(1)) {
3542            temp_dead(s, its);
3543        }
3544        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3545        return;
3546    }
3547
3548    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3549    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3550
3551    /* Allocate the output register now.  */
3552    if (ots->val_type != TEMP_VAL_REG) {
3553        TCGRegSet allocated_regs = s->reserved_regs;
3554
3555        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3556            /* Make sure to not spill the input register. */
3557            tcg_regset_set_reg(allocated_regs, its->reg);
3558        }
3559        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3560                                 op->output_pref[0], ots->indirect_base);
3561        ots->val_type = TEMP_VAL_REG;
3562        ots->mem_coherent = 0;
3563        s->reg_to_temp[ots->reg] = ots;
3564    }
3565
3566    switch (its->val_type) {
3567    case TEMP_VAL_REG:
3568        /*
3569         * The dup constraints must be broad, covering all possible VECE.
3570         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3571         * to fail, indicating that extra moves are required for that case.
3572         */
3573        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3574            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3575                goto done;
3576            }
3577            /* Try again from memory or a vector input register.  */
3578        }
3579        if (!its->mem_coherent) {
3580            /*
3581             * The input register is not synced, and so an extra store
3582             * would be required to use memory.  Attempt an integer-vector
3583             * register move first.  We do not have a TCGRegSet for this.
3584             */
3585            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3586                break;
3587            }
3588            /* Sync the temp back to its slot and load from there.  */
3589            temp_sync(s, its, s->reserved_regs, 0, 0);
3590        }
3591        /* fall through */
3592
3593    case TEMP_VAL_MEM:
3594#ifdef HOST_WORDS_BIGENDIAN
3595        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3596        endian_fixup -= 1 << vece;
3597#else
3598        endian_fixup = 0;
3599#endif
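        /*
         * Worked example: on a big-endian host with itype == TCG_TYPE_I64
         * and vece == MO_8, endian_fixup is 8 - 1 == 7, the offset of the
         * least significant byte of the in-memory value, i.e. the element
         * to be replicated.
         */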
3600        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3601                             its->mem_offset + endian_fixup)) {
3602            goto done;
3603        }
3604        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3605        break;
3606
3607    default:
3608        g_assert_not_reached();
3609    }
3610
3611    /* We now have a vector input register, so dup must succeed. */
3612    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3613    tcg_debug_assert(ok);
3614
3615 done:
3616    if (IS_DEAD_ARG(1)) {
3617        temp_dead(s, its);
3618    }
3619    if (NEED_SYNC_ARG(0)) {
3620        temp_sync(s, ots, s->reserved_regs, 0, 0);
3621    }
3622    if (IS_DEAD_ARG(0)) {
3623        temp_dead(s, ots);
3624    }
3625}
3626
3627static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3628{
3629    const TCGLifeData arg_life = op->life;
3630    const TCGOpDef * const def = &tcg_op_defs[op->opc];
3631    TCGRegSet i_allocated_regs;
3632    TCGRegSet o_allocated_regs;
3633    int i, k, nb_iargs, nb_oargs;
3634    TCGReg reg;
3635    TCGArg arg;
3636    const TCGArgConstraint *arg_ct;
3637    TCGTemp *ts;
3638    TCGArg new_args[TCG_MAX_OP_ARGS];
3639    int const_args[TCG_MAX_OP_ARGS];
3640
3641    nb_oargs = def->nb_oargs;
3642    nb_iargs = def->nb_iargs;
3643
3644    /* copy constants */
3645    memcpy(new_args + nb_oargs + nb_iargs, 
3646           op->args + nb_oargs + nb_iargs,
3647           sizeof(TCGArg) * def->nb_cargs);
3648
3649    i_allocated_regs = s->reserved_regs;
3650    o_allocated_regs = s->reserved_regs;
3651
3652    /* satisfy input constraints */ 
3653    for (k = 0; k < nb_iargs; k++) {
3654        TCGRegSet i_preferred_regs, o_preferred_regs;
3655
3656        i = def->args_ct[nb_oargs + k].sort_index;
3657        arg = op->args[i];
3658        arg_ct = &def->args_ct[i];
3659        ts = arg_temp(arg);
3660
3661        if (ts->val_type == TEMP_VAL_CONST
3662            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3663            /* constant is OK for instruction */
3664            const_args[i] = 1;
3665            new_args[i] = ts->val;
3666            continue;
3667        }
3668
3669        i_preferred_regs = o_preferred_regs = 0;
3670        if (arg_ct->ialias) {
3671            o_preferred_regs = op->output_pref[arg_ct->alias_index];
3672
3673            /*
3674             * If the input is readonly, then it cannot also be an
3675             * output and aliased to itself.  If the input is not
3676             * dead after the instruction, we must allocate a new
3677             * register and move it.
3678             */
3679            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3680                goto allocate_in_reg;
3681            }
3682
3683            /*
3684             * Check if the current register has already been allocated
3685             * for another input aliased to an output.
3686             */
3687            if (ts->val_type == TEMP_VAL_REG) {
3688                reg = ts->reg;
3689                for (int k2 = 0; k2 < k; k2++) {
3690                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
3691                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3692                        goto allocate_in_reg;
3693                    }
3694                }
3695            }
3696            i_preferred_regs = o_preferred_regs;
3697        }
3698
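        /*
         * Note on the duplicate check above: if two inputs that each
         * alias an output currently share one host register, only the
         * first may keep it; the second jumps to allocate_in_reg so that
         * each aliased output ends up clobbering its own copy.
         */
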
3699        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3700        reg = ts->reg;
3701
3702        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3703 allocate_in_reg:
3704            /*
3705             * Allocate a new register matching the constraint
3706             * and move the temporary register into it.
3707             */
3708            temp_load(s, ts, tcg_target_available_regs[ts->type],
3709                      i_allocated_regs, 0);
3710            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3711                                o_preferred_regs, ts->indirect_base);
3712            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3713                /*
3714                 * Cross register class move not supported.  Sync the
3715                 * temp back to its slot and load from there.
3716                 */
3717                temp_sync(s, ts, i_allocated_regs, 0, 0);
3718                tcg_out_ld(s, ts->type, reg,
3719                           ts->mem_base->reg, ts->mem_offset);
3720            }
3721        }
3722        new_args[i] = reg;
3723        const_args[i] = 0;
3724        tcg_regset_set_reg(i_allocated_regs, reg);
3725    }
3726    
3727    /* mark dead temporaries and free the associated registers */
3728    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3729        if (IS_DEAD_ARG(i)) {
3730            temp_dead(s, arg_temp(op->args[i]));
3731        }
3732    }
3733
3734    if (def->flags & TCG_OPF_COND_BRANCH) {
3735        tcg_reg_alloc_cbranch(s, i_allocated_regs);
3736    } else if (def->flags & TCG_OPF_BB_END) {
3737        tcg_reg_alloc_bb_end(s, i_allocated_regs);
3738    } else {
3739        if (def->flags & TCG_OPF_CALL_CLOBBER) {
3740            /* XXX: permit a generic clobber register list? */
3741            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3742                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3743                    tcg_reg_free(s, i, i_allocated_regs);
3744                }
3745            }
3746        }
3747        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3748            /* sync globals if the op has side effects and might trigger
3749               an exception. */
3750            sync_globals(s, i_allocated_regs);
3751        }
3752        
3753        /* satisfy the output constraints */
3754        for (k = 0; k < nb_oargs; k++) {
3755            i = def->args_ct[k].sort_index;
3756            arg = op->args[i];
3757            arg_ct = &def->args_ct[i];
3758            ts = arg_temp(arg);
3759
3760            /* ENV should not be modified.  */
3761            tcg_debug_assert(!temp_readonly(ts));
3762
3763            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3764                reg = new_args[arg_ct->alias_index];
3765            } else if (arg_ct->newreg) {
3766                reg = tcg_reg_alloc(s, arg_ct->regs,
3767                                    i_allocated_regs | o_allocated_regs,
3768                                    op->output_pref[k], ts->indirect_base);
3769            } else {
3770                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3771                                    op->output_pref[k], ts->indirect_base);
3772            }
3773            tcg_regset_set_reg(o_allocated_regs, reg);
3774            if (ts->val_type == TEMP_VAL_REG) {
3775                s->reg_to_temp[ts->reg] = NULL;
3776            }
3777            ts->val_type = TEMP_VAL_REG;
3778            ts->reg = reg;
3779            /*
3780             * Temp value is modified, so the value kept in memory is
3781             * potentially not the same.
3782             */
3783            ts->mem_coherent = 0;
3784            s->reg_to_temp[reg] = ts;
3785            new_args[i] = reg;
3786        }
3787    }
3788
3789    /* emit instruction */
3790    if (def->flags & TCG_OPF_VECTOR) {
3791        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3792                       new_args, const_args);
3793    } else {
3794        tcg_out_op(s, op->opc, new_args, const_args);
3795    }
3796
3797    /* move the outputs in the correct register if needed */
3798    for (i = 0; i < nb_oargs; i++) {
3799        ts = arg_temp(op->args[i]);
3800
3801        /* ENV should not be modified.  */
3802        tcg_debug_assert(!temp_readonly(ts));
3803
3804        if (NEED_SYNC_ARG(i)) {
3805            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3806        } else if (IS_DEAD_ARG(i)) {
3807            temp_dead(s, ts);
3808        }
3809    }
3810}
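
/*
 * A compressed example of the flow above, for a hypothetical host where
 * add_i32 carries the constraints "r, r, ri": given add_i32 t2, t0, t1
 * with t1 holding a constant the target accepts as an immediate, t1 is
 * passed through const_args[], t0 is loaded (or kept) in a register
 * satisfying "r", dead inputs are then released, an output register is
 * chosen honouring any alias/newreg constraints, and tcg_out_op() finally
 * emits the host instruction from new_args[]/const_args[].
 */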
3811
3812static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3813{
3814    const TCGLifeData arg_life = op->life;
3815    TCGTemp *ots, *itsl, *itsh;
3816    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3817
3818    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3819    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3820    tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3821
3822    ots = arg_temp(op->args[0]);
3823    itsl = arg_temp(op->args[1]);
3824    itsh = arg_temp(op->args[2]);
3825
3826    /* ENV should not be modified.  */
3827    tcg_debug_assert(!temp_readonly(ots));
3828
3829    /* Allocate the output register now.  */
3830    if (ots->val_type != TEMP_VAL_REG) {
3831        TCGRegSet allocated_regs = s->reserved_regs;
3832        TCGRegSet dup_out_regs =
3833            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3834
3835        /* Make sure to not spill the input registers. */
3836        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3837            tcg_regset_set_reg(allocated_regs, itsl->reg);
3838        }
3839        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3840            tcg_regset_set_reg(allocated_regs, itsh->reg);
3841        }
3842
3843        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3844                                 op->output_pref[0], ots->indirect_base);
3845        ots->val_type = TEMP_VAL_REG;
3846        ots->mem_coherent = 0;
3847        s->reg_to_temp[ots->reg] = ots;
3848    }
3849
3850    /* Promote dup2 of immediates to dupi_vec. */
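    /*
     * Worked example: itsl->val == 0x00000001 and itsh->val == 0x00000001
     * combine to val == 0x0000000100000001, which dup_const() matches at
     * MO_32, so a 32-bit rather than a 64-bit dupi is emitted.
     */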
3851    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3852        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3853        MemOp vece = MO_64;
3854
3855        if (val == dup_const(MO_8, val)) {
3856            vece = MO_8;
3857        } else if (val == dup_const(MO_16, val)) {
3858            vece = MO_16;
3859        } else if (val == dup_const(MO_32, val)) {
3860            vece = MO_32;
3861        }
3862
3863        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3864        goto done;
3865    }
3866
3867    /* If the two inputs form one 64-bit value, try dupm_vec. */
3868    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3869        if (!itsl->mem_coherent) {
3870            temp_sync(s, itsl, s->reserved_regs, 0, 0);
3871        }
3872        if (!itsh->mem_coherent) {
3873            temp_sync(s, itsh, s->reserved_regs, 0, 0);
3874        }
3875#ifdef HOST_WORDS_BIGENDIAN
3876        TCGTemp *its = itsh;
3877#else
3878        TCGTemp *its = itsl;
3879#endif
3880        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3881                             its->mem_base->reg, its->mem_offset)) {
3882            goto done;
3883        }
3884    }
3885
3886    /* Fall back to generic expansion. */
3887    return false;
3888
3889 done:
3890    if (IS_DEAD_ARG(1)) {
3891        temp_dead(s, itsl);
3892    }
3893    if (IS_DEAD_ARG(2)) {
3894        temp_dead(s, itsh);
3895    }
3896    if (NEED_SYNC_ARG(0)) {
3897        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3898    } else if (IS_DEAD_ARG(0)) {
3899        temp_dead(s, ots);
3900    }
3901    return true;
3902}
3903
3904#ifdef TCG_TARGET_STACK_GROWSUP
3905#define STACK_DIR(x) (-(x))
3906#else
3907#define STACK_DIR(x) (x)
3908#endif
3909
3910static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3911{
3912    const int nb_oargs = TCGOP_CALLO(op);
3913    const int nb_iargs = TCGOP_CALLI(op);
3914    const TCGLifeData arg_life = op->life;
3915    const TCGHelperInfo *info;
3916    int flags, nb_regs, i;
3917    TCGReg reg;
3918    TCGArg arg;
3919    TCGTemp *ts;
3920    intptr_t stack_offset;
3921    size_t call_stack_size;
3922    tcg_insn_unit *func_addr;
3923    int allocate_args;
3924    TCGRegSet allocated_regs;
3925
3926    func_addr = tcg_call_func(op);
3927    info = tcg_call_info(op);
3928    flags = info->flags;
3929
3930    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3931    if (nb_regs > nb_iargs) {
3932        nb_regs = nb_iargs;
3933    }
3934
3935    /* assign stack slots first */
3936    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3937    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
3938        ~(TCG_TARGET_STACK_ALIGN - 1);
3939    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3940    if (allocate_args) {
3941        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3942           preallocate call stack */
3943        tcg_abort();
3944    }
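    /*
     * E.g. three excess arguments with an 8-byte tcg_target_long and a
     * 16-byte TCG_TARGET_STACK_ALIGN give call_stack_size = 24, rounded
     * up to 32, which must still fit within TCG_STATIC_CALL_ARGS_SIZE.
     */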
3945
3946    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3947    for (i = nb_regs; i < nb_iargs; i++) {
3948        arg = op->args[nb_oargs + i];
3949#ifdef TCG_TARGET_STACK_GROWSUP
3950        stack_offset -= sizeof(tcg_target_long);
3951#endif
3952        if (arg != TCG_CALL_DUMMY_ARG) {
3953            ts = arg_temp(arg);
3954            temp_load(s, ts, tcg_target_available_regs[ts->type],
3955                      s->reserved_regs, 0);
3956            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3957        }
3958#ifndef TCG_TARGET_STACK_GROWSUP
3959        stack_offset += sizeof(tcg_target_long);
3960#endif
3961    }
3962    
3963    /* assign input registers */
3964    allocated_regs = s->reserved_regs;
3965    for (i = 0; i < nb_regs; i++) {
3966        arg = op->args[nb_oargs + i];
3967        if (arg != TCG_CALL_DUMMY_ARG) {
3968            ts = arg_temp(arg);
3969            reg = tcg_target_call_iarg_regs[i];
3970
3971            if (ts->val_type == TEMP_VAL_REG) {
3972                if (ts->reg != reg) {
3973                    tcg_reg_free(s, reg, allocated_regs);
3974                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3975                        /*
3976                         * Cross register class move not supported.  Sync the
3977                         * temp back to its slot and load from there.
3978                         */
3979                        temp_sync(s, ts, allocated_regs, 0, 0);
3980                        tcg_out_ld(s, ts->type, reg,
3981                                   ts->mem_base->reg, ts->mem_offset);
3982                    }
3983                }
3984            } else {
3985                TCGRegSet arg_set = 0;
3986
3987                tcg_reg_free(s, reg, allocated_regs);
3988                tcg_regset_set_reg(arg_set, reg);
3989                temp_load(s, ts, arg_set, allocated_regs, 0);
3990            }
3991
3992            tcg_regset_set_reg(allocated_regs, reg);
3993        }
3994    }
3995    
3996    /* mark dead temporaries and free the associated registers */
3997    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3998        if (IS_DEAD_ARG(i)) {
3999            temp_dead(s, arg_temp(op->args[i]));
4000        }
4001    }
4002    
4003    /* clobber call registers */
4004    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4005        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4006            tcg_reg_free(s, i, allocated_regs);
4007        }
4008    }
4009
4010    /* Save globals if they might be written by the helper, sync them if
4011       they might be read. */
4012    if (flags & TCG_CALL_NO_READ_GLOBALS) {
4013        /* Nothing to do */
4014    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4015        sync_globals(s, allocated_regs);
4016    } else {
4017        save_globals(s, allocated_regs);
4018    }
4019
4020#ifdef CONFIG_TCG_INTERPRETER
4021    {
4022        gpointer hash = (gpointer)(uintptr_t)info->typemask;
4023        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4024        assert(cif != NULL);
4025        tcg_out_call(s, func_addr, cif);
4026    }
4027#else
4028    tcg_out_call(s, func_addr);
4029#endif
4030
4031    /* assign output registers and emit moves if needed */
4032    for (i = 0; i < nb_oargs; i++) {
4033        arg = op->args[i];
4034        ts = arg_temp(arg);
4035
4036        /* ENV should not be modified.  */
4037        tcg_debug_assert(!temp_readonly(ts));
4038
4039        reg = tcg_target_call_oarg_regs[i];
4040        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4041        if (ts->val_type == TEMP_VAL_REG) {
4042            s->reg_to_temp[ts->reg] = NULL;
4043        }
4044        ts->val_type = TEMP_VAL_REG;
4045        ts->reg = reg;
4046        ts->mem_coherent = 0;
4047        s->reg_to_temp[reg] = ts;
4048        if (NEED_SYNC_ARG(i)) {
4049            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4050        } else if (IS_DEAD_ARG(i)) {
4051            temp_dead(s, ts);
4052        }
4053    }
4054}
4055
4056#ifdef CONFIG_PROFILER
4057
4058/* avoid copy/paste errors */
4059#define PROF_ADD(to, from, field)                       \
4060    do {                                                \
4061        (to)->field += qatomic_read(&((from)->field));  \
4062    } while (0)
4063
4064#define PROF_MAX(to, from, field)                                       \
4065    do {                                                                \
4066        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4067        if (val__ > (to)->field) {                                      \
4068            (to)->field = val__;                                        \
4069        }                                                               \
4070    } while (0)
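
/*
 * For instance, PROF_ADD(prof, orig, tb_count) expands to
 *     prof->tb_count += qatomic_read(&orig->tb_count);
 * so each context's counter is read atomically while being accumulated.
 */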
4071
4072/* Pass in a zeroed @prof */
4073static inline
4074void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4075{
4076    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4077    unsigned int i;
4078
4079    for (i = 0; i < n_ctxs; i++) {
4080        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4081        const TCGProfile *orig = &s->prof;
4082
4083        if (counters) {
4084            PROF_ADD(prof, orig, cpu_exec_time);
4085            PROF_ADD(prof, orig, tb_count1);
4086            PROF_ADD(prof, orig, tb_count);
4087            PROF_ADD(prof, orig, op_count);
4088            PROF_MAX(prof, orig, op_count_max);
4089            PROF_ADD(prof, orig, temp_count);
4090            PROF_MAX(prof, orig, temp_count_max);
4091            PROF_ADD(prof, orig, del_op_count);
4092            PROF_ADD(prof, orig, code_in_len);
4093            PROF_ADD(prof, orig, code_out_len);
4094            PROF_ADD(prof, orig, search_out_len);
4095            PROF_ADD(prof, orig, interm_time);
4096            PROF_ADD(prof, orig, code_time);
4097            PROF_ADD(prof, orig, la_time);
4098            PROF_ADD(prof, orig, opt_time);
4099            PROF_ADD(prof, orig, restore_count);
4100            PROF_ADD(prof, orig, restore_time);
4101        }
4102        if (table) {
4103            int i;
4104
4105            for (i = 0; i < NB_OPS; i++) {
4106                PROF_ADD(prof, orig, table_op_count[i]);
4107            }
4108        }
4109    }
4110}
4111
4112#undef PROF_ADD
4113#undef PROF_MAX
4114
4115static void tcg_profile_snapshot_counters(TCGProfile *prof)
4116{
4117    tcg_profile_snapshot(prof, true, false);
4118}
4119
4120static void tcg_profile_snapshot_table(TCGProfile *prof)
4121{
4122    tcg_profile_snapshot(prof, false, true);
4123}
4124
4125void tcg_dump_op_count(GString *buf)
4126{
4127    TCGProfile prof = {};
4128    int i;
4129
4130    tcg_profile_snapshot_table(&prof);
4131    for (i = 0; i < NB_OPS; i++) {
4132        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4133                               prof.table_op_count[i]);
4134    }
4135}
4136
4137int64_t tcg_cpu_exec_time(void)
4138{
4139    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4140    unsigned int i;
4141    int64_t ret = 0;
4142
4143    for (i = 0; i < n_ctxs; i++) {
4144        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4145        const TCGProfile *prof = &s->prof;
4146
4147        ret += qatomic_read(&prof->cpu_exec_time);
4148    }
4149    return ret;
4150}
4151#else
4152void tcg_dump_op_count(GString *buf)
4153{
4154    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4155}
4156
4157int64_t tcg_cpu_exec_time(void)
4158{
4159    error_report("%s: TCG profiler not compiled", __func__);
4160    exit(EXIT_FAILURE);
4161}
4162#endif
4163
4164
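/*
 * Returns the size in bytes of the generated host code on success, or a
 * negative value when translation must be restarted: -1 if the
 * code_gen_buffer high-water mark was crossed, -2 if relocation
 * resolution failed or an insn offset overflowed the 16-bit
 * gen_insn_end_off entries; the ldst and constant-pool finalizers may
 * also return their own negative codes.
 */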
4165int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4166{
4167#ifdef CONFIG_PROFILER
4168    TCGProfile *prof = &s->prof;
4169#endif
4170    int i, num_insns;
4171    TCGOp *op;
4172
4173#ifdef CONFIG_PROFILER
4174    {
4175        int n = 0;
4176
4177        QTAILQ_FOREACH(op, &s->ops, link) {
4178            n++;
4179        }
4180        qatomic_set(&prof->op_count, prof->op_count + n);
4181        if (n > prof->op_count_max) {
4182            qatomic_set(&prof->op_count_max, n);
4183        }
4184
4185        n = s->nb_temps;
4186        qatomic_set(&prof->temp_count, prof->temp_count + n);
4187        if (n > prof->temp_count_max) {
4188            qatomic_set(&prof->temp_count_max, n);
4189        }
4190    }
4191#endif
4192
4193#ifdef DEBUG_DISAS
4194    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4195                 && qemu_log_in_addr_range(tb->pc))) {
4196        FILE *logfile = qemu_log_lock();
4197        qemu_log("OP:\n");
4198        tcg_dump_ops(s, false);
4199        qemu_log("\n");
4200        qemu_log_unlock(logfile);
4201    }
4202#endif
4203
4204#ifdef CONFIG_DEBUG_TCG
4205    /* Ensure all labels referenced have been emitted.  */
4206    {
4207        TCGLabel *l;
4208        bool error = false;
4209
4210        QSIMPLEQ_FOREACH(l, &s->labels, next) {
4211            if (unlikely(!l->present) && l->refs) {
4212                qemu_log_mask(CPU_LOG_TB_OP,
4213                              "$L%d referenced but not present.\n", l->id);
4214                error = true;
4215            }
4216        }
4217        assert(!error);
4218    }
4219#endif
4220
4221#ifdef CONFIG_PROFILER
4222    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4223#endif
4224
4225#ifdef USE_TCG_OPTIMIZATIONS
4226    tcg_optimize(s);
4227#endif
4228
4229#ifdef CONFIG_PROFILER
4230    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4231    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4232#endif
4233
4234    reachable_code_pass(s);
4235    liveness_pass_1(s);
4236
4237    if (s->nb_indirects > 0) {
4238#ifdef DEBUG_DISAS
4239        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4240                     && qemu_log_in_addr_range(tb->pc))) {
4241            FILE *logfile = qemu_log_lock();
4242            qemu_log("OP before indirect lowering:\n");
4243            tcg_dump_ops(s, false);
4244            qemu_log("\n");
4245            qemu_log_unlock(logfile);
4246        }
4247#endif
4248        /* Replace indirect temps with direct temps.  */
4249        if (liveness_pass_2(s)) {
4250            /* If changes were made, re-run liveness.  */
4251            liveness_pass_1(s);
4252        }
4253    }
4254
4255#ifdef CONFIG_PROFILER
4256    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4257#endif
4258
4259#ifdef DEBUG_DISAS
4260    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4261                 && qemu_log_in_addr_range(tb->pc))) {
4262        FILE *logfile = qemu_log_lock();
4263        qemu_log("OP after optimization and liveness analysis:\n");
4264        tcg_dump_ops(s, true);
4265        qemu_log("\n");
4266        qemu_log_unlock(logfile);
4267    }
4268#endif
4269
4270    tcg_reg_alloc_start(s);
4271
4272    /*
4273     * Reset the buffer pointers when restarting after overflow.
4274     * TODO: Move this into translate-all.c with the rest of the
4275     * buffer management.  Having only this done here is confusing.
4276     */
4277    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4278    s->code_ptr = s->code_buf;
4279
4280#ifdef TCG_TARGET_NEED_LDST_LABELS
4281    QSIMPLEQ_INIT(&s->ldst_labels);
4282#endif
4283#ifdef TCG_TARGET_NEED_POOL_LABELS
4284    s->pool_labels = NULL;
4285#endif
4286
4287    num_insns = -1;
4288    QTAILQ_FOREACH(op, &s->ops, link) {
4289        TCGOpcode opc = op->opc;
4290
4291#ifdef CONFIG_PROFILER
4292        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4293#endif
4294
4295        switch (opc) {
4296        case INDEX_op_mov_i32:
4297        case INDEX_op_mov_i64:
4298        case INDEX_op_mov_vec:
4299            tcg_reg_alloc_mov(s, op);
4300            break;
4301        case INDEX_op_dup_vec:
4302            tcg_reg_alloc_dup(s, op);
4303            break;
4304        case INDEX_op_insn_start:
4305            if (num_insns >= 0) {
4306                size_t off = tcg_current_code_size(s);
4307                s->gen_insn_end_off[num_insns] = off;
4308                /* Assert that we do not overflow our stored offset.  */
4309                assert(s->gen_insn_end_off[num_insns] == off);
4310            }
4311            num_insns++;
4312            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4313                target_ulong a;
4314#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4315                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4316#else
4317                a = op->args[i];
4318#endif
4319                s->gen_insn_data[num_insns][i] = a;
4320            }
4321            break;
4322        case INDEX_op_discard:
4323            temp_dead(s, arg_temp(op->args[0]));
4324            break;
4325        case INDEX_op_set_label:
4326            tcg_reg_alloc_bb_end(s, s->reserved_regs);
4327            tcg_out_label(s, arg_label(op->args[0]));
4328            break;
4329        case INDEX_op_call:
4330            tcg_reg_alloc_call(s, op);
4331            break;
4332        case INDEX_op_dup2_vec:
4333            if (tcg_reg_alloc_dup2(s, op)) {
4334                break;
4335            }
4336            /* fall through */
4337        default:
4338            /* Sanity check that we've not introduced any unhandled opcodes. */
4339            tcg_debug_assert(tcg_op_supported(opc));
4340            /* Note: it would be much faster to have specialized
4341               register allocator functions for some common argument
4342               patterns. */
4343            tcg_reg_alloc_op(s, op);
4344            break;
4345        }
4346#ifdef CONFIG_DEBUG_TCG
4347        check_regs(s);
4348#endif
4349        /* Test for (pending) buffer overflow.  The assumption is that any
4350           one operation beginning below the high water mark cannot overrun
4351           the buffer completely.  Thus we can test for overflow after
4352           generating code without having to check during generation.  */
4353        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4354            return -1;
4355        }
4356        /* Test for TB overflow, as seen by gen_insn_end_off.  */
4357        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4358            return -2;
4359        }
4360    }
4361    tcg_debug_assert(num_insns >= 0);
4362    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4363
4364    /* Generate TB finalization at the end of block */
4365#ifdef TCG_TARGET_NEED_LDST_LABELS
4366    i = tcg_out_ldst_finalize(s);
4367    if (i < 0) {
4368        return i;
4369    }
4370#endif
4371#ifdef TCG_TARGET_NEED_POOL_LABELS
4372    i = tcg_out_pool_finalize(s);
4373    if (i < 0) {
4374        return i;
4375    }
4376#endif
4377    if (!tcg_resolve_relocs(s)) {
4378        return -2;
4379    }
4380
4381#ifndef CONFIG_TCG_INTERPRETER
4382    /* flush instruction cache */
4383    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4384                        (uintptr_t)s->code_buf,
4385                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4386#endif
4387
4388    return tcg_current_code_size(s);
4389}
4390
4391#ifdef CONFIG_PROFILER
4392void tcg_dump_info(GString *buf)
4393{
4394    TCGProfile prof = {};
4395    const TCGProfile *s;
4396    int64_t tb_count;
4397    int64_t tb_div_count;
4398    int64_t tot;
4399
4400    tcg_profile_snapshot_counters(&prof);
4401    s = &prof;
4402    tb_count = s->tb_count;
4403    tb_div_count = tb_count ? tb_count : 1;
4404    tot = s->interm_time + s->code_time;
4405
4406    g_string_append_printf(buf, "JIT cycles          %" PRId64
4407                           " (%0.3f s at 2.4 GHz)\n",
4408                           tot, tot / 2.4e9);
4409    g_string_append_printf(buf, "translated TBs      %" PRId64
4410                           " (aborted=%" PRId64 " %0.1f%%)\n",
4411                           tb_count, s->tb_count1 - tb_count,
4412                           (double)(s->tb_count1 - s->tb_count)
4413                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4414    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4415                           (double)s->op_count / tb_div_count, s->op_count_max);
4416    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4417                           (double)s->del_op_count / tb_div_count);
4418    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4419                           (double)s->temp_count / tb_div_count,
4420                           s->temp_count_max);
4421    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4422                           (double)s->code_out_len / tb_div_count);
4423    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4424                           (double)s->search_out_len / tb_div_count);
4425    
4426    g_string_append_printf(buf, "cycles/op           %0.1f\n",
4427                           s->op_count ? (double)tot / s->op_count : 0);
4428    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4429                           s->code_in_len ? (double)tot / s->code_in_len : 0);
4430    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4431                           s->code_out_len ? (double)tot / s->code_out_len : 0);
4432    g_string_append_printf(buf, "cycles/search byte  %0.1f\n",
4433                           s->search_out_len ?
4434                           (double)tot / s->search_out_len : 0);
4435    if (tot == 0) {
4436        tot = 1;
4437    }
4438    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4439                           (double)s->interm_time / tot * 100.0);
4440    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4441                           (double)s->code_time / tot * 100.0);
4442    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4443                           (double)s->opt_time / (s->code_time ?
4444                                                  s->code_time : 1)
4445                           * 100.0);
4446    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4447                           (double)s->la_time / (s->code_time ?
4448                                                 s->code_time : 1) * 100.0);
4449    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4450                           s->restore_count);
4451    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4452                           s->restore_count ?
4453                           (double)s->restore_time / s->restore_count : 0);
4454}
4455#else
4456void tcg_dump_info(GString *buf)
4457{
4458    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4459}
4460#endif
4461
4462#ifdef ELF_HOST_MACHINE
4463/* In order to use this feature, the backend needs to do three things:
4464
4465   (1) Define ELF_HOST_MACHINE to indicate both what value to
4466       put into the ELF image and to indicate support for the feature.
4467
4468   (2) Define tcg_register_jit.  This should create a buffer containing
4469       the contents of a .debug_frame section that describes the post-
4470       prologue unwind info for the tcg machine.
4471
4472   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4473*/
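
/*
 * A minimal sketch of the backend side (illustrative only -- the real
 * versions live in each tcg-target.c.inc and contain genuine unwind
 * opcodes for that host's ABI):
 *
 *     static const DebugFrame debug_frame = {
 *         ... CIE and FDE contents describing the prologue ...
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */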
4474
4475/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4476typedef enum {
4477    JIT_NOACTION = 0,
4478    JIT_REGISTER_FN,
4479    JIT_UNREGISTER_FN
4480} jit_actions_t;
4481
4482struct jit_code_entry {
4483    struct jit_code_entry *next_entry;
4484    struct jit_code_entry *prev_entry;
4485    const void *symfile_addr;
4486    uint64_t symfile_size;
4487};
4488
4489struct jit_descriptor {
4490    uint32_t version;
4491    uint32_t action_flag;
4492    struct jit_code_entry *relevant_entry;
4493    struct jit_code_entry *first_entry;
4494};
4495
4496void __jit_debug_register_code(void) __attribute__((noinline));
4497void __jit_debug_register_code(void)
4498{
4499    asm("");
4500}
4501
4502/* Must statically initialize the version, because GDB may check
4503   the version before we can set it.  */
4504struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4505
4506/* End GDB interface.  */
4507
4508static int find_string(const char *strtab, const char *str)
4509{
4510    const char *p = strtab + 1;
4511
4512    while (1) {
4513        if (strcmp(p, str) == 0) {
4514            return p - strtab;
4515        }
4516        p += strlen(p) + 1;
4517    }
4518}
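
/*
 * Given the string table assembled in tcg_register_jit_int() below
 * ("\0" ".text\0" ".debug_info\0" ...), find_string(img->str, ".text")
 * returns 1 and find_string(img->str, ".debug_info") returns 7.  The
 * caller must guarantee that @str is present; there is no end-of-table
 * check.
 */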
4519
4520static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4521                                 const void *debug_frame,
4522                                 size_t debug_frame_size)
4523{
4524    struct __attribute__((packed)) DebugInfo {
4525        uint32_t  len;
4526        uint16_t  version;
4527        uint32_t  abbrev;
4528        uint8_t   ptr_size;
4529        uint8_t   cu_die;
4530        uint16_t  cu_lang;
4531        uintptr_t cu_low_pc;
4532        uintptr_t cu_high_pc;
4533        uint8_t   fn_die;
4534        char      fn_name[16];
4535        uintptr_t fn_low_pc;
4536        uintptr_t fn_high_pc;
4537        uint8_t   cu_eoc;
4538    };
4539
4540    struct ElfImage {
4541        ElfW(Ehdr) ehdr;
4542        ElfW(Phdr) phdr;
4543        ElfW(Shdr) shdr[7];
4544        ElfW(Sym)  sym[2];
4545        struct DebugInfo di;
4546        uint8_t    da[24];
4547        char       str[80];
4548    };
4549
4550    struct ElfImage *img;
4551
4552    static const struct ElfImage img_template = {
4553        .ehdr = {
4554            .e_ident[EI_MAG0] = ELFMAG0,
4555            .e_ident[EI_MAG1] = ELFMAG1,
4556            .e_ident[EI_MAG2] = ELFMAG2,
4557            .e_ident[EI_MAG3] = ELFMAG3,
4558            .e_ident[EI_CLASS] = ELF_CLASS,
4559            .e_ident[EI_DATA] = ELF_DATA,
4560            .e_ident[EI_VERSION] = EV_CURRENT,
4561            .e_type = ET_EXEC,
4562            .e_machine = ELF_HOST_MACHINE,
4563            .e_version = EV_CURRENT,
4564            .e_phoff = offsetof(struct ElfImage, phdr),
4565            .e_shoff = offsetof(struct ElfImage, shdr),
4566            .e_ehsize = sizeof(ElfW(Ehdr)),
4567            .e_phentsize = sizeof(ElfW(Phdr)),
4568            .e_phnum = 1,
4569            .e_shentsize = sizeof(ElfW(Shdr)),
4570            .e_shnum = ARRAY_SIZE(img->shdr),
4571            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4572#ifdef ELF_HOST_FLAGS
4573            .e_flags = ELF_HOST_FLAGS,
4574#endif
4575#ifdef ELF_OSABI
4576            .e_ident[EI_OSABI] = ELF_OSABI,
4577#endif
4578        },
4579        .phdr = {
4580            .p_type = PT_LOAD,
4581            .p_flags = PF_X,
4582        },
4583        .shdr = {
4584            [0] = { .sh_type = SHT_NULL },
4585            /* Trick: The contents of code_gen_buffer are not present in
4586               this fake ELF file; that got allocated elsewhere.  Therefore
4587               we mark .text as SHT_NOBITS (similar to .bss) so that readers
4588               will not look for contents.  We can record any address.  */
4589            [1] = { /* .text */
4590                .sh_type = SHT_NOBITS,
4591                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4592            },
4593            [2] = { /* .debug_info */
4594                .sh_type = SHT_PROGBITS,
4595                .sh_offset = offsetof(struct ElfImage, di),
4596                .sh_size = sizeof(struct DebugInfo),
4597            },
4598            [3] = { /* .debug_abbrev */
4599                .sh_type = SHT_PROGBITS,
4600                .sh_offset = offsetof(struct ElfImage, da),
4601                .sh_size = sizeof(img->da),
4602            },
4603            [4] = { /* .debug_frame */
4604                .sh_type = SHT_PROGBITS,
4605                .sh_offset = sizeof(struct ElfImage),
4606            },
4607            [5] = { /* .symtab */
4608                .sh_type = SHT_SYMTAB,
4609                .sh_offset = offsetof(struct ElfImage, sym),
4610                .sh_size = sizeof(img->sym),
4611                .sh_info = 1,
4612                .sh_link = ARRAY_SIZE(img->shdr) - 1,
4613                .sh_entsize = sizeof(ElfW(Sym)),
4614            },
4615            [6] = { /* .strtab */
4616                .sh_type = SHT_STRTAB,
4617                .sh_offset = offsetof(struct ElfImage, str),
4618                .sh_size = sizeof(img->str),
4619            }
4620        },
4621        .sym = {
4622            [1] = { /* code_gen_buffer */
4623                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4624                .st_shndx = 1,
4625            }
4626        },
4627        .di = {
4628            .len = sizeof(struct DebugInfo) - 4,
4629            .version = 2,
4630            .ptr_size = sizeof(void *),
4631            .cu_die = 1,
4632            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4633            .fn_die = 2,
4634            .fn_name = "code_gen_buffer"
4635        },
4636        .da = {
4637            1,          /* abbrev number (the cu) */
4638            0x11, 1,    /* DW_TAG_compile_unit, has children */
4639            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4640            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4641            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4642            0, 0,       /* end of abbrev */
4643            2,          /* abbrev number (the fn) */
4644            0x2e, 0,    /* DW_TAG_subprogram, no children */
4645            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4646            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4647            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4648            0, 0,       /* end of abbrev */
4649            0           /* no more abbrev */
4650        },
4651        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4652               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4653    };
4654
4655    /* We only need a single jit entry; statically allocate it.  */
4656    static struct jit_code_entry one_entry;
4657
4658    uintptr_t buf = (uintptr_t)buf_ptr;
4659    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4660    DebugFrameHeader *dfh;
4661
4662    img = g_malloc(img_size);
4663    *img = img_template;
4664
4665    img->phdr.p_vaddr = buf;
4666    img->phdr.p_paddr = buf;
4667    img->phdr.p_memsz = buf_size;
4668
4669    img->shdr[1].sh_name = find_string(img->str, ".text");
4670    img->shdr[1].sh_addr = buf;
4671    img->shdr[1].sh_size = buf_size;
4672
4673    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4674    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4675
4676    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4677    img->shdr[4].sh_size = debug_frame_size;
4678
4679    img->shdr[5].sh_name = find_string(img->str, ".symtab");
4680    img->shdr[6].sh_name = find_string(img->str, ".strtab");
4681
4682    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4683    img->sym[1].st_value = buf;
4684    img->sym[1].st_size = buf_size;
4685
4686    img->di.cu_low_pc = buf;
4687    img->di.cu_high_pc = buf + buf_size;
4688    img->di.fn_low_pc = buf;
4689    img->di.fn_high_pc = buf + buf_size;
4690
4691    dfh = (DebugFrameHeader *)(img + 1);
4692    memcpy(dfh, debug_frame, debug_frame_size);
4693    dfh->fde.func_start = buf;
4694    dfh->fde.func_len = buf_size;
4695
4696#ifdef DEBUG_JIT
4697    /* Enable this block to debug the ELF image file creation.
4698       One can use readelf, objdump, or other inspection utilities.  */
4699    {
4700        FILE *f = fopen("/tmp/qemu.jit", "w+b");
4701        if (f) {
4702            if (fwrite(img, img_size, 1, f) != 1) {
4703                /* Avoid stupid unused return value warning for fwrite.  */
4704            }
4705            fclose(f);
4706        }
4707    }
4708#endif
4709
4710    one_entry.symfile_addr = img;
4711    one_entry.symfile_size = img_size;
4712
4713    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4714    __jit_debug_descriptor.relevant_entry = &one_entry;
4715    __jit_debug_descriptor.first_entry = &one_entry;
4716    __jit_debug_register_code();
4717}
4718#else
4719/* No support for the feature.  Provide the entry point expected by exec.c,
4720   and implement the internal function we declared earlier.  */
4721
4722static void tcg_register_jit_int(const void *buf, size_t size,
4723                                 const void *debug_frame,
4724                                 size_t debug_frame_size)
4725{
4726}
4727
4728void tcg_register_jit(const void *buf, size_t buf_size)
4729{
4730}
4731#endif /* ELF_HOST_MACHINE */
4732
4733#if !TCG_TARGET_MAYBE_vec
4734void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4735{
4736    g_assert_not_reached();
4737}
4738#endif
4739