qemu/tcg/tcg.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg-internal.h"

#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
#ifdef CONFIG_TCG_INTERPRETER
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         ffi_cif *cif);
#else
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
#endif
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
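
/*
 * Editor's note (illustrative, not from upstream): tcg_insn_unit is the
 * backend's natural code-emission granule.  On a byte-oriented host such
 * as x86 the unit size is 1, so tcg_out32() above advances s->code_ptr by
 * four one-byte units; on a typical RISC host the unit size is 4 and the
 * same call emits a single unit.  The memcpy path covers hosts whose unit
 * is smaller than the value being emitted.
 */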

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
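
/*
 * Illustrative sketch (editor's addition; names are placeholders, not a
 * real backend): a target emitting a forward branch to a label with no
 * address yet records a relocation and lets tcg_resolve_relocs() patch
 * it once tcg_out_label() has assigned the value:
 *
 *     if (l->has_value) {
 *         tcg_out_branch(s, l->u.value_ptr);          // backward: known
 *     } else {
 *         tcg_out_reloc(s, s->code_ptr, R_MY_BRANCH, l, 0);
 *         tcg_out_branch(s, NULL);                    // patched later
 *     }
 */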

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

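/*
 * Worked example (editor's addition): a line such as
 *     C_O1_I2(r, r, ri)
 * in tcg-target-con-set.h is expanded three different ways above:
 *   1. as the enumerator c_o1_i2_r_r_ri in TCGConstraintSetIndex;
 *   2. as { .args_ct_str = { "r", "r", "ri" } } in constraint_sets[];
 *   3. (with the definitions just above) as the value a backend's
 *      tcg_target_op_def() returns for ops using that constraint set.
 * All three stay in sync because they come from the same include.
 */
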
#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
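
/*
 * Usage sketch (editor's addition): callers go through tcg_malloc()
 * (declared in tcg/tcg.h), whose fast path just bumps s->pool_cur and
 * falls back here only when the current chunk is exhausted, e.g.
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *
 * Nothing is freed individually; tcg_pool_reset() below releases the
 * large allocations and rewinds the chunk list between translations.
 */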

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static GHashTable *ffi_table;

static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
#endif
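
/*
 * Editor's note on the typemask encoding assumed by the code below:
 * 3 bits per slot, return type in bits [2:0], argument N (from 0) in
 * bits [3N+5:3N+3].  For example, a helper returning i32 and taking a
 * single i64 argument has typemask
 *     (dh_typecode_i64 << 3) | dh_typecode_i32
 * which is why nargs is recovered from the position of the last
 * non-zero 3-bit field in (typemask >> 3).
 */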

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    ffi_table = g_hash_table_new(NULL, NULL);
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        uint32_t typemask = all_helpers[i].typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        ffi_status status;
        int nargs;

        if (g_hash_table_lookup(ffi_table, hash)) {
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi[typemask & 7];
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            /* Use a separate index so the outer helper loop's i survives. */
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi[typecode];
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
    }
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(s->code_gen_ptr, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(s->code_gen_ptr, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
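
/*
 * Editor's note: free temps are kept in per-kind bitmaps, indexed above
 * by k = type + (temp_local ? TCG_TYPE_COUNT : 0), so an I32 TEMP_NORMAL
 * and an I32 TEMP_LOCAL never share a free list.  tcg_temp_free_internal()
 * below computes the same index when returning a temp to the pool.
 */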

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
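
/*
 * Usage sketch (editor's addition): front ends normally reach this via
 * typed wrappers such as tcg_constant_i32(), e.g.
 *
 *     tcg_gen_add_i32(dst, src, tcg_constant_i32(4));
 *
 * The result is interned and read-only; unlike the tcg_const_*()
 * helpers below it must never be written, and freeing it is a silently
 * ignored no-op (see tcg_temp_free_internal above).
 */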

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
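
/*
 * Example (editor's addition): on a 64-bit host without vector support,
 * tcg_op_supported(INDEX_op_add_i64) is true while
 * tcg_op_supported(INDEX_op_add_vec) is false, so generic code must
 * expand or avoid vector ops before they reach the backend.
 */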

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned typemask;
    const TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    typemask = info->typemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_typemask = typemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    typemask = 0;
    for (i = real_args = 0; i < nargs; ++i) {
        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;

        if (is_64bit) {
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            TCGv_i32 h = tcg_temp_new_i32();
            TCGv_i32 l = tcg_temp_new_i32();
            tcg_gen_extr_i64_i32(l, h, orig);
            split_args[real_args++] = tcgv_i32_temp(h);
            typemask |= dh_typecode_i32 << (real_args * 3);
            split_args[real_args++] = tcgv_i32_temp(l);
            typemask |= dh_typecode_i32 << (real_args * 3);
        } else {
            split_args[real_args++] = args[i];
            typemask |= argtype << (real_args * 3);
        }
    }
    nargs = real_args;
    args = split_args;
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
        bool is_signed = argtype & 1;

        if (is_32bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if ((typemask & 6) == dh_typecode_i64) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
        bool want_align = false;

#if defined(CONFIG_TCG_INTERPRETER)
        /*
         * Align all arguments, so that they land in predictable places
         * for passing off to ffi_call.
         */
        want_align = true;
#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
        /* Some targets want aligned 64 bit args */
        want_align = is_64bit;
#endif

        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
            op->args[pi++] = TCG_CALL_DUMMY_ARG;
            real_args++;
        }

        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
            /*
             * If stack grows up, then we will be placing successive
             * arguments at lower addresses, which means we need to
             * reverse the order compared to how we would normally
             * treat either big or little-endian.  For those arguments
             * that will wind up in registers, this still works for
             * HPPA (the only current STACK_GROWSUP target) since the
             * argument registers are *also* allocated in decreasing
             * order.  If another such target is added, this logic may
             * have to get more complicated to differentiate between
             * stack arguments and register arguments.
             */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;

        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if ((orig_typemask & 6) == dh_typecode_i64) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;

        if (is_32bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1650
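/*
 * Reset each temp's value location for a fresh allocation pass:
 * constants start as TEMP_VAL_CONST, fixed temps in their register,
 * globals in their canonical memory slot; locals nominally start in
 * memory (their stack slot is allocated lazily on first use) and
 * normal temps start dead.
 */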
1651static void tcg_reg_alloc_start(TCGContext *s)
1652{
1653    int i, n;
1654
1655    for (i = 0, n = s->nb_temps; i < n; i++) {
1656        TCGTemp *ts = &s->temps[i];
1657        TCGTempVal val = TEMP_VAL_MEM;
1658
1659        switch (ts->kind) {
1660        case TEMP_CONST:
1661            val = TEMP_VAL_CONST;
1662            break;
1663        case TEMP_FIXED:
1664            val = TEMP_VAL_REG;
1665            break;
1666        case TEMP_GLOBAL:
1667            break;
1668        case TEMP_NORMAL:
1669            val = TEMP_VAL_DEAD;
1670            /* fall through */
1671        case TEMP_LOCAL:
1672            ts->mem_allocated = 0;
1673            break;
1674        default:
1675            g_assert_not_reached();
1676        }
1677        ts->val_type = val;
1678    }
1679
1680    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1681}
1682
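/*
 * Format a temp for dumping: fixed and global temps print their name
 * (e.g. "env"), locals print as "locN", normal temps as "tmpN", and
 * constants as "$0x..." (vector constants as "v<bits>$0x...").
 */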
1683static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1684                                 TCGTemp *ts)
1685{
1686    int idx = temp_idx(ts);
1687
1688    switch (ts->kind) {
1689    case TEMP_FIXED:
1690    case TEMP_GLOBAL:
1691        pstrcpy(buf, buf_size, ts->name);
1692        break;
1693    case TEMP_LOCAL:
1694        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1695        break;
1696    case TEMP_NORMAL:
1697        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1698        break;
1699    case TEMP_CONST:
1700        switch (ts->type) {
1701        case TCG_TYPE_I32:
1702            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1703            break;
1704#if TCG_TARGET_REG_BITS > 32
1705        case TCG_TYPE_I64:
1706            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1707            break;
1708#endif
1709        case TCG_TYPE_V64:
1710        case TCG_TYPE_V128:
1711        case TCG_TYPE_V256:
1712            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1713                     64 << (ts->type - TCG_TYPE_V64), ts->val);
1714            break;
1715        default:
1716            g_assert_not_reached();
1717        }
1718        break;
1719    }
1720    return buf;
1721}
1722
1723static char *tcg_get_arg_str(TCGContext *s, char *buf,
1724                             int buf_size, TCGArg arg)
1725{
1726    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1727}
1728
1729static const char * const cond_name[] =
1730{
1731    [TCG_COND_NEVER] = "never",
1732    [TCG_COND_ALWAYS] = "always",
1733    [TCG_COND_EQ] = "eq",
1734    [TCG_COND_NE] = "ne",
1735    [TCG_COND_LT] = "lt",
1736    [TCG_COND_GE] = "ge",
1737    [TCG_COND_LE] = "le",
1738    [TCG_COND_GT] = "gt",
1739    [TCG_COND_LTU] = "ltu",
1740    [TCG_COND_GEU] = "geu",
1741    [TCG_COND_LEU] = "leu",
1742    [TCG_COND_GTU] = "gtu"
1743};
1744
1745static const char * const ldst_name[] =
1746{
1747    [MO_UB]   = "ub",
1748    [MO_SB]   = "sb",
1749    [MO_LEUW] = "leuw",
1750    [MO_LESW] = "lesw",
1751    [MO_LEUL] = "leul",
1752    [MO_LESL] = "lesl",
1753    [MO_LEQ]  = "leq",
1754    [MO_BEUW] = "beuw",
1755    [MO_BESW] = "besw",
1756    [MO_BEUL] = "beul",
1757    [MO_BESL] = "besl",
1758    [MO_BEQ]  = "beq",
1759};
1760
1761static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1762#ifdef TARGET_ALIGNED_ONLY
1763    [MO_UNALN >> MO_ASHIFT]    = "un+",
1764    [MO_ALIGN >> MO_ASHIFT]    = "",
1765#else
1766    [MO_UNALN >> MO_ASHIFT]    = "",
1767    [MO_ALIGN >> MO_ASHIFT]    = "al+",
1768#endif
1769    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1770    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1771    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1772    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1773    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1774    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1775};
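/*
 * In dumps the two tables above combine with the mmu index, e.g. a load
 * with MO_LEUL | MO_ALIGN at mmu_idx 1 is appended as ",al+leul,1"
 * (on a target without TARGET_ALIGNED_ONLY).
 */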
1776
1777static const char bswap_flag_name[][6] = {
1778    [TCG_BSWAP_IZ] = "iz",
1779    [TCG_BSWAP_OZ] = "oz",
1780    [TCG_BSWAP_OS] = "os",
1781    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1782    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1783};
1784
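/* True when at most one bit of the set is set; note this includes the
   empty set, so callers must pass a non-empty set.  */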
1785static inline bool tcg_regset_single(TCGRegSet d)
1786{
1787    return (d & (d - 1)) == 0;
1788}
1789
1790static inline TCGReg tcg_regset_first(TCGRegSet d)
1791{
1792    if (TCG_TARGET_NB_REGS <= 32) {
1793        return ctz32(d);
1794    } else {
1795        return ctz64(d);
1796    }
1797}
1798
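/*
 * Dump the ops list to the log, one op per line.  The output looks
 * roughly like (illustrative only):
 *
 *   ---- 0000000000401000
 *   mov_i32 tmp0,var
 *   brcond_i32 tmp0,$0x0,eq,$L0
 *
 * optionally followed by sync/dead liveness info and register
 * preferences for each op.
 */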
1799static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1800{
1801    char buf[128];
1802    TCGOp *op;
1803
1804    QTAILQ_FOREACH(op, &s->ops, link) {
1805        int i, k, nb_oargs, nb_iargs, nb_cargs;
1806        const TCGOpDef *def;
1807        TCGOpcode c;
1808        int col = 0;
1809
1810        c = op->opc;
1811        def = &tcg_op_defs[c];
1812
1813        if (c == INDEX_op_insn_start) {
1814            nb_oargs = 0;
1815            col += qemu_log("\n ----");
1816
1817            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1818                target_ulong a;
1819#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1820                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1821#else
1822                a = op->args[i];
1823#endif
1824                col += qemu_log(" " TARGET_FMT_lx, a);
1825            }
1826        } else if (c == INDEX_op_call) {
1827            const TCGHelperInfo *info = tcg_call_info(op);
1828            void *func = tcg_call_func(op);
1829
1830            /* variable number of arguments */
1831            nb_oargs = TCGOP_CALLO(op);
1832            nb_iargs = TCGOP_CALLI(op);
1833            nb_cargs = def->nb_cargs;
1834
1835            col += qemu_log(" %s ", def->name);
1836
1837            /*
1838             * Print the function name from TCGHelperInfo, if available.
1839             * Note that plugins have a template function for the info,
1840             * but the actual function pointer comes from the plugin.
1841             */
1842            if (func == info->func) {
1843                col += qemu_log("%s", info->name);
1844            } else {
1845                col += qemu_log("plugin(%p)", func);
1846            }
1847
1848            col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1849            for (i = 0; i < nb_oargs; i++) {
1850                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1851                                                       op->args[i]));
1852            }
1853            for (i = 0; i < nb_iargs; i++) {
1854                TCGArg arg = op->args[nb_oargs + i];
1855                const char *t = "<dummy>";
1856                if (arg != TCG_CALL_DUMMY_ARG) {
1857                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1858                }
1859                col += qemu_log(",%s", t);
1860            }
1861        } else {
1862            col += qemu_log(" %s ", def->name);
1863
1864            nb_oargs = def->nb_oargs;
1865            nb_iargs = def->nb_iargs;
1866            nb_cargs = def->nb_cargs;
1867
1868            if (def->flags & TCG_OPF_VECTOR) {
1869                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1870                                8 << TCGOP_VECE(op));
1871            }
1872
1873            k = 0;
1874            for (i = 0; i < nb_oargs; i++) {
1875                if (k != 0) {
1876                    col += qemu_log(",");
1877                }
1878                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1879                                                      op->args[k++]));
1880            }
1881            for (i = 0; i < nb_iargs; i++) {
1882                if (k != 0) {
1883                    col += qemu_log(",");
1884                }
1885                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1886                                                      op->args[k++]));
1887            }
1888            switch (c) {
1889            case INDEX_op_brcond_i32:
1890            case INDEX_op_setcond_i32:
1891            case INDEX_op_movcond_i32:
1892            case INDEX_op_brcond2_i32:
1893            case INDEX_op_setcond2_i32:
1894            case INDEX_op_brcond_i64:
1895            case INDEX_op_setcond_i64:
1896            case INDEX_op_movcond_i64:
1897            case INDEX_op_cmp_vec:
1898            case INDEX_op_cmpsel_vec:
1899                if (op->args[k] < ARRAY_SIZE(cond_name)
1900                    && cond_name[op->args[k]]) {
1901                    col += qemu_log(",%s", cond_name[op->args[k++]]);
1902                } else {
1903                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1904                }
1905                i = 1;
1906                break;
1907            case INDEX_op_qemu_ld_i32:
1908            case INDEX_op_qemu_st_i32:
1909            case INDEX_op_qemu_st8_i32:
1910            case INDEX_op_qemu_ld_i64:
1911            case INDEX_op_qemu_st_i64:
1912                {
1913                    TCGMemOpIdx oi = op->args[k++];
1914                    MemOp mop = get_memop(oi);
1915                    unsigned ix = get_mmuidx(oi);
1916
1917                    if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1918                        col += qemu_log(",$0x%x,%u", mop, ix);
1919                    } else {
1920                        const char *s_al, *s_op;
1921                        s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
1922                        s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
1923                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1924                    }
1925                    i = 1;
1926                }
1927                break;
1928            case INDEX_op_bswap16_i32:
1929            case INDEX_op_bswap16_i64:
1930            case INDEX_op_bswap32_i32:
1931            case INDEX_op_bswap32_i64:
1932            case INDEX_op_bswap64_i64:
1933                {
1934                    TCGArg flags = op->args[k];
1935                    const char *name = NULL;
1936
1937                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
1938                        name = bswap_flag_name[flags];
1939                    }
1940                    if (name) {
1941                        col += qemu_log(",%s", name);
1942                    } else {
1943                        col += qemu_log(",$0x%" TCG_PRIlx, flags);
1944                    }
1945                    i = k = 1;
1946                }
1947                break;
1948            default:
1949                i = 0;
1950                break;
1951            }
1952            switch (c) {
1953            case INDEX_op_set_label:
1954            case INDEX_op_br:
1955            case INDEX_op_brcond_i32:
1956            case INDEX_op_brcond_i64:
1957            case INDEX_op_brcond2_i32:
1958                col += qemu_log("%s$L%d", k ? "," : "",
1959                                arg_label(op->args[k])->id);
1960                i++, k++;
1961                break;
1962            default:
1963                break;
1964            }
1965            for (; i < nb_cargs; i++, k++) {
1966                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1967            }
1968        }
1969
1970        if (have_prefs || op->life) {
1971
1972            QemuLogFile *logfile;
1973
1974            rcu_read_lock();
1975            logfile = qatomic_rcu_read(&qemu_logfile);
1976            if (logfile) {
1977                for (; col < 40; ++col) {
1978                    putc(' ', logfile->fd);
1979                }
1980            }
1981            rcu_read_unlock();
1982        }
1983
1984        if (op->life) {
1985            unsigned life = op->life;
1986
1987            if (life & (SYNC_ARG * 3)) {
1988                qemu_log("  sync:");
1989                for (i = 0; i < 2; ++i) {
1990                    if (life & (SYNC_ARG << i)) {
1991                        qemu_log(" %d", i);
1992                    }
1993                }
1994            }
1995            life /= DEAD_ARG;
1996            if (life) {
1997                qemu_log("  dead:");
1998                for (i = 0; life; ++i, life >>= 1) {
1999                    if (life & 1) {
2000                        qemu_log(" %d", i);
2001                    }
2002                }
2003            }
2004        }
2005
2006        if (have_prefs) {
2007            for (i = 0; i < nb_oargs; ++i) {
2008                TCGRegSet set = op->output_pref[i];
2009
2010                if (i == 0) {
2011                    qemu_log("  pref=");
2012                } else {
2013                    qemu_log(",");
2014                }
2015                if (set == 0) {
2016                    qemu_log("none");
2017                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2018                    qemu_log("all");
2019#ifdef CONFIG_DEBUG_TCG
2020                } else if (tcg_regset_single(set)) {
2021                    TCGReg reg = tcg_regset_first(set);
2022                    qemu_log("%s", tcg_target_reg_names[reg]);
2023#endif
2024                } else if (TCG_TARGET_NB_REGS <= 32) {
2025                    qemu_log("%#x", (uint32_t)set);
2026                } else {
2027                    qemu_log("%#" PRIx64, (uint64_t)set);
2028                }
2029            }
2030        }
2031
2032        qemu_log("\n");
2033    }
2034}
2035
2036/* we give more priority to constraints with fewer registers */
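/* E.g. an output alias counts as one register and so gets the highest
   priority (TCG_TARGET_NB_REGS); an any-register constraint sorts last. */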
2037static int get_constraint_priority(const TCGOpDef *def, int k)
2038{
2039    const TCGArgConstraint *arg_ct = &def->args_ct[k];
2040    int n;
2041
2042    if (arg_ct->oalias) {
2043        /* an alias is equivalent to a single register */
2044        n = 1;
2045    } else {
2046        n = ctpop64(arg_ct->regs);
2047    }
2048    return TCG_TARGET_NB_REGS - n + 1;
2049}
2050
2051/* sort from highest priority to lowest */
2052static void sort_constraints(TCGOpDef *def, int start, int n)
2053{
2054    int i, j;
2055    TCGArgConstraint *a = def->args_ct;
2056
2057    for (i = 0; i < n; i++) {
2058        a[start + i].sort_index = start + i;
2059    }
2060    if (n <= 1) {
2061        return;
2062    }
2063    for (i = 0; i < n - 1; i++) {
2064        for (j = i + 1; j < n; j++) {
2065            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2066            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2067            if (p1 < p2) {
2068                int tmp = a[start + i].sort_index;
2069                a[start + i].sort_index = a[start + j].sort_index;
2070                a[start + j].sort_index = tmp;
2071            }
2072        }
2073    }
2074}
2075
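/*
 * Translate the target's constraint strings into args_ct.  E.g. a
 * typical three-operand op might use the set { "r", "r", "ri" }:
 * output in any register, first input in any register, second input
 * register or immediate.  A leading digit such as "0" ties an input
 * to the output with that index.
 */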
2076static void process_op_defs(TCGContext *s)
2077{
2078    TCGOpcode op;
2079
2080    for (op = 0; op < NB_OPS; op++) {
2081        TCGOpDef *def = &tcg_op_defs[op];
2082        const TCGTargetOpDef *tdefs;
2083        int i, nb_args;
2084
2085        if (def->flags & TCG_OPF_NOT_PRESENT) {
2086            continue;
2087        }
2088
2089        nb_args = def->nb_iargs + def->nb_oargs;
2090        if (nb_args == 0) {
2091            continue;
2092        }
2093
2094        /*
2095         * Macro magic should make it impossible, but double-check that
2096         * the array index is in range.  Since the signedness of an enum
2097         * is implementation-defined, force the result to unsigned.
2098         */
2099        unsigned con_set = tcg_target_op_def(op);
2100        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2101        tdefs = &constraint_sets[con_set];
2102
2103        for (i = 0; i < nb_args; i++) {
2104            const char *ct_str = tdefs->args_ct_str[i];
2105            /* Incomplete TCGTargetOpDef entry. */
2106            tcg_debug_assert(ct_str != NULL);
2107
2108            while (*ct_str != '\0') {
2109                switch (*ct_str) {
2110                case '0' ... '9':
2111                    {
2112                        int oarg = *ct_str - '0';
2113                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2114                        tcg_debug_assert(oarg < def->nb_oargs);
2115                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
2116                        def->args_ct[i] = def->args_ct[oarg];
2117                        /* The output sets oalias.  */
2118                        def->args_ct[oarg].oalias = true;
2119                        def->args_ct[oarg].alias_index = i;
2120                        /* The input sets ialias. */
2121                        def->args_ct[i].ialias = true;
2122                        def->args_ct[i].alias_index = oarg;
2123                    }
2124                    ct_str++;
2125                    break;
2126                case '&':
2127                    def->args_ct[i].newreg = true;
2128                    ct_str++;
2129                    break;
2130                case 'i':
2131                    def->args_ct[i].ct |= TCG_CT_CONST;
2132                    ct_str++;
2133                    break;
2134
2135                /* Include all of the target-specific constraints. */
2136
2137#undef CONST
2138#define CONST(CASE, MASK) \
2139    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2140#define REGS(CASE, MASK) \
2141    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2142
2143#include "tcg-target-con-str.h"
2144
2145#undef REGS
2146#undef CONST
2147                default:
2148                    /* Typo in TCGTargetOpDef constraint. */
2149                    g_assert_not_reached();
2150                }
2151            }
2152        }
2153
2154        /* TCGTargetOpDef entry with too much information? */
2155        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2156
2157        /* sort the constraints (XXX: this is just a heuristic) */
2158        sort_constraints(def, 0, def->nb_oargs);
2159        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2160    }
2161}
2162
2163void tcg_op_remove(TCGContext *s, TCGOp *op)
2164{
2165    TCGLabel *label;
2166
2167    switch (op->opc) {
2168    case INDEX_op_br:
2169        label = arg_label(op->args[0]);
2170        label->refs--;
2171        break;
2172    case INDEX_op_brcond_i32:
2173    case INDEX_op_brcond_i64:
2174        label = arg_label(op->args[3]);
2175        label->refs--;
2176        break;
2177    case INDEX_op_brcond2_i32:
2178        label = arg_label(op->args[5]);
2179        label->refs--;
2180        break;
2181    default:
2182        break;
2183    }
2184
2185    QTAILQ_REMOVE(&s->ops, op, link);
2186    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2187    s->nb_ops--;
2188
2189#ifdef CONFIG_PROFILER
2190    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2191#endif
2192}
2193
2194void tcg_remove_ops_after(TCGOp *op)
2195{
2196    TCGContext *s = tcg_ctx;
2197
2198    while (true) {
2199        TCGOp *last = tcg_last_op();
2200        if (last == op) {
2201            return;
2202        }
2203        tcg_op_remove(s, last);
2204    }
2205}
2206
2207static TCGOp *tcg_op_alloc(TCGOpcode opc)
2208{
2209    TCGContext *s = tcg_ctx;
2210    TCGOp *op;
2211
2212    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2213        op = tcg_malloc(sizeof(TCGOp));
2214    } else {
2215        op = QTAILQ_FIRST(&s->free_ops);
2216        QTAILQ_REMOVE(&s->free_ops, op, link);
2217    }
2218    memset(op, 0, offsetof(TCGOp, link));
2219    op->opc = opc;
2220    s->nb_ops++;
2221
2222    return op;
2223}
2224
2225TCGOp *tcg_emit_op(TCGOpcode opc)
2226{
2227    TCGOp *op = tcg_op_alloc(opc);
2228    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2229    return op;
2230}
2231
2232TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2233{
2234    TCGOp *new_op = tcg_op_alloc(opc);
2235    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2236    return new_op;
2237}
2238
2239TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2240{
2241    TCGOp *new_op = tcg_op_alloc(opc);
2242    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2243    return new_op;
2244}
2245
2246/* Reachability analysis: remove unreachable code.  */
2247static void reachable_code_pass(TCGContext *s)
2248{
2249    TCGOp *op, *op_next;
2250    bool dead = false;
2251
2252    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2253        bool remove = dead;
2254        TCGLabel *label;
2255
2256        switch (op->opc) {
2257        case INDEX_op_set_label:
2258            label = arg_label(op->args[0]);
2259            if (label->refs == 0) {
2260                /*
2261                 * While there is an occasional backward branch, virtually
2262                 * all branches generated by the translators are forward.
2263                 * Which means that, generally, we will already have
2264                 * removed all references to this label, and there is
2265                 * little to be gained by iterating.
2266                 */
2267                remove = true;
2268            } else {
2269                /* Once we see a label, insns become live again.  */
2270                dead = false;
2271                remove = false;
2272
2273                /*
2274                 * Optimization can fold conditional branches to unconditional.
2275                 * If we find a label with one reference which is preceded by
2276                 * an unconditional branch to it, remove both.  This needed to
2277                 * wait until the dead code in between them was removed.
2278                 */
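                /* I.e. "br $L" immediately followed by "set_label $L"
                   collapses to nothing.  */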
2279                if (label->refs == 1) {
2280                    TCGOp *op_prev = QTAILQ_PREV(op, link);
2281                    if (op_prev->opc == INDEX_op_br &&
2282                        label == arg_label(op_prev->args[0])) {
2283                        tcg_op_remove(s, op_prev);
2284                        remove = true;
2285                    }
2286                }
2287            }
2288            break;
2289
2290        case INDEX_op_br:
2291        case INDEX_op_exit_tb:
2292        case INDEX_op_goto_ptr:
2293            /* Unconditional branches; everything following is dead.  */
2294            dead = true;
2295            break;
2296
2297        case INDEX_op_call:
2298            /* Notice noreturn helper calls, raising exceptions.  */
2299            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2300                dead = true;
2301            }
2302            break;
2303
2304        case INDEX_op_insn_start:
2305            /* Never remove -- we need to keep these for unwind.  */
2306            remove = false;
2307            break;
2308
2309        default:
2310            break;
2311        }
2312
2313        if (remove) {
2314            tcg_op_remove(s, op);
2315        }
2316    }
2317}
2318
2319#define TS_DEAD  1
2320#define TS_MEM   2
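/* Per-temp liveness state: TS_DEAD means the value has no further use;
   TS_MEM means the value must also be present in the temp's canonical
   memory slot.  */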
2321
2322#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2323#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2324
2325/* For liveness_pass_1, the register preferences for a given temp.  */
2326static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2327{
2328    return ts->state_ptr;
2329}
2330
2331/* For liveness_pass_1, reset the preferences for a given temp to the
2332 * maximal regset for its type.
2333 */
2334static inline void la_reset_pref(TCGTemp *ts)
2335{
2336    *la_temp_pref(ts)
2337        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2338}
2339
2340/* liveness analysis: end of function: all temps are dead, and globals
2341   should be in memory. */
2342static void la_func_end(TCGContext *s, int ng, int nt)
2343{
2344    int i;
2345
2346    for (i = 0; i < ng; ++i) {
2347        s->temps[i].state = TS_DEAD | TS_MEM;
2348        la_reset_pref(&s->temps[i]);
2349    }
2350    for (i = ng; i < nt; ++i) {
2351        s->temps[i].state = TS_DEAD;
2352        la_reset_pref(&s->temps[i]);
2353    }
2354}
2355
2356/* liveness analysis: end of basic block: all temps are dead, globals
2357   and local temps should be in memory. */
2358static void la_bb_end(TCGContext *s, int ng, int nt)
2359{
2360    int i;
2361
2362    for (i = 0; i < nt; ++i) {
2363        TCGTemp *ts = &s->temps[i];
2364        int state;
2365
2366        switch (ts->kind) {
2367        case TEMP_FIXED:
2368        case TEMP_GLOBAL:
2369        case TEMP_LOCAL:
2370            state = TS_DEAD | TS_MEM;
2371            break;
2372        case TEMP_NORMAL:
2373        case TEMP_CONST:
2374            state = TS_DEAD;
2375            break;
2376        default:
2377            g_assert_not_reached();
2378        }
2379        ts->state = state;
2380        la_reset_pref(ts);
2381    }
2382}
2383
2384/* liveness analysis: sync globals back to memory.  */
2385static void la_global_sync(TCGContext *s, int ng)
2386{
2387    int i;
2388
2389    for (i = 0; i < ng; ++i) {
2390        int state = s->temps[i].state;
2391        s->temps[i].state = state | TS_MEM;
2392        if (state == TS_DEAD) {
2393            /* If the global was previously dead, reset prefs.  */
2394            la_reset_pref(&s->temps[i]);
2395        }
2396    }
2397}
2398
2399/*
2400 * liveness analysis: conditional branch: all temps are dead,
2401 * globals and local temps should be synced.
2402 */
2403static void la_bb_sync(TCGContext *s, int ng, int nt)
2404{
2405    la_global_sync(s, ng);
2406
2407    for (int i = ng; i < nt; ++i) {
2408        TCGTemp *ts = &s->temps[i];
2409        int state;
2410
2411        switch (ts->kind) {
2412        case TEMP_LOCAL:
2413            state = ts->state;
2414            ts->state = state | TS_MEM;
2415            if (state != TS_DEAD) {
2416                continue;
2417            }
2418            break;
2419        case TEMP_NORMAL:
2420            s->temps[i].state = TS_DEAD;
2421            break;
2422        case TEMP_CONST:
2423            continue;
2424        default:
2425            g_assert_not_reached();
2426        }
2427        la_reset_pref(&s->temps[i]);
2428    }
2429}
2430
2431/* liveness analysis: sync globals back to memory and kill.  */
2432static void la_global_kill(TCGContext *s, int ng)
2433{
2434    int i;
2435
2436    for (i = 0; i < ng; i++) {
2437        s->temps[i].state = TS_DEAD | TS_MEM;
2438        la_reset_pref(&s->temps[i]);
2439    }
2440}
2441
2442/* liveness analysis: for temps live across a call, prefer call-saved regs.  */
2443static void la_cross_call(TCGContext *s, int nt)
2444{
2445    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2446    int i;
2447
2448    for (i = 0; i < nt; i++) {
2449        TCGTemp *ts = &s->temps[i];
2450        if (!(ts->state & TS_DEAD)) {
2451            TCGRegSet *pset = la_temp_pref(ts);
2452            TCGRegSet set = *pset;
2453
2454            set &= mask;
2455            /* If the combination is not possible, restart.  */
2456            if (set == 0) {
2457                set = tcg_target_available_regs[ts->type] & mask;
2458            }
2459            *pset = set;
2460        }
2461    }
2462}
2463
2464/* Liveness analysis: update the opc_arg_life array to indicate which
2465   input arguments are dead. Instructions that only update dead
2466   temporaries are removed. */
2467static void liveness_pass_1(TCGContext *s)
2468{
2469    int nb_globals = s->nb_globals;
2470    int nb_temps = s->nb_temps;
2471    TCGOp *op, *op_prev;
2472    TCGRegSet *prefs;
2473    int i;
2474
2475    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2476    for (i = 0; i < nb_temps; ++i) {
2477        s->temps[i].state_ptr = prefs + i;
2478    }
2479
2480    /* ??? Should be redundant with the exit_tb that ends the TB.  */
2481    la_func_end(s, nb_globals, nb_temps);
2482
2483    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2484        int nb_iargs, nb_oargs;
2485        TCGOpcode opc_new, opc_new2;
2486        bool have_opc_new2;
2487        TCGLifeData arg_life = 0;
2488        TCGTemp *ts;
2489        TCGOpcode opc = op->opc;
2490        const TCGOpDef *def = &tcg_op_defs[opc];
2491
2492        switch (opc) {
2493        case INDEX_op_call:
2494            {
2495                int call_flags;
2496                int nb_call_regs;
2497
2498                nb_oargs = TCGOP_CALLO(op);
2499                nb_iargs = TCGOP_CALLI(op);
2500                call_flags = tcg_call_flags(op);
2501
2502                /* pure functions can be removed if their result is unused */
2503                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2504                    for (i = 0; i < nb_oargs; i++) {
2505                        ts = arg_temp(op->args[i]);
2506                        if (ts->state != TS_DEAD) {
2507                            goto do_not_remove_call;
2508                        }
2509                    }
2510                    goto do_remove;
2511                }
2512            do_not_remove_call:
2513
2514                /* Output args are dead.  */
2515                for (i = 0; i < nb_oargs; i++) {
2516                    ts = arg_temp(op->args[i]);
2517                    if (ts->state & TS_DEAD) {
2518                        arg_life |= DEAD_ARG << i;
2519                    }
2520                    if (ts->state & TS_MEM) {
2521                        arg_life |= SYNC_ARG << i;
2522                    }
2523                    ts->state = TS_DEAD;
2524                    la_reset_pref(ts);
2525
2526                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2527                    op->output_pref[i] = 0;
2528                }
2529
2530                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2531                                    TCG_CALL_NO_READ_GLOBALS))) {
2532                    la_global_kill(s, nb_globals);
2533                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2534                    la_global_sync(s, nb_globals);
2535                }
2536
2537                /* Record arguments that die in this helper.  */
2538                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2539                    ts = arg_temp(op->args[i]);
2540                    if (ts && ts->state & TS_DEAD) {
2541                        arg_life |= DEAD_ARG << i;
2542                    }
2543                }
2544
2545                /* For all live registers, remove call-clobbered prefs.  */
2546                la_cross_call(s, nb_temps);
2547
2548                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2549
2550                /* Input arguments are live for preceding opcodes.  */
2551                for (i = 0; i < nb_iargs; i++) {
2552                    ts = arg_temp(op->args[i + nb_oargs]);
2553                    if (ts && ts->state & TS_DEAD) {
2554                        /* For those arguments that die, and will be allocated
2555                         * in registers, clear the register set for that arg,
2556                         * to be filled in below.  For args that will be on
2557                         * the stack, reset to any available reg.
2558                         */
2559                        *la_temp_pref(ts)
2560                            = (i < nb_call_regs ? 0 :
2561                               tcg_target_available_regs[ts->type]);
2562                        ts->state &= ~TS_DEAD;
2563                    }
2564                }
2565
2566                /* For each input argument, add its input register to prefs.
2567                   If a temp is used once, this produces a single set bit.  */
2568                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2569                    ts = arg_temp(op->args[i + nb_oargs]);
2570                    if (ts) {
2571                        tcg_regset_set_reg(*la_temp_pref(ts),
2572                                           tcg_target_call_iarg_regs[i]);
2573                    }
2574                }
2575            }
2576            break;
2577        case INDEX_op_insn_start:
2578            break;
2579        case INDEX_op_discard:
2580            /* mark the temporary as dead */
2581            ts = arg_temp(op->args[0]);
2582            ts->state = TS_DEAD;
2583            la_reset_pref(ts);
2584            break;
2585
2586        case INDEX_op_add2_i32:
2587            opc_new = INDEX_op_add_i32;
2588            goto do_addsub2;
2589        case INDEX_op_sub2_i32:
2590            opc_new = INDEX_op_sub_i32;
2591            goto do_addsub2;
2592        case INDEX_op_add2_i64:
2593            opc_new = INDEX_op_add_i64;
2594            goto do_addsub2;
2595        case INDEX_op_sub2_i64:
2596            opc_new = INDEX_op_sub_i64;
2597        do_addsub2:
2598            nb_iargs = 4;
2599            nb_oargs = 2;
2600            /* Test if the high part of the operation is dead, but not
2601               the low part.  The result can be optimized to a simple
2602               add or sub.  This often happens for an x86_64 guest
2603               when the cpu is running in 32-bit mode.  */
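            /* E.g. "add2_i32 lo,hi,al,ah,bl,bh" with hi dead becomes
               "add_i32 lo,al,bl".  */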
2604            if (arg_temp(op->args[1])->state == TS_DEAD) {
2605                if (arg_temp(op->args[0])->state == TS_DEAD) {
2606                    goto do_remove;
2607                }
2608                /* Replace the opcode and adjust the args in place,
2609                   leaving 3 unused args at the end.  */
2610                op->opc = opc = opc_new;
2611                op->args[1] = op->args[2];
2612                op->args[2] = op->args[4];
2613                /* Fall through and mark the single-word operation live.  */
2614                nb_iargs = 2;
2615                nb_oargs = 1;
2616            }
2617            goto do_not_remove;
2618
2619        case INDEX_op_mulu2_i32:
2620            opc_new = INDEX_op_mul_i32;
2621            opc_new2 = INDEX_op_muluh_i32;
2622            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2623            goto do_mul2;
2624        case INDEX_op_muls2_i32:
2625            opc_new = INDEX_op_mul_i32;
2626            opc_new2 = INDEX_op_mulsh_i32;
2627            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2628            goto do_mul2;
2629        case INDEX_op_mulu2_i64:
2630            opc_new = INDEX_op_mul_i64;
2631            opc_new2 = INDEX_op_muluh_i64;
2632            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2633            goto do_mul2;
2634        case INDEX_op_muls2_i64:
2635            opc_new = INDEX_op_mul_i64;
2636            opc_new2 = INDEX_op_mulsh_i64;
2637            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2638            goto do_mul2;
2639        do_mul2:
2640            nb_iargs = 2;
2641            nb_oargs = 2;
2642            if (arg_temp(op->args[1])->state == TS_DEAD) {
2643                if (arg_temp(op->args[0])->state == TS_DEAD) {
2644                    /* Both parts of the operation are dead.  */
2645                    goto do_remove;
2646                }
2647                /* The high part of the operation is dead; generate the low. */
2648                op->opc = opc = opc_new;
2649                op->args[1] = op->args[2];
2650                op->args[2] = op->args[3];
2651            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2652                /* The low part of the operation is dead; generate the high. */
2653                op->opc = opc = opc_new2;
2654                op->args[0] = op->args[1];
2655                op->args[1] = op->args[2];
2656                op->args[2] = op->args[3];
2657            } else {
2658                goto do_not_remove;
2659            }
2660            /* Mark the single-word operation live.  */
2661            nb_oargs = 1;
2662            goto do_not_remove;
2663
2664        default:
2665            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2666            nb_iargs = def->nb_iargs;
2667            nb_oargs = def->nb_oargs;
2668
2669            /* Test if the operation can be removed because all
2670               its outputs are dead. We assume that nb_oargs == 0
2671               implies side effects.  */
2672            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2673                for (i = 0; i < nb_oargs; i++) {
2674                    if (arg_temp(op->args[i])->state != TS_DEAD) {
2675                        goto do_not_remove;
2676                    }
2677                }
2678                goto do_remove;
2679            }
2680            goto do_not_remove;
2681
2682        do_remove:
2683            tcg_op_remove(s, op);
2684            break;
2685
2686        do_not_remove:
2687            for (i = 0; i < nb_oargs; i++) {
2688                ts = arg_temp(op->args[i]);
2689
2690                /* Remember the preference of the uses that followed.  */
2691                op->output_pref[i] = *la_temp_pref(ts);
2692
2693                /* Output args are dead.  */
2694                if (ts->state & TS_DEAD) {
2695                    arg_life |= DEAD_ARG << i;
2696                }
2697                if (ts->state & TS_MEM) {
2698                    arg_life |= SYNC_ARG << i;
2699                }
2700                ts->state = TS_DEAD;
2701                la_reset_pref(ts);
2702            }
2703
2704            /* If end of basic block, update.  */
2705            if (def->flags & TCG_OPF_BB_EXIT) {
2706                la_func_end(s, nb_globals, nb_temps);
2707            } else if (def->flags & TCG_OPF_COND_BRANCH) {
2708                la_bb_sync(s, nb_globals, nb_temps);
2709            } else if (def->flags & TCG_OPF_BB_END) {
2710                la_bb_end(s, nb_globals, nb_temps);
2711            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2712                la_global_sync(s, nb_globals);
2713                if (def->flags & TCG_OPF_CALL_CLOBBER) {
2714                    la_cross_call(s, nb_temps);
2715                }
2716            }
2717
2718            /* Record arguments that die in this opcode.  */
2719            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2720                ts = arg_temp(op->args[i]);
2721                if (ts->state & TS_DEAD) {
2722                    arg_life |= DEAD_ARG << i;
2723                }
2724            }
2725
2726            /* Input arguments are live for preceding opcodes.  */
2727            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2728                ts = arg_temp(op->args[i]);
2729                if (ts->state & TS_DEAD) {
2730                    /* For operands that were dead, initially allow
2731                       all regs for the type.  */
2732                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2733                    ts->state &= ~TS_DEAD;
2734                }
2735            }
2736
2737            /* Incorporate constraints for this operand.  */
2738            switch (opc) {
2739            case INDEX_op_mov_i32:
2740            case INDEX_op_mov_i64:
2741                /* Note that these are TCG_OPF_NOT_PRESENT and do not
2742                   have proper constraints.  That said, special case
2743                   moves to propagate preferences backward.  */
2744                if (IS_DEAD_ARG(1)) {
2745                    *la_temp_pref(arg_temp(op->args[0]))
2746                        = *la_temp_pref(arg_temp(op->args[1]));
2747                }
2748                break;
2749
2750            default:
2751                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2752                    const TCGArgConstraint *ct = &def->args_ct[i];
2753                    TCGRegSet set, *pset;
2754
2755                    ts = arg_temp(op->args[i]);
2756                    pset = la_temp_pref(ts);
2757                    set = *pset;
2758
2759                    set &= ct->regs;
2760                    if (ct->ialias) {
2761                        set &= op->output_pref[ct->alias_index];
2762                    }
2763                    /* If the combination is not possible, restart.  */
2764                    if (set == 0) {
2765                        set = ct->regs;
2766                    }
2767                    *pset = set;
2768                }
2769                break;
2770            }
2771            break;
2772        }
2773        op->life = arg_life;
2774    }
2775}
2776
2777/* Liveness analysis: Convert indirect regs to direct temporaries.  */
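/*
 * An "indirect" global is one addressed via another global rather than
 * via a fixed register (e.g. a cpu state field reached through an env
 * pointer that itself lives in memory).  Each such global gets a shadow
 * direct temp here, and explicit ld/st ops are inserted around its uses
 * so the rest of the backend sees only direct temps.
 */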
2778static bool liveness_pass_2(TCGContext *s)
2779{
2780    int nb_globals = s->nb_globals;
2781    int nb_temps, i;
2782    bool changes = false;
2783    TCGOp *op, *op_next;
2784
2785    /* Create a temporary for each indirect global.  */
2786    for (i = 0; i < nb_globals; ++i) {
2787        TCGTemp *its = &s->temps[i];
2788        if (its->indirect_reg) {
2789            TCGTemp *dts = tcg_temp_alloc(s);
2790            dts->type = its->type;
2791            dts->base_type = its->base_type;
2792            its->state_ptr = dts;
2793        } else {
2794            its->state_ptr = NULL;
2795        }
2796        /* All globals begin dead.  */
2797        its->state = TS_DEAD;
2798    }
2799    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2800        TCGTemp *its = &s->temps[i];
2801        its->state_ptr = NULL;
2802        its->state = TS_DEAD;
2803    }
2804
2805    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2806        TCGOpcode opc = op->opc;
2807        const TCGOpDef *def = &tcg_op_defs[opc];
2808        TCGLifeData arg_life = op->life;
2809        int nb_iargs, nb_oargs, call_flags;
2810        TCGTemp *arg_ts, *dir_ts;
2811
2812        if (opc == INDEX_op_call) {
2813            nb_oargs = TCGOP_CALLO(op);
2814            nb_iargs = TCGOP_CALLI(op);
2815            call_flags = tcg_call_flags(op);
2816        } else {
2817            nb_iargs = def->nb_iargs;
2818            nb_oargs = def->nb_oargs;
2819
2820            /* Set flags analogous to those that calls require.  */
2821            if (def->flags & TCG_OPF_COND_BRANCH) {
2822                /* Like reading globals: sync_globals */
2823                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2824            } else if (def->flags & TCG_OPF_BB_END) {
2825                /* Like writing globals: save_globals */
2826                call_flags = 0;
2827            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2828                /* Like reading globals: sync_globals */
2829                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2830            } else {
2831                /* No effect on globals.  */
2832                call_flags = (TCG_CALL_NO_READ_GLOBALS |
2833                              TCG_CALL_NO_WRITE_GLOBALS);
2834            }
2835        }
2836
2837        /* Make sure that input arguments are available.  */
2838        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2839            arg_ts = arg_temp(op->args[i]);
2840            if (arg_ts) {
2841                dir_ts = arg_ts->state_ptr;
2842                if (dir_ts && arg_ts->state == TS_DEAD) {
2843                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2844                                      ? INDEX_op_ld_i32
2845                                      : INDEX_op_ld_i64);
2846                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2847
2848                    lop->args[0] = temp_arg(dir_ts);
2849                    lop->args[1] = temp_arg(arg_ts->mem_base);
2850                    lop->args[2] = arg_ts->mem_offset;
2851
2852                    /* Loaded, but synced with memory.  */
2853                    arg_ts->state = TS_MEM;
2854                }
2855            }
2856        }
2857
2858        /* Perform input replacement, and mark inputs that became dead.
2859           No action is required except keeping temp_state up to date
2860           so that we reload when needed.  */
2861        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2862            arg_ts = arg_temp(op->args[i]);
2863            if (arg_ts) {
2864                dir_ts = arg_ts->state_ptr;
2865                if (dir_ts) {
2866                    op->args[i] = temp_arg(dir_ts);
2867                    changes = true;
2868                    if (IS_DEAD_ARG(i)) {
2869                        arg_ts->state = TS_DEAD;
2870                    }
2871                }
2872            }
2873        }
2874
2875        /* Liveness analysis should ensure that the following are
2876           all correct, for call sites and basic block end points.  */
2877        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2878            /* Nothing to do */
2879        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2880            for (i = 0; i < nb_globals; ++i) {
2881                /* Liveness should see that globals are synced back,
2882                   that is, either TS_DEAD or TS_MEM.  */
2883                arg_ts = &s->temps[i];
2884                tcg_debug_assert(arg_ts->state_ptr == 0
2885                                 || arg_ts->state != 0);
2886            }
2887        } else {
2888            for (i = 0; i < nb_globals; ++i) {
2889                /* Liveness should see that globals are saved back,
2890                   that is, TS_DEAD, waiting to be reloaded.  */
2891                arg_ts = &s->temps[i];
2892                tcg_debug_assert(arg_ts->state_ptr == 0
2893                                 || arg_ts->state == TS_DEAD);
2894            }
2895        }
2896
2897        /* Outputs become available.  */
2898        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2899            arg_ts = arg_temp(op->args[0]);
2900            dir_ts = arg_ts->state_ptr;
2901            if (dir_ts) {
2902                op->args[0] = temp_arg(dir_ts);
2903                changes = true;
2904
2905                /* The output is now live and modified.  */
2906                arg_ts->state = 0;
2907
2908                if (NEED_SYNC_ARG(0)) {
2909                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2910                                      ? INDEX_op_st_i32
2911                                      : INDEX_op_st_i64);
2912                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2913                    TCGTemp *out_ts = dir_ts;
2914
2915                    if (IS_DEAD_ARG(0)) {
2916                        out_ts = arg_temp(op->args[1]);
2917                        arg_ts->state = TS_DEAD;
2918                        tcg_op_remove(s, op);
2919                    } else {
2920                        arg_ts->state = TS_MEM;
2921                    }
2922
2923                    sop->args[0] = temp_arg(out_ts);
2924                    sop->args[1] = temp_arg(arg_ts->mem_base);
2925                    sop->args[2] = arg_ts->mem_offset;
2926                } else {
2927                    tcg_debug_assert(!IS_DEAD_ARG(0));
2928                }
2929            }
2930        } else {
2931            for (i = 0; i < nb_oargs; i++) {
2932                arg_ts = arg_temp(op->args[i]);
2933                dir_ts = arg_ts->state_ptr;
2934                if (!dir_ts) {
2935                    continue;
2936                }
2937                op->args[i] = temp_arg(dir_ts);
2938                changes = true;
2939
2940                /* The output is now live and modified.  */
2941                arg_ts->state = 0;
2942
2943                /* Sync outputs upon their last write.  */
2944                if (NEED_SYNC_ARG(i)) {
2945                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2946                                      ? INDEX_op_st_i32
2947                                      : INDEX_op_st_i64);
2948                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2949
2950                    sop->args[0] = temp_arg(dir_ts);
2951                    sop->args[1] = temp_arg(arg_ts->mem_base);
2952                    sop->args[2] = arg_ts->mem_offset;
2953
2954                    arg_ts->state = TS_MEM;
2955                }
2956                /* Drop outputs that are dead.  */
2957                if (IS_DEAD_ARG(i)) {
2958                    arg_ts->state = TS_DEAD;
2959                }
2960            }
2961        }
2962    }
2963
2964    return changes;
2965}
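/* A true return value means ops were inserted or rewritten above; the
   caller is then expected to re-run liveness so the new ld/st ops get
   life data.  */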
2966
2967#ifdef CONFIG_DEBUG_TCG
2968static void dump_regs(TCGContext *s)
2969{
2970    TCGTemp *ts;
2971    int i;
2972    char buf[64];
2973
2974    for (i = 0; i < s->nb_temps; i++) {
2975        ts = &s->temps[i];
2976        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2977        switch (ts->val_type) {
2978        case TEMP_VAL_REG:
2979            printf("%s", tcg_target_reg_names[ts->reg]);
2980            break;
2981        case TEMP_VAL_MEM:
2982            printf("%d(%s)", (int)ts->mem_offset,
2983                   tcg_target_reg_names[ts->mem_base->reg]);
2984            break;
2985        case TEMP_VAL_CONST:
2986            printf("$0x%" PRIx64, ts->val);
2987            break;
2988        case TEMP_VAL_DEAD:
2989            printf("D");
2990            break;
2991        default:
2992            printf("???");
2993            break;
2994        }
2995        printf("\n");
2996    }
2997
2998    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2999        if (s->reg_to_temp[i] != NULL) {
3000            printf("%s: %s\n",
3001                   tcg_target_reg_names[i],
3002                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3003        }
3004    }
3005}
3006
3007static void check_regs(TCGContext *s)
3008{
3009    int reg;
3010    int k;
3011    TCGTemp *ts;
3012    char buf[64];
3013
3014    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3015        ts = s->reg_to_temp[reg];
3016        if (ts != NULL) {
3017            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3018                printf("Inconsistency for register %s:\n",
3019                       tcg_target_reg_names[reg]);
3020                goto fail;
3021            }
3022        }
3023    }
3024    for (k = 0; k < s->nb_temps; k++) {
3025        ts = &s->temps[k];
3026        if (ts->val_type == TEMP_VAL_REG
3027            && ts->kind != TEMP_FIXED
3028            && s->reg_to_temp[ts->reg] != ts) {
3029            printf("Inconsistency for temp %s:\n",
3030                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3031        fail:
3032            printf("reg state:\n");
3033            dump_regs(s);
3034            tcg_abort();
3035        }
3036    }
3037}
3038#endif
3039
3040static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3041{
3042    intptr_t off, size, align;
3043
3044    switch (ts->type) {
3045    case TCG_TYPE_I32:
3046        size = align = 4;
3047        break;
3048    case TCG_TYPE_I64:
3049    case TCG_TYPE_V64:
3050        size = align = 8;
3051        break;
3052    case TCG_TYPE_V128:
3053        size = align = 16;
3054        break;
3055    case TCG_TYPE_V256:
3056        /* Note that we do not require aligned storage for V256. */
3057        size = 32, align = 16;
3058        break;
3059    default:
3060        g_assert_not_reached();
3061    }
3062
3063    assert(align <= TCG_TARGET_STACK_ALIGN);
3064    off = ROUND_UP(s->current_frame_offset, align);
3065
3066    /* If we've exhausted the stack frame, restart with a smaller TB. */
3067    if (off + size > s->frame_end) {
3068        tcg_raise_tb_overflow(s);
3069    }
3070    s->current_frame_offset = off + size;
3071
3072    ts->mem_offset = off;
3073#if defined(__sparc__)
3074    ts->mem_offset += TCG_TARGET_STACK_BIAS;
3075#endif
3076    ts->mem_base = s->frame_temp;
3077    ts->mem_allocated = 1;
3078}
3079
3080static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3081
3082/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3083   mark it free; otherwise mark it dead.  */
3084static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3085{
3086    TCGTempVal new_type;
3087
3088    switch (ts->kind) {
3089    case TEMP_FIXED:
3090        return;
3091    case TEMP_GLOBAL:
3092    case TEMP_LOCAL:
3093        new_type = TEMP_VAL_MEM;
3094        break;
3095    case TEMP_NORMAL:
3096        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3097        break;
3098    case TEMP_CONST:
3099        new_type = TEMP_VAL_CONST;
3100        break;
3101    default:
3102        g_assert_not_reached();
3103    }
3104    if (ts->val_type == TEMP_VAL_REG) {
3105        s->reg_to_temp[ts->reg] = NULL;
3106    }
3107    ts->val_type = new_type;
3108}
3109
3110/* Mark a temporary as dead.  */
3111static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3112{
3113    temp_free_or_dead(s, ts, 1);
3114}
3115
3116/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3117   register needs to be allocated to store a constant.  If 'free_or_dead'
3118   is non-zero, subsequently release the temporary; if it is positive, the
3119   temp is dead; if it is negative, the temp is free.  */
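/* E.g. tcg_reg_free() below passes free_or_dead = -1 to spill a
   register's temp and then mark it free.  */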
3120static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3121                      TCGRegSet preferred_regs, int free_or_dead)
3122{
3123    if (!temp_readonly(ts) && !ts->mem_coherent) {
3124        if (!ts->mem_allocated) {
3125            temp_allocate_frame(s, ts);
3126        }
3127        switch (ts->val_type) {
3128        case TEMP_VAL_CONST:
3129            /* If we're going to free the temp immediately, then we won't
3130               require it later in a register, so attempt to store the
3131               constant to memory directly.  */
3132            if (free_or_dead
3133                && tcg_out_sti(s, ts->type, ts->val,
3134                               ts->mem_base->reg, ts->mem_offset)) {
3135                break;
3136            }
3137            temp_load(s, ts, tcg_target_available_regs[ts->type],
3138                      allocated_regs, preferred_regs);
3139            /* fallthrough */
3140
3141        case TEMP_VAL_REG:
3142            tcg_out_st(s, ts->type, ts->reg,
3143                       ts->mem_base->reg, ts->mem_offset);
3144            break;
3145
3146        case TEMP_VAL_MEM:
3147            break;
3148
3149        case TEMP_VAL_DEAD:
3150        default:
3151            tcg_abort();
3152        }
3153        ts->mem_coherent = 1;
3154    }
3155    if (free_or_dead) {
3156        temp_free_or_dead(s, ts, free_or_dead);
3157    }
3158}
3159
3160/* free register 'reg' by spilling the corresponding temporary if necessary */
3161static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3162{
3163    TCGTemp *ts = s->reg_to_temp[reg];
3164    if (ts != NULL) {
3165        temp_sync(s, ts, allocated_regs, 0, -1);
3166    }
3167}
3168
3169/**
3170 * tcg_reg_alloc:
3171 * @required_regs: Set of registers in which we must allocate.
3172 * @allocated_regs: Set of registers which must be avoided.
3173 * @preferred_regs: Set of registers we should prefer.
3174 * @rev: True if we search the registers in "indirect" order.
3175 *
3176 * The allocated register must be in @required_regs & ~@allocated_regs,
3177 * but if we can put it in @preferred_regs we may save a move later.
3178 */
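/* E.g. with required_regs = {r0,r1}, allocated_regs = {r0} and
   preferred_regs = {r1}, r1 is returned directly when it is free.  */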
3179static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3180                            TCGRegSet allocated_regs,
3181                            TCGRegSet preferred_regs, bool rev)
3182{
3183    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3184    TCGRegSet reg_ct[2];
3185    const int *order;
3186
3187    reg_ct[1] = required_regs & ~allocated_regs;
3188    tcg_debug_assert(reg_ct[1] != 0);
3189    reg_ct[0] = reg_ct[1] & preferred_regs;
3190
3191    /* Skip the preferred_regs option if it cannot be satisfied,
3192       or if the preference made no difference.  */
3193    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3194
3195    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3196
3197    /* Try free registers, preferences first.  */
3198    for (j = f; j < 2; j++) {
3199        TCGRegSet set = reg_ct[j];
3200
3201        if (tcg_regset_single(set)) {
3202            /* One register in the set.  */
3203            TCGReg reg = tcg_regset_first(set);
3204            if (s->reg_to_temp[reg] == NULL) {
3205                return reg;
3206            }
3207        } else {
3208            for (i = 0; i < n; i++) {
3209                TCGReg reg = order[i];
3210                if (s->reg_to_temp[reg] == NULL &&
3211                    tcg_regset_test_reg(set, reg)) {
3212                    return reg;
3213                }
3214            }
3215        }
3216    }
3217
3218    /* We must spill something.  */
3219    for (j = f; j < 2; j++) {
3220        TCGRegSet set = reg_ct[j];
3221
3222        if (tcg_regset_single(set)) {
3223            /* One register in the set.  */
3224            TCGReg reg = tcg_regset_first(set);
3225            tcg_reg_free(s, reg, allocated_regs);
3226            return reg;
3227        } else {
3228            for (i = 0; i < n; i++) {
3229                TCGReg reg = order[i];
3230                if (tcg_regset_test_reg(set, reg)) {
3231                    tcg_reg_free(s, reg, allocated_regs);
3232                    return reg;
3233                }
3234            }
3235        }
3236    }
3237
3238    tcg_abort();
3239}
3240
3241/* Make sure the temporary is in a register.  If needed, allocate the register
3242   from DESIRED while avoiding ALLOCATED, preferring PREFERRED when possible. */
3243static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3244                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3245{
3246    TCGReg reg;
3247
3248    switch (ts->val_type) {
3249    case TEMP_VAL_REG:
3250        return;
3251    case TEMP_VAL_CONST:
3252        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3253                            preferred_regs, ts->indirect_base);
3254        if (ts->type <= TCG_TYPE_I64) {
3255            tcg_out_movi(s, ts->type, reg, ts->val);
3256        } else {
3257            uint64_t val = ts->val;
3258            MemOp vece = MO_64;
3259
3260            /*
3261             * Find the minimal vector element that matches the constant.
3262             * The targets will, in general, have to do this search anyway,
3263             * so do it generically here.
3264             */
3265            if (val == dup_const(MO_8, val)) {
3266                vece = MO_8;
3267            } else if (val == dup_const(MO_16, val)) {
3268                vece = MO_16;
3269            } else if (val == dup_const(MO_32, val)) {
3270                vece = MO_32;
3271            }
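                /*
                 * Illustrative example: val = 0x2323232323232323 equals
                 * dup_const(MO_8, 0x23), so vece = MO_8, while
                 * val = 0x0001000100010001 first matches at MO_16.
                 */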
3272
3273            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3274        }
3275        ts->mem_coherent = 0;
3276        break;
3277    case TEMP_VAL_MEM:
3278        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3279                            preferred_regs, ts->indirect_base);
3280        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3281        ts->mem_coherent = 1;
3282        break;
3283    case TEMP_VAL_DEAD:
3284    default:
3285        tcg_abort();
3286    }
3287    ts->reg = reg;
3288    ts->val_type = TEMP_VAL_REG;
3289    s->reg_to_temp[reg] = ts;
3290}
3291
3292/* Save a temporary to memory. 'allocated_regs' is used in case a
3293   temporary register needs to be allocated to store a constant.  */
3294static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3295{
3296    /* The liveness analysis already ensures that globals are back
3297       in memory. Keep a tcg_debug_assert for safety. */
3298    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3299}
3300
3301/* save globals to their canonical location and assume they can be
3302   modified by the following code. 'allocated_regs' is used in case a
3303   temporary register needs to be allocated to store a constant. */
3304static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3305{
3306    int i, n;
3307
3308    for (i = 0, n = s->nb_globals; i < n; i++) {
3309        temp_save(s, &s->temps[i], allocated_regs);
3310    }
3311}
3312
3313/* sync globals to their canonical location and assume they can be
3314   read by the following code. 'allocated_regs' is used in case a
3315   temporary register needs to be allocated to store a constant. */
3316static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3317{
3318    int i, n;
3319
3320    for (i = 0, n = s->nb_globals; i < n; i++) {
3321        TCGTemp *ts = &s->temps[i];
3322        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3323                         || ts->kind == TEMP_FIXED
3324                         || ts->mem_coherent);
3325    }
3326}
3327
3328/* at the end of a basic block, we assume all temporaries are dead and
3329   all globals are stored at their canonical location. */
3330static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3331{
3332    int i;
3333
3334    for (i = s->nb_globals; i < s->nb_temps; i++) {
3335        TCGTemp *ts = &s->temps[i];
3336
3337        switch (ts->kind) {
3338        case TEMP_LOCAL:
3339            temp_save(s, ts, allocated_regs);
3340            break;
3341        case TEMP_NORMAL:
3342            /* The liveness analysis already ensures that temps are dead.
3343               Keep a tcg_debug_assert for safety. */
3344            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3345            break;
3346        case TEMP_CONST:
3347            /* Similarly, we should have freed any allocated register. */
3348            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3349            break;
3350        default:
3351            g_assert_not_reached();
3352        }
3353    }
3354
3355    save_globals(s, allocated_regs);
3356}
3357
3358/*
3359 * At a conditional branch, we assume all temporaries are dead and
3360 * all globals and local temps are synced to their location.
3361 */
3362static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3363{
3364    sync_globals(s, allocated_regs);
3365
3366    for (int i = s->nb_globals; i < s->nb_temps; i++) {
3367        TCGTemp *ts = &s->temps[i];
3368        /*
3369         * The liveness analysis already ensures that temps are dead.
3370         * Keep tcg_debug_asserts for safety.
3371         */
3372        switch (ts->kind) {
3373        case TEMP_LOCAL:
3374            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3375            break;
3376        case TEMP_NORMAL:
3377            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3378            break;
3379        case TEMP_CONST:
3380            break;
3381        default:
3382            g_assert_not_reached();
3383        }
3384    }
3385}
3386
3387/*
3388 * Specialized code generation for INDEX_op_mov_* with a constant.
3389 */
3390static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3391                                  tcg_target_ulong val, TCGLifeData arg_life,
3392                                  TCGRegSet preferred_regs)
3393{
3394    /* ENV should not be modified.  */
3395    tcg_debug_assert(!temp_readonly(ots));
3396
3397    /* The movi is not explicitly generated here.  */
3398    if (ots->val_type == TEMP_VAL_REG) {
3399        s->reg_to_temp[ots->reg] = NULL;
3400    }
3401    ots->val_type = TEMP_VAL_CONST;
3402    ots->val = val;
3403    ots->mem_coherent = 0;
3404    if (NEED_SYNC_ARG(0)) {
3405        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3406    } else if (IS_DEAD_ARG(0)) {
3407        temp_dead(s, ots);
3408    }
3409}
3410
3411/*
3412 * Specialized code generation for INDEX_op_mov_*.
3413 */
3414static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3415{
3416    const TCGLifeData arg_life = op->life;
3417    TCGRegSet allocated_regs, preferred_regs;
3418    TCGTemp *ts, *ots;
3419    TCGType otype, itype;
3420
3421    allocated_regs = s->reserved_regs;
3422    preferred_regs = op->output_pref[0];
3423    ots = arg_temp(op->args[0]);
3424    ts = arg_temp(op->args[1]);
3425
3426    /* ENV should not be modified.  */
3427    tcg_debug_assert(!temp_readonly(ots));
3428
3429    /* Note that otype != itype for no-op truncation.  */
3430    otype = ots->type;
3431    itype = ts->type;
3432
3433    if (ts->val_type == TEMP_VAL_CONST) {
3434        /* propagate constant or generate sti */
3435        tcg_target_ulong val = ts->val;
3436        if (IS_DEAD_ARG(1)) {
3437            temp_dead(s, ts);
3438        }
3439        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3440        return;
3441    }
3442
3443    /* If the source value is in memory, we're going to be forced
3444       to have it in a register in order to perform the copy.  Copy
3445       the SOURCE value into its own register first, so that we
3446       don't have to reload SOURCE the next time it is used. */
3447    if (ts->val_type == TEMP_VAL_MEM) {
3448        temp_load(s, ts, tcg_target_available_regs[itype],
3449                  allocated_regs, preferred_regs);
3450    }
3451
3452    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3453    if (IS_DEAD_ARG(0)) {
3454        /* mov to a non-saved dead register makes no sense (even with
3455           liveness analysis disabled). */
3456        tcg_debug_assert(NEED_SYNC_ARG(0));
3457        if (!ots->mem_allocated) {
3458            temp_allocate_frame(s, ots);
3459        }
3460        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3461        if (IS_DEAD_ARG(1)) {
3462            temp_dead(s, ts);
3463        }
3464        temp_dead(s, ots);
3465    } else {
3466        if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3467            /* the mov can be suppressed */
3468            if (ots->val_type == TEMP_VAL_REG) {
3469                s->reg_to_temp[ots->reg] = NULL;
3470            }
3471            ots->reg = ts->reg;
3472            temp_dead(s, ts);
3473        } else {
3474            if (ots->val_type != TEMP_VAL_REG) {
3475                /* When allocating a new register, make sure to not spill the
3476                   input one. */
3477                tcg_regset_set_reg(allocated_regs, ts->reg);
3478                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3479                                         allocated_regs, preferred_regs,
3480                                         ots->indirect_base);
3481            }
3482            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3483                /*
3484                 * Cross register class move not supported.
3485                 * Store the source register into the destination slot
3486                 * and leave the destination temp as TEMP_VAL_MEM.
3487                 */
3488                assert(!temp_readonly(ots));
3489                if (!ots->mem_allocated) {
3490                    temp_allocate_frame(s, ots);
3491                }
3492                tcg_out_st(s, ts->type, ts->reg,
3493                           ots->mem_base->reg, ots->mem_offset);
3494                ots->mem_coherent = 1;
3495                temp_free_or_dead(s, ots, -1);
3496                return;
3497            }
3498        }
3499        ots->val_type = TEMP_VAL_REG;
3500        ots->mem_coherent = 0;
3501        s->reg_to_temp[ots->reg] = ots;
3502        if (NEED_SYNC_ARG(0)) {
3503            temp_sync(s, ots, allocated_regs, 0, 0);
3504        }
3505    }
3506}
3507
3508/*
3509 * Specialized code generation for INDEX_op_dup_vec.
3510 */
3511static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3512{
3513    const TCGLifeData arg_life = op->life;
3514    TCGRegSet dup_out_regs, dup_in_regs;
3515    TCGTemp *its, *ots;
3516    TCGType itype, vtype;
3517    intptr_t endian_fixup;
3518    unsigned vece;
3519    bool ok;
3520
3521    ots = arg_temp(op->args[0]);
3522    its = arg_temp(op->args[1]);
3523
3524    /* ENV should not be modified.  */
3525    tcg_debug_assert(!temp_readonly(ots));
3526
3527    itype = its->type;
3528    vece = TCGOP_VECE(op);
3529    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3530
3531    if (its->val_type == TEMP_VAL_CONST) {
3532        /* Propagate constant via movi -> dupi.  */
3533        tcg_target_ulong val = its->val;
3534        if (IS_DEAD_ARG(1)) {
3535            temp_dead(s, its);
3536        }
3537        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3538        return;
3539    }
3540
3541    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3542    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3543
3544    /* Allocate the output register now.  */
3545    if (ots->val_type != TEMP_VAL_REG) {
3546        TCGRegSet allocated_regs = s->reserved_regs;
3547
3548        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3549            /* Make sure to not spill the input register. */
3550            tcg_regset_set_reg(allocated_regs, its->reg);
3551        }
3552        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3553                                 op->output_pref[0], ots->indirect_base);
3554        ots->val_type = TEMP_VAL_REG;
3555        ots->mem_coherent = 0;
3556        s->reg_to_temp[ots->reg] = ots;
3557    }
3558
3559    switch (its->val_type) {
3560    case TEMP_VAL_REG:
3561        /*
3562         * The dup constraints must be broad, covering all possible VECE.
3563         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3564         * to fail, indicating that extra moves are required for that case.
3565         */
3566        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3567            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3568                goto done;
3569            }
3570            /* Try again from memory or a vector input register.  */
3571        }
3572        if (!its->mem_coherent) {
3573            /*
3574             * The input register is not synced, and so an extra store
3575             * would be required to use memory.  Attempt an integer-vector
3576             * register move first.  We do not have a TCGRegSet for this.
3577             */
3578            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3579                break;
3580            }
3581            /* Sync the temp back to its slot and load from there.  */
3582            temp_sync(s, its, s->reserved_regs, 0, 0);
3583        }
3584        /* fall through */
3585
3586    case TEMP_VAL_MEM:
3587#ifdef HOST_WORDS_BIGENDIAN
3588        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3589        endian_fixup -= 1 << vece;
3590#else
3591        endian_fixup = 0;
3592#endif
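            /*
             * Illustrative example: on a big-endian host, itype
             * TCG_TYPE_I64 with vece MO_8 gives endian_fixup = 8 - 1 = 7,
             * the offset of the least significant byte within the slot.
             */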
3593        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3594                             its->mem_offset + endian_fixup)) {
3595            goto done;
3596        }
3597        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3598        break;
3599
3600    default:
3601        g_assert_not_reached();
3602    }
3603
3604    /* We now have a vector input register, so dup must succeed. */
3605    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3606    tcg_debug_assert(ok);
3607
3608 done:
3609    if (IS_DEAD_ARG(1)) {
3610        temp_dead(s, its);
3611    }
3612    if (NEED_SYNC_ARG(0)) {
3613        temp_sync(s, ots, s->reserved_regs, 0, 0);
3614    }
3615    if (IS_DEAD_ARG(0)) {
3616        temp_dead(s, ots);
3617    }
3618}
3619
3620static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3621{
3622    const TCGLifeData arg_life = op->life;
3623    const TCGOpDef * const def = &tcg_op_defs[op->opc];
3624    TCGRegSet i_allocated_regs;
3625    TCGRegSet o_allocated_regs;
3626    int i, k, nb_iargs, nb_oargs;
3627    TCGReg reg;
3628    TCGArg arg;
3629    const TCGArgConstraint *arg_ct;
3630    TCGTemp *ts;
3631    TCGArg new_args[TCG_MAX_OP_ARGS];
3632    int const_args[TCG_MAX_OP_ARGS];
3633
3634    nb_oargs = def->nb_oargs;
3635    nb_iargs = def->nb_iargs;
3636
3637    /* copy constants */
3638    memcpy(new_args + nb_oargs + nb_iargs, 
3639           op->args + nb_oargs + nb_iargs,
3640           sizeof(TCGArg) * def->nb_cargs);
3641
3642    i_allocated_regs = s->reserved_regs;
3643    o_allocated_regs = s->reserved_regs;
3644
3645    /* satisfy input constraints */ 
3646    for (k = 0; k < nb_iargs; k++) {
3647        TCGRegSet i_preferred_regs, o_preferred_regs;
3648
3649        i = def->args_ct[nb_oargs + k].sort_index;
3650        arg = op->args[i];
3651        arg_ct = &def->args_ct[i];
3652        ts = arg_temp(arg);
3653
3654        if (ts->val_type == TEMP_VAL_CONST
3655            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3656            /* constant is OK for instruction */
3657            const_args[i] = 1;
3658            new_args[i] = ts->val;
3659            continue;
3660        }
3661
3662        i_preferred_regs = o_preferred_regs = 0;
3663        if (arg_ct->ialias) {
3664            o_preferred_regs = op->output_pref[arg_ct->alias_index];
3665
3666            /*
3667             * If the input is readonly, then it cannot also be an
3668             * output and aliased to itself.  If the input is not
3669             * dead after the instruction, we must allocate a new
3670             * register and move it.
3671             */
3672            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3673                goto allocate_in_reg;
3674            }
3675
3676            /*
3677             * Check if the current register has already been allocated
3678             * for another input aliased to an output.
3679             */
3680            if (ts->val_type == TEMP_VAL_REG) {
3681                reg = ts->reg;
3682                for (int k2 = 0; k2 < k; k2++) {
3683                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
3684                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3685                        goto allocate_in_reg;
3686                    }
3687                }
3688            }
3689            i_preferred_regs = o_preferred_regs;
3690        }
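            /*
             * Illustrative example: if two inputs that alias outputs
             * currently share one register, the later input must be
             * copied to a fresh register, since each aliased register
             * is overwritten by its corresponding output.
             */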
3691
3692        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3693        reg = ts->reg;
3694
3695        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3696 allocate_in_reg:
3697            /*
3698             * Allocate a new register matching the constraint
3699             * and move the temporary register into it.
3700             */
3701            temp_load(s, ts, tcg_target_available_regs[ts->type],
3702                      i_allocated_regs, 0);
3703            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3704                                o_preferred_regs, ts->indirect_base);
3705            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3706                /*
3707                 * Cross register class move not supported.  Sync the
3708                 * temp back to its slot and load from there.
3709                 */
3710                temp_sync(s, ts, i_allocated_regs, 0, 0);
3711                tcg_out_ld(s, ts->type, reg,
3712                           ts->mem_base->reg, ts->mem_offset);
3713            }
3714        }
3715        new_args[i] = reg;
3716        const_args[i] = 0;
3717        tcg_regset_set_reg(i_allocated_regs, reg);
3718    }
3719    
3720    /* mark dead temporaries and free the associated registers */
3721    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3722        if (IS_DEAD_ARG(i)) {
3723            temp_dead(s, arg_temp(op->args[i]));
3724        }
3725    }
3726
3727    if (def->flags & TCG_OPF_COND_BRANCH) {
3728        tcg_reg_alloc_cbranch(s, i_allocated_regs);
3729    } else if (def->flags & TCG_OPF_BB_END) {
3730        tcg_reg_alloc_bb_end(s, i_allocated_regs);
3731    } else {
3732        if (def->flags & TCG_OPF_CALL_CLOBBER) {
3733            /* XXX: permit generic clobber register list ? */ 
3734            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3735                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3736                    tcg_reg_free(s, i, i_allocated_regs);
3737                }
3738            }
3739        }
3740        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3741            /* sync globals if the op has side effects and might trigger
3742               an exception. */
3743            sync_globals(s, i_allocated_regs);
3744        }
3745        
3746        /* satisfy the output constraints */
3747        for (k = 0; k < nb_oargs; k++) {
3748            i = def->args_ct[k].sort_index;
3749            arg = op->args[i];
3750            arg_ct = &def->args_ct[i];
3751            ts = arg_temp(arg);
3752
3753            /* ENV should not be modified.  */
3754            tcg_debug_assert(!temp_readonly(ts));
3755
3756            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3757                reg = new_args[arg_ct->alias_index];
3758            } else if (arg_ct->newreg) {
3759                reg = tcg_reg_alloc(s, arg_ct->regs,
3760                                    i_allocated_regs | o_allocated_regs,
3761                                    op->output_pref[k], ts->indirect_base);
3762            } else {
3763                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3764                                    op->output_pref[k], ts->indirect_base);
3765            }
3766            tcg_regset_set_reg(o_allocated_regs, reg);
3767            if (ts->val_type == TEMP_VAL_REG) {
3768                s->reg_to_temp[ts->reg] = NULL;
3769            }
3770            ts->val_type = TEMP_VAL_REG;
3771            ts->reg = reg;
3772            /*
3773             * Temp value is modified, so the value kept in memory is
3774             * potentially not the same.
3775             */
3776            ts->mem_coherent = 0;
3777            s->reg_to_temp[reg] = ts;
3778            new_args[i] = reg;
3779        }
3780    }
3781
3782    /* emit instruction */
3783    if (def->flags & TCG_OPF_VECTOR) {
3784        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3785                       new_args, const_args);
3786    } else {
3787        tcg_out_op(s, op->opc, new_args, const_args);
3788    }
3789
3790    /* move the outputs in the correct register if needed */
3791    for (i = 0; i < nb_oargs; i++) {
3792        ts = arg_temp(op->args[i]);
3793
3794        /* ENV should not be modified.  */
3795        tcg_debug_assert(!temp_readonly(ts));
3796
3797        if (NEED_SYNC_ARG(i)) {
3798            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3799        } else if (IS_DEAD_ARG(i)) {
3800            temp_dead(s, ts);
3801        }
3802    }
3803}
3804
3805static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3806{
3807    const TCGLifeData arg_life = op->life;
3808    TCGTemp *ots, *itsl, *itsh;
3809    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3810
3811    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3812    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3813    tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3814
3815    ots = arg_temp(op->args[0]);
3816    itsl = arg_temp(op->args[1]);
3817    itsh = arg_temp(op->args[2]);
3818
3819    /* ENV should not be modified.  */
3820    tcg_debug_assert(!temp_readonly(ots));
3821
3822    /* Allocate the output register now.  */
3823    if (ots->val_type != TEMP_VAL_REG) {
3824        TCGRegSet allocated_regs = s->reserved_regs;
3825        TCGRegSet dup_out_regs =
3826            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3827
3828        /* Make sure to not spill the input registers. */
3829        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3830            tcg_regset_set_reg(allocated_regs, itsl->reg);
3831        }
3832        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3833            tcg_regset_set_reg(allocated_regs, itsh->reg);
3834        }
3835
3836        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3837                                 op->output_pref[0], ots->indirect_base);
3838        ots->val_type = TEMP_VAL_REG;
3839        ots->mem_coherent = 0;
3840        s->reg_to_temp[ots->reg] = ots;
3841    }
3842
3843    /* Promote dup2 of immediates to dupi_vec. */
3844    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3845        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3846        MemOp vece = MO_64;
3847
3848        if (val == dup_const(MO_8, val)) {
3849            vece = MO_8;
3850        } else if (val == dup_const(MO_16, val)) {
3851            vece = MO_16;
3852        } else if (val == dup_const(MO_32, val)) {
3853            vece = MO_32;
3854        }
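            /*
             * Illustrative example: itsl = 0x00000007, itsh = 0x00000007
             * combine to val = 0x0000000700000007, which first matches
             * at MO_32.
             */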
3855
3856        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3857        goto done;
3858    }
3859
3860    /* If the two inputs form one 64-bit value, try dupm_vec. */
3861    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3862        if (!itsl->mem_coherent) {
3863            temp_sync(s, itsl, s->reserved_regs, 0, 0);
3864        }
3865        if (!itsh->mem_coherent) {
3866            temp_sync(s, itsh, s->reserved_regs, 0, 0);
3867        }
3868#ifdef HOST_WORDS_BIGENDIAN
3869        TCGTemp *its = itsh;
3870#else
3871        TCGTemp *its = itsl;
3872#endif
3873        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3874                             its->mem_base->reg, its->mem_offset)) {
3875            goto done;
3876        }
3877    }
3878
3879    /* Fall back to generic expansion. */
3880    return false;
3881
3882 done:
3883    if (IS_DEAD_ARG(1)) {
3884        temp_dead(s, itsl);
3885    }
3886    if (IS_DEAD_ARG(2)) {
3887        temp_dead(s, itsh);
3888    }
3889    if (NEED_SYNC_ARG(0)) {
3890        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3891    } else if (IS_DEAD_ARG(0)) {
3892        temp_dead(s, ots);
3893    }
3894    return true;
3895}
3896
3897#ifdef TCG_TARGET_STACK_GROWSUP
3898#define STACK_DIR(x) (-(x))
3899#else
3900#define STACK_DIR(x) (x)
3901#endif
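    /* STACK_DIR negates a stack-slot offset on hosts whose stack grows
       upward, so offset arithmetic can be written once for both
       growth directions. */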
3902
3903static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3904{
3905    const int nb_oargs = TCGOP_CALLO(op);
3906    const int nb_iargs = TCGOP_CALLI(op);
3907    const TCGLifeData arg_life = op->life;
3908    const TCGHelperInfo *info;
3909    int flags, nb_regs, i;
3910    TCGReg reg;
3911    TCGArg arg;
3912    TCGTemp *ts;
3913    intptr_t stack_offset;
3914    size_t call_stack_size;
3915    tcg_insn_unit *func_addr;
3916    int allocate_args;
3917    TCGRegSet allocated_regs;
3918
3919    func_addr = tcg_call_func(op);
3920    info = tcg_call_info(op);
3921    flags = info->flags;
3922
3923    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3924    if (nb_regs > nb_iargs) {
3925        nb_regs = nb_iargs;
3926    }
3927
3928    /* assign stack slots first */
3929    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3930    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
3931        ~(TCG_TARGET_STACK_ALIGN - 1);
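        /* Illustrative example: three excess arguments of 8 bytes with
           16-byte stack alignment: 24 -> (24 + 15) & ~15 = 32 bytes. */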
3932    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3933    if (allocate_args) {
3934        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3935           preallocate call stack */
3936        tcg_abort();
3937    }
3938
3939    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3940    for (i = nb_regs; i < nb_iargs; i++) {
3941        arg = op->args[nb_oargs + i];
3942#ifdef TCG_TARGET_STACK_GROWSUP
3943        stack_offset -= sizeof(tcg_target_long);
3944#endif
3945        if (arg != TCG_CALL_DUMMY_ARG) {
3946            ts = arg_temp(arg);
3947            temp_load(s, ts, tcg_target_available_regs[ts->type],
3948                      s->reserved_regs, 0);
3949            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3950        }
3951#ifndef TCG_TARGET_STACK_GROWSUP
3952        stack_offset += sizeof(tcg_target_long);
3953#endif
3954    }
3955    
3956    /* assign input registers */
3957    allocated_regs = s->reserved_regs;
3958    for (i = 0; i < nb_regs; i++) {
3959        arg = op->args[nb_oargs + i];
3960        if (arg != TCG_CALL_DUMMY_ARG) {
3961            ts = arg_temp(arg);
3962            reg = tcg_target_call_iarg_regs[i];
3963
3964            if (ts->val_type == TEMP_VAL_REG) {
3965                if (ts->reg != reg) {
3966                    tcg_reg_free(s, reg, allocated_regs);
3967                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3968                        /*
3969                         * Cross register class move not supported.  Sync the
3970                         * temp back to its slot and load from there.
3971                         */
3972                        temp_sync(s, ts, allocated_regs, 0, 0);
3973                        tcg_out_ld(s, ts->type, reg,
3974                                   ts->mem_base->reg, ts->mem_offset);
3975                    }
3976                }
3977            } else {
3978                TCGRegSet arg_set = 0;
3979
3980                tcg_reg_free(s, reg, allocated_regs);
3981                tcg_regset_set_reg(arg_set, reg);
3982                temp_load(s, ts, arg_set, allocated_regs, 0);
3983            }
3984
3985            tcg_regset_set_reg(allocated_regs, reg);
3986        }
3987    }
3988    
3989    /* mark dead temporaries and free the associated registers */
3990    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3991        if (IS_DEAD_ARG(i)) {
3992            temp_dead(s, arg_temp(op->args[i]));
3993        }
3994    }
3995    
3996    /* clobber call registers */
3997    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3998        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3999            tcg_reg_free(s, i, allocated_regs);
4000        }
4001    }
4002
4003    /* Save globals if they might be written by the helper, sync them if
4004       they might be read. */
4005    if (flags & TCG_CALL_NO_READ_GLOBALS) {
4006        /* Nothing to do */
4007    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4008        sync_globals(s, allocated_regs);
4009    } else {
4010        save_globals(s, allocated_regs);
4011    }
4012
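        /*
         * Under the TCG interpreter, helpers cannot be called natively;
         * a libffi call descriptor prepared for this helper's signature
         * is looked up by typemask and passed to tcg_out_call.
         */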
4013#ifdef CONFIG_TCG_INTERPRETER
4014    {
4015        gpointer hash = (gpointer)(uintptr_t)info->typemask;
4016        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4017        assert(cif != NULL);
4018        tcg_out_call(s, func_addr, cif);
4019    }
4020#else
4021    tcg_out_call(s, func_addr);
4022#endif
4023
4024    /* assign output registers and emit moves if needed */
4025    for (i = 0; i < nb_oargs; i++) {
4026        arg = op->args[i];
4027        ts = arg_temp(arg);
4028
4029        /* ENV should not be modified.  */
4030        tcg_debug_assert(!temp_readonly(ts));
4031
4032        reg = tcg_target_call_oarg_regs[i];
4033        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4034        if (ts->val_type == TEMP_VAL_REG) {
4035            s->reg_to_temp[ts->reg] = NULL;
4036        }
4037        ts->val_type = TEMP_VAL_REG;
4038        ts->reg = reg;
4039        ts->mem_coherent = 0;
4040        s->reg_to_temp[reg] = ts;
4041        if (NEED_SYNC_ARG(i)) {
4042            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4043        } else if (IS_DEAD_ARG(i)) {
4044            temp_dead(s, ts);
4045        }
4046    }
4047}
4048
4049#ifdef CONFIG_PROFILER
4050
4051/* avoid copy/paste errors */
4052#define PROF_ADD(to, from, field)                       \
4053    do {                                                \
4054        (to)->field += qatomic_read(&((from)->field));  \
4055    } while (0)
4056
4057#define PROF_MAX(to, from, field)                                       \
4058    do {                                                                \
4059        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4060        if (val__ > (to)->field) {                                      \
4061            (to)->field = val__;                                        \
4062        }                                                               \
4063    } while (0)
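
    /* Example usage (illustrative): PROF_ADD(prof, orig, tb_count)
       accumulates a counter across contexts, while PROF_MAX keeps the
       per-context maximum. */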
4064
4065/* Pass in a zeroed @prof */
4066static inline
4067void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4068{
4069    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4070    unsigned int i;
4071
4072    for (i = 0; i < n_ctxs; i++) {
4073        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4074        const TCGProfile *orig = &s->prof;
4075
4076        if (counters) {
4077            PROF_ADD(prof, orig, cpu_exec_time);
4078            PROF_ADD(prof, orig, tb_count1);
4079            PROF_ADD(prof, orig, tb_count);
4080            PROF_ADD(prof, orig, op_count);
4081            PROF_MAX(prof, orig, op_count_max);
4082            PROF_ADD(prof, orig, temp_count);
4083            PROF_MAX(prof, orig, temp_count_max);
4084            PROF_ADD(prof, orig, del_op_count);
4085            PROF_ADD(prof, orig, code_in_len);
4086            PROF_ADD(prof, orig, code_out_len);
4087            PROF_ADD(prof, orig, search_out_len);
4088            PROF_ADD(prof, orig, interm_time);
4089            PROF_ADD(prof, orig, code_time);
4090            PROF_ADD(prof, orig, la_time);
4091            PROF_ADD(prof, orig, opt_time);
4092            PROF_ADD(prof, orig, restore_count);
4093            PROF_ADD(prof, orig, restore_time);
4094        }
4095        if (table) {
4096            int i;
4097
4098            for (i = 0; i < NB_OPS; i++) {
4099                PROF_ADD(prof, orig, table_op_count[i]);
4100            }
4101        }
4102    }
4103}
4104
4105#undef PROF_ADD
4106#undef PROF_MAX
4107
4108static void tcg_profile_snapshot_counters(TCGProfile *prof)
4109{
4110    tcg_profile_snapshot(prof, true, false);
4111}
4112
4113static void tcg_profile_snapshot_table(TCGProfile *prof)
4114{
4115    tcg_profile_snapshot(prof, false, true);
4116}
4117
4118void tcg_dump_op_count(void)
4119{
4120    TCGProfile prof = {};
4121    int i;
4122
4123    tcg_profile_snapshot_table(&prof);
4124    for (i = 0; i < NB_OPS; i++) {
4125        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4126                    prof.table_op_count[i]);
4127    }
4128}
4129
4130int64_t tcg_cpu_exec_time(void)
4131{
4132    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4133    unsigned int i;
4134    int64_t ret = 0;
4135
4136    for (i = 0; i < n_ctxs; i++) {
4137        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4138        const TCGProfile *prof = &s->prof;
4139
4140        ret += qatomic_read(&prof->cpu_exec_time);
4141    }
4142    return ret;
4143}
4144#else
4145void tcg_dump_op_count(void)
4146{
4147    qemu_printf("[TCG profiler not compiled]\n");
4148}
4149
4150int64_t tcg_cpu_exec_time(void)
4151{
4152    error_report("%s: TCG profiler not compiled", __func__);
4153    exit(EXIT_FAILURE);
4154}
4155#endif
4156
4157
4158int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4159{
4160#ifdef CONFIG_PROFILER
4161    TCGProfile *prof = &s->prof;
4162#endif
4163    int i, num_insns;
4164    TCGOp *op;
4165
4166#ifdef CONFIG_PROFILER
4167    {
4168        int n = 0;
4169
4170        QTAILQ_FOREACH(op, &s->ops, link) {
4171            n++;
4172        }
4173        qatomic_set(&prof->op_count, prof->op_count + n);
4174        if (n > prof->op_count_max) {
4175            qatomic_set(&prof->op_count_max, n);
4176        }
4177
4178        n = s->nb_temps;
4179        qatomic_set(&prof->temp_count, prof->temp_count + n);
4180        if (n > prof->temp_count_max) {
4181            qatomic_set(&prof->temp_count_max, n);
4182        }
4183    }
4184#endif
4185
4186#ifdef DEBUG_DISAS
4187    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4188                 && qemu_log_in_addr_range(tb->pc))) {
4189        FILE *logfile = qemu_log_lock();
4190        qemu_log("OP:\n");
4191        tcg_dump_ops(s, false);
4192        qemu_log("\n");
4193        qemu_log_unlock(logfile);
4194    }
4195#endif
4196
4197#ifdef CONFIG_DEBUG_TCG
4198    /* Ensure all labels referenced have been emitted.  */
4199    {
4200        TCGLabel *l;
4201        bool error = false;
4202
4203        QSIMPLEQ_FOREACH(l, &s->labels, next) {
4204            if (unlikely(!l->present) && l->refs) {
4205                qemu_log_mask(CPU_LOG_TB_OP,
4206                              "$L%d referenced but not present.\n", l->id);
4207                error = true;
4208            }
4209        }
4210        assert(!error);
4211    }
4212#endif
4213
4214#ifdef CONFIG_PROFILER
4215    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4216#endif
4217
4218#ifdef USE_TCG_OPTIMIZATIONS
4219    tcg_optimize(s);
4220#endif
4221
4222#ifdef CONFIG_PROFILER
4223    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4224    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4225#endif
4226
4227    reachable_code_pass(s);
4228    liveness_pass_1(s);
4229
4230    if (s->nb_indirects > 0) {
4231#ifdef DEBUG_DISAS
4232        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4233                     && qemu_log_in_addr_range(tb->pc))) {
4234            FILE *logfile = qemu_log_lock();
4235            qemu_log("OP before indirect lowering:\n");
4236            tcg_dump_ops(s, false);
4237            qemu_log("\n");
4238            qemu_log_unlock(logfile);
4239        }
4240#endif
4241        /* Replace indirect temps with direct temps.  */
4242        if (liveness_pass_2(s)) {
4243            /* If changes were made, re-run liveness.  */
4244            liveness_pass_1(s);
4245        }
4246    }
4247
4248#ifdef CONFIG_PROFILER
4249    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4250#endif
4251
4252#ifdef DEBUG_DISAS
4253    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4254                 && qemu_log_in_addr_range(tb->pc))) {
4255        FILE *logfile = qemu_log_lock();
4256        qemu_log("OP after optimization and liveness analysis:\n");
4257        tcg_dump_ops(s, true);
4258        qemu_log("\n");
4259        qemu_log_unlock(logfile);
4260    }
4261#endif
4262
4263    tcg_reg_alloc_start(s);
4264
4265    /*
4266     * Reset the buffer pointers when restarting after overflow.
4267     * TODO: Move this into translate-all.c with the rest of the
4268     * buffer management.  Having only this done here is confusing.
4269     */
4270    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4271    s->code_ptr = s->code_buf;
4272
4273#ifdef TCG_TARGET_NEED_LDST_LABELS
4274    QSIMPLEQ_INIT(&s->ldst_labels);
4275#endif
4276#ifdef TCG_TARGET_NEED_POOL_LABELS
4277    s->pool_labels = NULL;
4278#endif
4279
4280    num_insns = -1;
4281    QTAILQ_FOREACH(op, &s->ops, link) {
4282        TCGOpcode opc = op->opc;
4283
4284#ifdef CONFIG_PROFILER
4285        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4286#endif
4287
4288        switch (opc) {
4289        case INDEX_op_mov_i32:
4290        case INDEX_op_mov_i64:
4291        case INDEX_op_mov_vec:
4292            tcg_reg_alloc_mov(s, op);
4293            break;
4294        case INDEX_op_dup_vec:
4295            tcg_reg_alloc_dup(s, op);
4296            break;
4297        case INDEX_op_insn_start:
4298            if (num_insns >= 0) {
4299                size_t off = tcg_current_code_size(s);
4300                s->gen_insn_end_off[num_insns] = off;
4301                /* Assert that we do not overflow our stored offset.  */
4302                assert(s->gen_insn_end_off[num_insns] == off);
4303            }
4304            num_insns++;
4305            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4306                target_ulong a;
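                    /* A 64-bit guest value on a 32-bit host arrives as
                       two 32-bit op arguments; reassemble it below. */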
4307#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4308                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4309#else
4310                a = op->args[i];
4311#endif
4312                s->gen_insn_data[num_insns][i] = a;
4313            }
4314            break;
4315        case INDEX_op_discard:
4316            temp_dead(s, arg_temp(op->args[0]));
4317            break;
4318        case INDEX_op_set_label:
4319            tcg_reg_alloc_bb_end(s, s->reserved_regs);
4320            tcg_out_label(s, arg_label(op->args[0]));
4321            break;
4322        case INDEX_op_call:
4323            tcg_reg_alloc_call(s, op);
4324            break;
4325        case INDEX_op_dup2_vec:
4326            if (tcg_reg_alloc_dup2(s, op)) {
4327                break;
4328            }
4329            /* fall through */
4330        default:
4331            /* Sanity check that we've not introduced any unhandled opcodes. */
4332            tcg_debug_assert(tcg_op_supported(opc));
4333            /* Note: it would be much faster to have specialized
4334               register allocator functions for some common argument
4335               patterns. */
4336            tcg_reg_alloc_op(s, op);
4337            break;
4338        }
4339#ifdef CONFIG_DEBUG_TCG
4340        check_regs(s);
4341#endif
4342        /* Test for (pending) buffer overflow.  The assumption is that any
4343           one operation beginning below the high water mark cannot overrun
4344           the buffer completely.  Thus we can test for overflow after
4345           generating code without having to check during generation.  */
4346        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4347            return -1;
4348        }
4349        /* Test for TB overflow, as seen by gen_insn_end_off.  */
4350        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4351            return -2;
4352        }
4353    }
4354    tcg_debug_assert(num_insns >= 0);
4355    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4356
4357    /* Generate TB finalization at the end of block */
4358#ifdef TCG_TARGET_NEED_LDST_LABELS
4359    i = tcg_out_ldst_finalize(s);
4360    if (i < 0) {
4361        return i;
4362    }
4363#endif
4364#ifdef TCG_TARGET_NEED_POOL_LABELS
4365    i = tcg_out_pool_finalize(s);
4366    if (i < 0) {
4367        return i;
4368    }
4369#endif
4370    if (!tcg_resolve_relocs(s)) {
4371        return -2;
4372    }
4373
4374#ifndef CONFIG_TCG_INTERPRETER
4375    /* flush instruction cache */
4376    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4377                        (uintptr_t)s->code_buf,
4378                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4379#endif
4380
4381    return tcg_current_code_size(s);
4382}
4383
4384#ifdef CONFIG_PROFILER
4385void tcg_dump_info(void)
4386{
4387    TCGProfile prof = {};
4388    const TCGProfile *s;
4389    int64_t tb_count;
4390    int64_t tb_div_count;
4391    int64_t tot;
4392
4393    tcg_profile_snapshot_counters(&prof);
4394    s = &prof;
4395    tb_count = s->tb_count;
4396    tb_div_count = tb_count ? tb_count : 1;
4397    tot = s->interm_time + s->code_time;
4398
4399    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4400                tot, tot / 2.4e9);
4401    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4402                " %0.1f%%)\n",
4403                tb_count, s->tb_count1 - tb_count,
4404                (double)(s->tb_count1 - tb_count)
4405                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4406    qemu_printf("avg ops/TB          %0.1f max=%d\n",
4407                (double)s->op_count / tb_div_count, s->op_count_max);
4408    qemu_printf("deleted ops/TB      %0.2f\n",
4409                (double)s->del_op_count / tb_div_count);
4410    qemu_printf("avg temps/TB        %0.2f max=%d\n",
4411                (double)s->temp_count / tb_div_count, s->temp_count_max);
4412    qemu_printf("avg host code/TB    %0.1f\n",
4413                (double)s->code_out_len / tb_div_count);
4414    qemu_printf("avg search data/TB  %0.1f\n",
4415                (double)s->search_out_len / tb_div_count);
4416    
4417    qemu_printf("cycles/op           %0.1f\n",
4418                s->op_count ? (double)tot / s->op_count : 0);
4419    qemu_printf("cycles/in byte      %0.1f\n",
4420                s->code_in_len ? (double)tot / s->code_in_len : 0);
4421    qemu_printf("cycles/out byte     %0.1f\n",
4422                s->code_out_len ? (double)tot / s->code_out_len : 0);
4423    qemu_printf("cycles/search byte  %0.1f\n",
4424                s->search_out_len ? (double)tot / s->search_out_len : 0);
4425    if (tot == 0) {
4426        tot = 1;
4427    }
4428    qemu_printf("  gen_interm time   %0.1f%%\n",
4429                (double)s->interm_time / tot * 100.0);
4430    qemu_printf("  gen_code time     %0.1f%%\n",
4431                (double)s->code_time / tot * 100.0);
4432    qemu_printf("optim./code time    %0.1f%%\n",
4433                (double)s->opt_time / (s->code_time ? s->code_time : 1)
4434                * 100.0);
4435    qemu_printf("liveness/code time  %0.1f%%\n",
4436                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4437    qemu_printf("cpu_restore count   %" PRId64 "\n",
4438                s->restore_count);
4439    qemu_printf("  avg cycles        %0.1f\n",
4440                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4441}
4442#else
4443void tcg_dump_info(void)
4444{
4445    qemu_printf("[TCG profiler not compiled]\n");
4446}
4447#endif
4448
4449#ifdef ELF_HOST_MACHINE
4450/* In order to use this feature, the backend needs to do three things:
4451
4452   (1) Define ELF_HOST_MACHINE to indicate both the value to put
4453       into the ELF image and support for the feature.
4454
4455   (2) Define tcg_register_jit.  This should create a buffer containing
4456       the contents of a .debug_frame section that describes the post-
4457       prologue unwind info for the tcg machine.
4458
4459   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4460*/
4461
4462/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4463typedef enum {
4464    JIT_NOACTION = 0,
4465    JIT_REGISTER_FN,
4466    JIT_UNREGISTER_FN
4467} jit_actions_t;
4468
4469struct jit_code_entry {
4470    struct jit_code_entry *next_entry;
4471    struct jit_code_entry *prev_entry;
4472    const void *symfile_addr;
4473    uint64_t symfile_size;
4474};
4475
4476struct jit_descriptor {
4477    uint32_t version;
4478    uint32_t action_flag;
4479    struct jit_code_entry *relevant_entry;
4480    struct jit_code_entry *first_entry;
4481};
4482
4483void __jit_debug_register_code(void) __attribute__((noinline));
4484void __jit_debug_register_code(void)
4485{
4486    asm("");
4487}
4488
4489/* Must statically initialize the version, because GDB may check
4490   the version before we can set it.  */
4491struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4492
4493/* End GDB interface.  */
4494
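    /* Return the offset of @str within @strtab.  Callers only pass
       strings known to be present in img_template.str, so a not-found
       case need not be handled. */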
4495static int find_string(const char *strtab, const char *str)
4496{
4497    const char *p = strtab + 1;
4498
4499    while (1) {
4500        if (strcmp(p, str) == 0) {
4501            return p - strtab;
4502        }
4503        p += strlen(p) + 1;
4504    }
4505}
4506
4507static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4508                                 const void *debug_frame,
4509                                 size_t debug_frame_size)
4510{
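        /* A minimal DWARF .debug_info payload: one compile-unit DIE and
           one subprogram DIE covering code_gen_buffer, laid out to match
           the abbreviation table in img_template.da below. */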
4511    struct __attribute__((packed)) DebugInfo {
4512        uint32_t  len;
4513        uint16_t  version;
4514        uint32_t  abbrev;
4515        uint8_t   ptr_size;
4516        uint8_t   cu_die;
4517        uint16_t  cu_lang;
4518        uintptr_t cu_low_pc;
4519        uintptr_t cu_high_pc;
4520        uint8_t   fn_die;
4521        char      fn_name[16];
4522        uintptr_t fn_low_pc;
4523        uintptr_t fn_high_pc;
4524        uint8_t   cu_eoc;
4525    };
4526
4527    struct ElfImage {
4528        ElfW(Ehdr) ehdr;
4529        ElfW(Phdr) phdr;
4530        ElfW(Shdr) shdr[7];
4531        ElfW(Sym)  sym[2];
4532        struct DebugInfo di;
4533        uint8_t    da[24];
4534        char       str[80];
4535    };
4536
4537    struct ElfImage *img;
4538
4539    static const struct ElfImage img_template = {
4540        .ehdr = {
4541            .e_ident[EI_MAG0] = ELFMAG0,
4542            .e_ident[EI_MAG1] = ELFMAG1,
4543            .e_ident[EI_MAG2] = ELFMAG2,
4544            .e_ident[EI_MAG3] = ELFMAG3,
4545            .e_ident[EI_CLASS] = ELF_CLASS,
4546            .e_ident[EI_DATA] = ELF_DATA,
4547            .e_ident[EI_VERSION] = EV_CURRENT,
4548            .e_type = ET_EXEC,
4549            .e_machine = ELF_HOST_MACHINE,
4550            .e_version = EV_CURRENT,
4551            .e_phoff = offsetof(struct ElfImage, phdr),
4552            .e_shoff = offsetof(struct ElfImage, shdr),
4553            .e_ehsize = sizeof(ElfW(Ehdr)),
4554            .e_phentsize = sizeof(ElfW(Phdr)),
4555            .e_phnum = 1,
4556            .e_shentsize = sizeof(ElfW(Shdr)),
4557            .e_shnum = ARRAY_SIZE(img->shdr),
4558            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4559#ifdef ELF_HOST_FLAGS
4560            .e_flags = ELF_HOST_FLAGS,
4561#endif
4562#ifdef ELF_OSABI
4563            .e_ident[EI_OSABI] = ELF_OSABI,
4564#endif
4565        },
4566        .phdr = {
4567            .p_type = PT_LOAD,
4568            .p_flags = PF_X,
4569        },
4570        .shdr = {
4571            [0] = { .sh_type = SHT_NULL },
4572            /* Trick: The contents of code_gen_buffer are not present in
4573               this fake ELF file; that got allocated elsewhere.  Therefore
4574               we mark .text as SHT_NOBITS (similar to .bss) so that readers
4575               will not look for contents.  We can record any address.  */
4576            [1] = { /* .text */
4577                .sh_type = SHT_NOBITS,
4578                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4579            },
4580            [2] = { /* .debug_info */
4581                .sh_type = SHT_PROGBITS,
4582                .sh_offset = offsetof(struct ElfImage, di),
4583                .sh_size = sizeof(struct DebugInfo),
4584            },
4585            [3] = { /* .debug_abbrev */
4586                .sh_type = SHT_PROGBITS,
4587                .sh_offset = offsetof(struct ElfImage, da),
4588                .sh_size = sizeof(img->da),
4589            },
4590            [4] = { /* .debug_frame */
4591                .sh_type = SHT_PROGBITS,
4592                .sh_offset = sizeof(struct ElfImage),
4593            },
4594            [5] = { /* .symtab */
4595                .sh_type = SHT_SYMTAB,
4596                .sh_offset = offsetof(struct ElfImage, sym),
4597                .sh_size = sizeof(img->sym),
4598                .sh_info = 1,
4599                .sh_link = ARRAY_SIZE(img->shdr) - 1,
4600                .sh_entsize = sizeof(ElfW(Sym)),
4601            },
4602            [6] = { /* .strtab */
4603                .sh_type = SHT_STRTAB,
4604                .sh_offset = offsetof(struct ElfImage, str),
4605                .sh_size = sizeof(img->str),
4606            }
4607        },
4608        .sym = {
4609            [1] = { /* code_gen_buffer */
4610                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4611                .st_shndx = 1,
4612            }
4613        },
4614        .di = {
4615            .len = sizeof(struct DebugInfo) - 4,
4616            .version = 2,
4617            .ptr_size = sizeof(void *),
4618            .cu_die = 1,
4619            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4620            .fn_die = 2,
4621            .fn_name = "code_gen_buffer"
4622        },
4623        .da = {
4624            1,          /* abbrev number (the cu) */
4625            0x11, 1,    /* DW_TAG_compile_unit, has children */
4626            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4627            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4628            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4629            0, 0,       /* end of abbrev */
4630            2,          /* abbrev number (the fn) */
4631            0x2e, 0,    /* DW_TAG_subprogram, no children */
4632            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4633            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4634            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4635            0, 0,       /* end of abbrev */
4636            0           /* no more abbrev */
4637        },
4638        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4639               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4640    };
4641
4642    /* We only need a single jit entry; statically allocate it.  */
4643    static struct jit_code_entry one_entry;
4644
4645    uintptr_t buf = (uintptr_t)buf_ptr;
4646    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4647    DebugFrameHeader *dfh;
4648
4649    img = g_malloc(img_size);
4650    *img = img_template;
4651
4652    img->phdr.p_vaddr = buf;
4653    img->phdr.p_paddr = buf;
4654    img->phdr.p_memsz = buf_size;
4655
4656    img->shdr[1].sh_name = find_string(img->str, ".text");
4657    img->shdr[1].sh_addr = buf;
4658    img->shdr[1].sh_size = buf_size;
4659
4660    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4661    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4662
4663    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4664    img->shdr[4].sh_size = debug_frame_size;
4665
4666    img->shdr[5].sh_name = find_string(img->str, ".symtab");
4667    img->shdr[6].sh_name = find_string(img->str, ".strtab");
4668
4669    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4670    img->sym[1].st_value = buf;
4671    img->sym[1].st_size = buf_size;
4672
4673    img->di.cu_low_pc = buf;
4674    img->di.cu_high_pc = buf + buf_size;
4675    img->di.fn_low_pc = buf;
4676    img->di.fn_high_pc = buf + buf_size;
4677
4678    dfh = (DebugFrameHeader *)(img + 1);
4679    memcpy(dfh, debug_frame, debug_frame_size);
4680    dfh->fde.func_start = buf;
4681    dfh->fde.func_len = buf_size;
4682
4683#ifdef DEBUG_JIT
4684    /* Enable this block to debug creation of the ELF image file.
4685       One can use readelf, objdump, or other inspection utilities.  */
4686    {
4687        FILE *f = fopen("/tmp/qemu.jit", "w+b");
4688        if (f) {
4689            if (fwrite(img, img_size, 1, f) != 1) {
4690                /* Avoid stupid unused return value warning for fwrite.  */
4691            }
4692            fclose(f);
4693        }
4694    }
4695#endif
4696
4697    one_entry.symfile_addr = img;
4698    one_entry.symfile_size = img_size;
4699
4700    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4701    __jit_debug_descriptor.relevant_entry = &one_entry;
4702    __jit_debug_descriptor.first_entry = &one_entry;
4703    __jit_debug_register_code();
4704}
4705#else
4706/* No support for the feature.  Provide the entry point expected by exec.c,
4707   and implement the internal function we declared earlier.  */
4708
4709static void tcg_register_jit_int(const void *buf, size_t size,
4710                                 const void *debug_frame,
4711                                 size_t debug_frame_size)
4712{
4713}
4714
4715void tcg_register_jit(const void *buf, size_t buf_size)
4716{
4717}
4718#endif /* ELF_HOST_MACHINE */
4719
4720#if !TCG_TARGET_MAYBE_vec
4721void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4722{
4723    g_assert_not_reached();
4724}
4725#endif
4726