qemu/tcg/tcg.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg-internal.h"

#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

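/*
 * These structures carry the DWARF .debug_frame data handed to
 * tcg_register_jit_int(), which wraps the generated code in an
 * in-memory ELF image for the GDB JIT interface (see DEBUG_JIT above).
 */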
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
#ifdef CONFIG_TCG_INTERPRETER
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         ffi_cif *cif);
#else
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
#endif
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

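/*
 * Low-level code emission.  tcg_outN appends an N-bit value at
 * s->code_ptr; when N exceeds the target's insn unit size, the value is
 * written with memcpy, since the buffer need not be aligned for the
 * larger access.  tcg_patchN rewrites a previously emitted value in
 * place (used by the relocation processing further below).
 */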
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

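/*
 * Life cycle (summary): a backend emitting a branch to a not-yet-bound
 * TCGLabel records the patch site with tcg_out_reloc(); tcg_out_label()
 * later fixes the label's address, and tcg_resolve_relocs() walks each
 * label's relocation list, applying patch_reloc() to every recorded site.
 */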
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

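/*
 * Example (illustrative sketch): front ends pair gen_new_label() with a
 * conditional branch and a later gen_set_label(), e.g.
 *
 *     TCGLabel *skip = gen_new_label();
 *     tcg_gen_brcondi_i32(TCG_COND_EQ, var, 0, skip);
 *     ...                             -- ops skipped when var == 0
 *     gen_set_label(skip);
 */
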
static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

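/*
 * For example (illustrative), C_PFX3(c_o1_i2_, r, r, ri) pastes its
 * arguments into the single token c_o1_i2_r_r_ri.
 */
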
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

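/*
 * Note: tcg-target-con-set.h is expanded three times in this file with
 * different macro definitions (an X-macro pattern): above to build the
 * enum, below to populate constraint_sets[], and once more so that
 * tcg_target_op_def() in tcg-target.c.inc can return the enumerator
 * matching each constraint set.
 */
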
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
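    /*
     * The struct copy above duplicated pointers into
     * tcg_init_ctx.temps[]; rebase each mem_base onto this context's
     * own temps[] array.
     */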
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
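/*
 * tcg_malloc() in tcg.h carves allocations out of the current pool
 * chunk and only falls back to tcg_malloc_internal() when the chunk
 * is exhausted; everything is released in bulk by tcg_pool_reset()
 * at the start of each translation.
 */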
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static GHashTable *ffi_table;

static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
#endif

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    ffi_table = g_hash_table_new(NULL, NULL);
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        uint32_t typemask = all_helpers[i].typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        ffi_status status;
        int nargs;

        if (g_hash_table_lookup(ffi_table, hash)) {
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
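        /*
         * Worked example (illustrative): with two i32 arguments, the
         * 3-bit typecodes occupy bits [8:3] of typemask, so
         * typemask >> 3 has 6 significant bits; 32 - clz32(...) == 6
         * and DIV_ROUND_UP(6, 3) == 2 == nargs.
         */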
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi[typemask & 7];
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            /* Use a dedicated index so as not to clobber the outer
               loop over all_helpers.  */
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi[typecode];
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
    }
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(s->code_gen_ptr, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(s->code_gen_ptr, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

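/*
 * Example (illustrative sketch): targets create their CPU-state globals
 * through the typed wrappers around this function, e.g.
 *
 *     cpu_pc = tcg_global_mem_new(cpu_env,
 *                                 offsetof(CPUArchState, pc), "pc");
 *
 * where the "pc" field stands in for whatever the target's env
 * structure actually provides.
 */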
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

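    /*
     * Free temps are tracked per (type, locality) pair: the first
     * TCG_TYPE_COUNT bitmaps cover TEMP_NORMAL, the next TCG_TYPE_COUNT
     * cover TEMP_LOCAL, mirroring the encoding used by
     * tcg_temp_free_internal() below.
     */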
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}

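/*
 * Example (illustrative): front ends obtain interned constants via the
 * typed wrappers, e.g. tcg_constant_i32(0xff) for a byte mask.  The
 * returned temp is shared within the translation and must not be freed
 * by the caller; tcg_temp_free_internal() above silently ignores
 * TEMP_CONST for exactly this reason.
 */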
TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
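
/*
 * Note (context): callers can poll tcg_check_temp_count() once per
 * guest instruction to flag temporaries leaked by the previous one;
 * the count is cleared on each report so a leak warns only once.
 */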

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

1450/* Note: we convert the 64 bit args to 32 bit and do some alignment
1451   and endian swap. Maybe it would be better to do the alignment
1452   and endian swap in tcg_reg_alloc_call(). */
1453void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1454{
1455    int i, real_args, nb_rets, pi;
1456    unsigned typemask;
1457    const TCGHelperInfo *info;
1458    TCGOp *op;
1459
1460    info = g_hash_table_lookup(helper_table, (gpointer)func);
1461    typemask = info->typemask;
1462
1463#ifdef CONFIG_PLUGIN
1464    /* detect non-plugin helpers */
1465    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1466        tcg_ctx->plugin_insn->calls_helpers = true;
1467    }
1468#endif
1469
1470#if defined(__sparc__) && !defined(__arch64__) \
1471    && !defined(CONFIG_TCG_INTERPRETER)
1472    /* We have 64-bit values in one register, but need to pass as two
1473       separate parameters.  Split them.  */
1474    int orig_typemask = typemask;
1475    int orig_nargs = nargs;
1476    TCGv_i64 retl, reth;
1477    TCGTemp *split_args[MAX_OPC_PARAM];
1478
1479    retl = NULL;
1480    reth = NULL;
1481    typemask = 0;
1482    for (i = real_args = 0; i < nargs; ++i) {
1483        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1484        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1485
1486        if (is_64bit) {
1487            TCGv_i64 orig = temp_tcgv_i64(args[i]);
1488            TCGv_i32 h = tcg_temp_new_i32();
1489            TCGv_i32 l = tcg_temp_new_i32();
1490            tcg_gen_extr_i64_i32(l, h, orig);
1491            split_args[real_args++] = tcgv_i32_temp(h);
1492            typemask |= dh_typecode_i32 << (real_args * 3);
1493            split_args[real_args++] = tcgv_i32_temp(l);
1494            typemask |= dh_typecode_i32 << (real_args * 3);
1495        } else {
1496            split_args[real_args++] = args[i];
1497            typemask |= argtype << (real_args * 3);
1498        }
1499    }
1500    nargs = real_args;
1501    args = split_args;
1502#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1503    for (i = 0; i < nargs; ++i) {
1504        int argtype = extract32(typemask, (i + 1) * 3, 3);
1505        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1506        bool is_signed = argtype & 1;
1507
1508        if (is_32bit) {
1509            TCGv_i64 temp = tcg_temp_new_i64();
1510            TCGv_i64 orig = temp_tcgv_i64(args[i]);
1511            if (is_signed) {
1512                tcg_gen_ext32s_i64(temp, orig);
1513            } else {
1514                tcg_gen_ext32u_i64(temp, orig);
1515            }
1516            args[i] = tcgv_i64_temp(temp);
1517        }
1518    }
1519#endif /* TCG_TARGET_EXTEND_ARGS */
1520
1521    op = tcg_emit_op(INDEX_op_call);
1522
1523    pi = 0;
1524    if (ret != NULL) {
1525#if defined(__sparc__) && !defined(__arch64__) \
1526    && !defined(CONFIG_TCG_INTERPRETER)
1527        if ((typemask & 6) == dh_typecode_i64) {
1528            /* The 32-bit ABI is going to return the 64-bit value in
1529               the %o0/%o1 register pair.  Prepare for this by using
1530               two return temporaries, and reassemble below.  */
1531            retl = tcg_temp_new_i64();
1532            reth = tcg_temp_new_i64();
1533            op->args[pi++] = tcgv_i64_arg(reth);
1534            op->args[pi++] = tcgv_i64_arg(retl);
1535            nb_rets = 2;
1536        } else {
1537            op->args[pi++] = temp_arg(ret);
1538            nb_rets = 1;
1539        }
1540#else
1541        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1542#ifdef HOST_WORDS_BIGENDIAN
1543            op->args[pi++] = temp_arg(ret + 1);
1544            op->args[pi++] = temp_arg(ret);
1545#else
1546            op->args[pi++] = temp_arg(ret);
1547            op->args[pi++] = temp_arg(ret + 1);
1548#endif
1549            nb_rets = 2;
1550        } else {
1551            op->args[pi++] = temp_arg(ret);
1552            nb_rets = 1;
1553        }
1554#endif
1555    } else {
1556        nb_rets = 0;
1557    }
1558    TCGOP_CALLO(op) = nb_rets;
1559
1560    real_args = 0;
1561    for (i = 0; i < nargs; i++) {
1562        int argtype = extract32(typemask, (i + 1) * 3, 3);
1563        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1564        bool want_align = false;
1565
1566#if defined(CONFIG_TCG_INTERPRETER)
1567        /*
1568         * Align all arguments, so that they land in predictable places
1569         * for passing off to ffi_call.
1570         */
1571        want_align = true;
1572#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1573        /* Some targets want aligned 64 bit args */
1574        want_align = is_64bit;
1575#endif
1576
1577        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1578            op->args[pi++] = TCG_CALL_DUMMY_ARG;
1579            real_args++;
1580        }
1581
1582        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1583            /*
1584             * If stack grows up, then we will be placing successive
1585             * arguments at lower addresses, which means we need to
1586             * reverse the order compared to how we would normally
1587             * treat either big or little-endian.  For those arguments
1588             * that will wind up in registers, this still works for
1589             * HPPA (the only current STACK_GROWSUP target) since the
1590             * argument registers are *also* allocated in decreasing
1591             * order.  If another such target is added, this logic may
1592             * have to get more complicated to differentiate between
1593             * stack arguments and register arguments.
1594             */
1595#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1596            op->args[pi++] = temp_arg(args[i] + 1);
1597            op->args[pi++] = temp_arg(args[i]);
1598#else
1599            op->args[pi++] = temp_arg(args[i]);
1600            op->args[pi++] = temp_arg(args[i] + 1);
1601#endif
1602            real_args += 2;
1603            continue;
1604        }
1605
1606        op->args[pi++] = temp_arg(args[i]);
1607        real_args++;
1608    }
1609    op->args[pi++] = (uintptr_t)func;
1610    op->args[pi++] = (uintptr_t)info;
1611    TCGOP_CALLI(op) = real_args;
1612
1613    /* Make sure the fields didn't overflow.  */
1614    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1615    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1616
1617#if defined(__sparc__) && !defined(__arch64__) \
1618    && !defined(CONFIG_TCG_INTERPRETER)
1619    /* Free all of the parts we allocated above.  */
1620    for (i = real_args = 0; i < orig_nargs; ++i) {
1621        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1622        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1623
1624        if (is_64bit) {
1625            tcg_temp_free_internal(args[real_args++]);
1626            tcg_temp_free_internal(args[real_args++]);
1627        } else {
1628            real_args++;
1629        }
1630    }
1631    if ((orig_typemask & 6) == dh_typecode_i64) {
1632        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1633           Note that describing these as TCGv_i64 eliminates an unnecessary
1634           zero-extension that tcg_gen_concat_i32_i64 would create.  */
1635        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1636        tcg_temp_free_i64(retl);
1637        tcg_temp_free_i64(reth);
1638    }
1639#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1640    for (i = 0; i < nargs; ++i) {
1641        int argtype = extract32(typemask, (i + 1) * 3, 3);
1642        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1643
1644        if (is_32bit) {
1645            tcg_temp_free_internal(args[i]);
1646        }
1647    }
1648#endif /* TCG_TARGET_EXTEND_ARGS */
1649}
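
/*
 * Illustrative sketch (editorial note, not part of the build): on a
 * 32-bit host a 64-bit helper argument occupies two consecutive 32-bit
 * temporaries, and the #if above merely reverses the order in which
 * that pair is emitted when host endianness and stack direction call
 * for it.  A hypothetical helper(env, x) with a 64-bit x therefore
 * expands to three call arguments -- env plus the two halves of x --
 * preceded by TCG_CALL_DUMMY_ARG when the pair must start on an even
 * argument slot.
 */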
1650
1651static void tcg_reg_alloc_start(TCGContext *s)
1652{
1653    int i, n;
1654
1655    for (i = 0, n = s->nb_temps; i < n; i++) {
1656        TCGTemp *ts = &s->temps[i];
1657        TCGTempVal val = TEMP_VAL_MEM;
1658
1659        switch (ts->kind) {
1660        case TEMP_CONST:
1661            val = TEMP_VAL_CONST;
1662            break;
1663        case TEMP_FIXED:
1664            val = TEMP_VAL_REG;
1665            break;
1666        case TEMP_GLOBAL:
1667            break;
1668        case TEMP_NORMAL:
1669            val = TEMP_VAL_DEAD;
1670            /* fall through */
1671        case TEMP_LOCAL:
1672            ts->mem_allocated = 0;
1673            break;
1674        default:
1675            g_assert_not_reached();
1676        }
1677        ts->val_type = val;
1678    }
1679
1680    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1681}
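
/*
 * Summary of the initial states assigned above:
 *   TEMP_CONST  -> TEMP_VAL_CONST  (materialized into a reg on demand)
 *   TEMP_FIXED  -> TEMP_VAL_REG    (permanently lives in its register)
 *   TEMP_GLOBAL -> TEMP_VAL_MEM    (starts in its canonical memory slot)
 *   TEMP_LOCAL  -> TEMP_VAL_MEM    (likewise, but frame storage is lazy)
 *   TEMP_NORMAL -> TEMP_VAL_DEAD   (holds no value until first written)
 */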
1682
1683static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1684                                 TCGTemp *ts)
1685{
1686    int idx = temp_idx(ts);
1687
1688    switch (ts->kind) {
1689    case TEMP_FIXED:
1690    case TEMP_GLOBAL:
1691        pstrcpy(buf, buf_size, ts->name);
1692        break;
1693    case TEMP_LOCAL:
1694        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1695        break;
1696    case TEMP_NORMAL:
1697        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1698        break;
1699    case TEMP_CONST:
1700        switch (ts->type) {
1701        case TCG_TYPE_I32:
1702            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1703            break;
1704#if TCG_TARGET_REG_BITS > 32
1705        case TCG_TYPE_I64:
1706            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1707            break;
1708#endif
1709        case TCG_TYPE_V64:
1710        case TCG_TYPE_V128:
1711        case TCG_TYPE_V256:
1712            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1713                     64 << (ts->type - TCG_TYPE_V64), ts->val);
1714            break;
1715        default:
1716            g_assert_not_reached();
1717        }
1718        break;
1719    }
1720    return buf;
1721}
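
/*
 * Illustrative outputs of the formatter above: fixed and global temps
 * print by name (e.g. "env"), locals as "loc0", "loc1", ..., normals
 * as "tmp0", "tmp1", ..., and constants as "$0x2a" for integers or
 * "v128$0xff" for a 128-bit vector constant, with the width prefixed.
 */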
1722
1723static char *tcg_get_arg_str(TCGContext *s, char *buf,
1724                             int buf_size, TCGArg arg)
1725{
1726    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1727}
1728
1729static const char * const cond_name[] =
1730{
1731    [TCG_COND_NEVER] = "never",
1732    [TCG_COND_ALWAYS] = "always",
1733    [TCG_COND_EQ] = "eq",
1734    [TCG_COND_NE] = "ne",
1735    [TCG_COND_LT] = "lt",
1736    [TCG_COND_GE] = "ge",
1737    [TCG_COND_LE] = "le",
1738    [TCG_COND_GT] = "gt",
1739    [TCG_COND_LTU] = "ltu",
1740    [TCG_COND_GEU] = "geu",
1741    [TCG_COND_LEU] = "leu",
1742    [TCG_COND_GTU] = "gtu"
1743};
1744
1745static const char * const ldst_name[] =
1746{
1747    [MO_UB]   = "ub",
1748    [MO_SB]   = "sb",
1749    [MO_LEUW] = "leuw",
1750    [MO_LESW] = "lesw",
1751    [MO_LEUL] = "leul",
1752    [MO_LESL] = "lesl",
1753    [MO_LEQ]  = "leq",
1754    [MO_BEUW] = "beuw",
1755    [MO_BESW] = "besw",
1756    [MO_BEUL] = "beul",
1757    [MO_BESL] = "besl",
1758    [MO_BEQ]  = "beq",
1759};
1760
1761static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1762#ifdef TARGET_ALIGNED_ONLY
1763    [MO_UNALN >> MO_ASHIFT]    = "un+",
1764    [MO_ALIGN >> MO_ASHIFT]    = "",
1765#else
1766    [MO_UNALN >> MO_ASHIFT]    = "",
1767    [MO_ALIGN >> MO_ASHIFT]    = "al+",
1768#endif
1769    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1770    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1771    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1772    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1773    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1774    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1775};
1776
1777static const char bswap_flag_name[][6] = {
1778    [TCG_BSWAP_IZ] = "iz",
1779    [TCG_BSWAP_OZ] = "oz",
1780    [TCG_BSWAP_OS] = "os",
1781    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1782    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1783};
1784
1785static inline bool tcg_regset_single(TCGRegSet d)
1786{
1787    return (d & (d - 1)) == 0;
1788}
1789
1790static inline TCGReg tcg_regset_first(TCGRegSet d)
1791{
1792    if (TCG_TARGET_NB_REGS <= 32) {
1793        return ctz32(d);
1794    } else {
1795        return ctz64(d);
1796    }
1797}
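
/*
 * Worked example for the two helpers above: for d = 0b01001000,
 * d & (d - 1) = 0b01000000 is nonzero, so the set holds more than one
 * register; for d = 0b00001000 the AND clears the only bit set, and
 * tcg_regset_first(d) = ctz(d) = 3 selects register 3.
 */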
1798
1799static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1800{
1801    char buf[128];
1802    TCGOp *op;
1803
1804    QTAILQ_FOREACH(op, &s->ops, link) {
1805        int i, k, nb_oargs, nb_iargs, nb_cargs;
1806        const TCGOpDef *def;
1807        TCGOpcode c;
1808        int col = 0;
1809
1810        c = op->opc;
1811        def = &tcg_op_defs[c];
1812
1813        if (c == INDEX_op_insn_start) {
1814            nb_oargs = 0;
1815            col += qemu_log("\n ----");
1816
1817            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1818                target_ulong a;
1819#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1820                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1821#else
1822                a = op->args[i];
1823#endif
1824                col += qemu_log(" " TARGET_FMT_lx, a);
1825            }
1826        } else if (c == INDEX_op_call) {
1827            const TCGHelperInfo *info = tcg_call_info(op);
1828            void *func = tcg_call_func(op);
1829
1830            /* variable number of arguments */
1831            nb_oargs = TCGOP_CALLO(op);
1832            nb_iargs = TCGOP_CALLI(op);
1833            nb_cargs = def->nb_cargs;
1834
1835            col += qemu_log(" %s ", def->name);
1836
1837            /*
1838             * Print the function name from TCGHelperInfo, if available.
1839             * Note that plugins have a template function for the info,
1840             * but the actual function pointer comes from the plugin.
1841             */
1842            if (func == info->func) {
1843                col += qemu_log("%s", info->name);
1844            } else {
1845                col += qemu_log("plugin(%p)", func);
1846            }
1847
1848            col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1849            for (i = 0; i < nb_oargs; i++) {
1850                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1851                                                       op->args[i]));
1852            }
1853            for (i = 0; i < nb_iargs; i++) {
1854                TCGArg arg = op->args[nb_oargs + i];
1855                const char *t = "<dummy>";
1856                if (arg != TCG_CALL_DUMMY_ARG) {
1857                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1858                }
1859                col += qemu_log(",%s", t);
1860            }
1861        } else {
1862            col += qemu_log(" %s ", def->name);
1863
1864            nb_oargs = def->nb_oargs;
1865            nb_iargs = def->nb_iargs;
1866            nb_cargs = def->nb_cargs;
1867
1868            if (def->flags & TCG_OPF_VECTOR) {
1869                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1870                                8 << TCGOP_VECE(op));
1871            }
1872
1873            k = 0;
1874            for (i = 0; i < nb_oargs; i++) {
1875                if (k != 0) {
1876                    col += qemu_log(",");
1877                }
1878                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1879                                                      op->args[k++]));
1880            }
1881            for (i = 0; i < nb_iargs; i++) {
1882                if (k != 0) {
1883                    col += qemu_log(",");
1884                }
1885                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1886                                                      op->args[k++]));
1887            }
1888            switch (c) {
1889            case INDEX_op_brcond_i32:
1890            case INDEX_op_setcond_i32:
1891            case INDEX_op_movcond_i32:
1892            case INDEX_op_brcond2_i32:
1893            case INDEX_op_setcond2_i32:
1894            case INDEX_op_brcond_i64:
1895            case INDEX_op_setcond_i64:
1896            case INDEX_op_movcond_i64:
1897            case INDEX_op_cmp_vec:
1898            case INDEX_op_cmpsel_vec:
1899                if (op->args[k] < ARRAY_SIZE(cond_name)
1900                    && cond_name[op->args[k]]) {
1901                    col += qemu_log(",%s", cond_name[op->args[k++]]);
1902                } else {
1903                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1904                }
1905                i = 1;
1906                break;
1907            case INDEX_op_qemu_ld_i32:
1908            case INDEX_op_qemu_st_i32:
1909            case INDEX_op_qemu_st8_i32:
1910            case INDEX_op_qemu_ld_i64:
1911            case INDEX_op_qemu_st_i64:
1912                {
1913                    TCGMemOpIdx oi = op->args[k++];
1914                    MemOp op = get_memop(oi);
1915                    unsigned ix = get_mmuidx(oi);
1916
1917                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1918                        col += qemu_log(",$0x%x,%u", op, ix);
1919                    } else {
1920                        const char *s_al, *s_op;
1921                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1922                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1923                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1924                    }
1925                    i = 1;
1926                }
1927                break;
1928            case INDEX_op_bswap16_i32:
1929            case INDEX_op_bswap16_i64:
1930            case INDEX_op_bswap32_i32:
1931            case INDEX_op_bswap32_i64:
1932            case INDEX_op_bswap64_i64:
1933                {
1934                    TCGArg flags = op->args[k];
1935                    const char *name = NULL;
1936
1937                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
1938                        name = bswap_flag_name[flags];
1939                    }
1940                    if (name) {
1941                        col += qemu_log(",%s", name);
1942                    } else {
1943                        col += qemu_log(",$0x%" TCG_PRIlx, flags);
1944                    }
1945                    i = k = 1;
1946                }
1947                break;
1948            default:
1949                i = 0;
1950                break;
1951            }
1952            switch (c) {
1953            case INDEX_op_set_label:
1954            case INDEX_op_br:
1955            case INDEX_op_brcond_i32:
1956            case INDEX_op_brcond_i64:
1957            case INDEX_op_brcond2_i32:
1958                col += qemu_log("%s$L%d", k ? "," : "",
1959                                arg_label(op->args[k])->id);
1960                i++, k++;
1961                break;
1962            default:
1963                break;
1964            }
1965            for (; i < nb_cargs; i++, k++) {
1966                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1967            }
1968        }
1969
1970        if (have_prefs || op->life) {
1971
1972            QemuLogFile *logfile;
1973
1974            rcu_read_lock();
1975            logfile = qatomic_rcu_read(&qemu_logfile);
1976            if (logfile) {
1977                for (; col < 40; ++col) {
1978                    putc(' ', logfile->fd);
1979                }
1980            }
1981            rcu_read_unlock();
1982        }
1983
1984        if (op->life) {
1985            unsigned life = op->life;
1986
1987            if (life & (SYNC_ARG * 3)) {
1988                qemu_log("  sync:");
1989                for (i = 0; i < 2; ++i) {
1990                    if (life & (SYNC_ARG << i)) {
1991                        qemu_log(" %d", i);
1992                    }
1993                }
1994            }
1995            life /= DEAD_ARG;
1996            if (life) {
1997                qemu_log("  dead:");
1998                for (i = 0; life; ++i, life >>= 1) {
1999                    if (life & 1) {
2000                        qemu_log(" %d", i);
2001                    }
2002                }
2003            }
2004        }
2005
2006        if (have_prefs) {
2007            for (i = 0; i < nb_oargs; ++i) {
2008                TCGRegSet set = op->output_pref[i];
2009
2010                if (i == 0) {
2011                    qemu_log("  pref=");
2012                } else {
2013                    qemu_log(",");
2014                }
2015                if (set == 0) {
2016                    qemu_log("none");
2017                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2018                    qemu_log("all");
2019#ifdef CONFIG_DEBUG_TCG
2020                } else if (tcg_regset_single(set)) {
2021                    TCGReg reg = tcg_regset_first(set);
2022                    qemu_log("%s", tcg_target_reg_names[reg]);
2023#endif
2024                } else if (TCG_TARGET_NB_REGS <= 32) {
2025                    qemu_log("%#x", (uint32_t)set);
2026                } else {
2027                    qemu_log("%#" PRIx64, (uint64_t)set);
2028                }
2029            }
2030        }
2031
2032        qemu_log("\n");
2033    }
2034}
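
/*
 * The log produced above looks roughly like this (illustrative and
 * approximate, not captured from a real run):
 *
 *  ---- 0000000000400580 0000000000000000
 *  mov_i32 tmp0,r1                          dead: 1
 *  add_i32 tmp1,tmp0,tmp0                   dead: 1 2  pref=none
 *  brcond_i32 tmp1,$0x0,eq,$L1              dead: 0
 *
 * The sync/dead annotations appear once liveness has run, and pref
 * shows the output register preferences, padded to column 40.
 */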
2035
2036/* we give more priority to constraints with fewer registers */
2037static int get_constraint_priority(const TCGOpDef *def, int k)
2038{
2039    const TCGArgConstraint *arg_ct = &def->args_ct[k];
2040    int n;
2041
2042    if (arg_ct->oalias) {
2043        /* an alias is equivalent to a single register */
2044        n = 1;
2045    } else {
2046        n = ctpop64(arg_ct->regs);
2047    }
2048    return TCG_TARGET_NB_REGS - n + 1;
2049}
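
/*
 * For example, with TCG_TARGET_NB_REGS = 16: an aliased operand counts
 * as a single register and gets priority 16, as does a constraint
 * admitting exactly one register, while a constraint admitting all 16
 * registers gets priority 1 -- the most constrained operands sort first.
 */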
2050
2051/* sort from highest priority to lowest */
2052static void sort_constraints(TCGOpDef *def, int start, int n)
2053{
2054    int i, j;
2055    TCGArgConstraint *a = def->args_ct;
2056
2057    for (i = 0; i < n; i++) {
2058        a[start + i].sort_index = start + i;
2059    }
2060    if (n <= 1) {
2061        return;
2062    }
2063    for (i = 0; i < n - 1; i++) {
2064        for (j = i + 1; j < n; j++) {
2065            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2066            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2067            if (p1 < p2) {
2068                int tmp = a[start + i].sort_index;
2069                a[start + i].sort_index = a[start + j].sort_index;
2070                a[start + j].sort_index = tmp;
2071            }
2072        }
2073    }
2074}
2075
2076static void process_op_defs(TCGContext *s)
2077{
2078    TCGOpcode op;
2079
2080    for (op = 0; op < NB_OPS; op++) {
2081        TCGOpDef *def = &tcg_op_defs[op];
2082        const TCGTargetOpDef *tdefs;
2083        int i, nb_args;
2084
2085        if (def->flags & TCG_OPF_NOT_PRESENT) {
2086            continue;
2087        }
2088
2089        nb_args = def->nb_iargs + def->nb_oargs;
2090        if (nb_args == 0) {
2091            continue;
2092        }
2093
2094        /*
2095         * Macro magic should make it impossible, but double-check that
2096         * the array index is in range.  Since the signedness of an enum
2097         * is implementation-defined, force the result to unsigned.
2098         */
2099        unsigned con_set = tcg_target_op_def(op);
2100        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2101        tdefs = &constraint_sets[con_set];
2102
2103        for (i = 0; i < nb_args; i++) {
2104            const char *ct_str = tdefs->args_ct_str[i];
2105            /* Incomplete TCGTargetOpDef entry. */
2106            tcg_debug_assert(ct_str != NULL);
2107
2108            while (*ct_str != '\0') {
2109                switch(*ct_str) {
2110                case '0' ... '9':
2111                    {
2112                        int oarg = *ct_str - '0';
2113                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2114                        tcg_debug_assert(oarg < def->nb_oargs);
2115                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
2116                        def->args_ct[i] = def->args_ct[oarg];
2117                        /* The output sets oalias.  */
2118                        def->args_ct[oarg].oalias = true;
2119                        def->args_ct[oarg].alias_index = i;
2120                        /* The input sets ialias. */
2121                        def->args_ct[i].ialias = true;
2122                        def->args_ct[i].alias_index = oarg;
2123                    }
2124                    ct_str++;
2125                    break;
2126                case '&':
2127                    def->args_ct[i].newreg = true;
2128                    ct_str++;
2129                    break;
2130                case 'i':
2131                    def->args_ct[i].ct |= TCG_CT_CONST;
2132                    ct_str++;
2133                    break;
2134
2135                /* Include all of the target-specific constraints. */
2136
2137#undef CONST
2138#define CONST(CASE, MASK) \
2139    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2140#define REGS(CASE, MASK) \
2141    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2142
2143#include "tcg-target-con-str.h"
2144
2145#undef REGS
2146#undef CONST
2147                default:
2148                    /* Typo in TCGTargetOpDef constraint. */
2149                    g_assert_not_reached();
2150                }
2151            }
2152        }
2153
2154        /* TCGTargetOpDef entry with too much information? */
2155        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2156
2157        /* sort the constraints (XXX: this is just a heuristic) */
2158        sort_constraints(def, 0, def->nb_oargs);
2159        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2160    }
2161}
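
/*
 * As an illustration (hypothetical constraint set, not taken from any
 * real backend): the strings { "r", "0", "ri" } describe an op whose
 * output may use any register ("r"), whose first input must alias
 * output 0 ("0"), and whose second input accepts a register or an
 * immediate ("ri").  The parser above expands letters into regs/ct
 * mask bits via tcg-target-con-str.h and digits into the
 * oalias/ialias links consumed by the register allocator.
 */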
2162
2163void tcg_op_remove(TCGContext *s, TCGOp *op)
2164{
2165    TCGLabel *label;
2166
2167    switch (op->opc) {
2168    case INDEX_op_br:
2169        label = arg_label(op->args[0]);
2170        label->refs--;
2171        break;
2172    case INDEX_op_brcond_i32:
2173    case INDEX_op_brcond_i64:
2174        label = arg_label(op->args[3]);
2175        label->refs--;
2176        break;
2177    case INDEX_op_brcond2_i32:
2178        label = arg_label(op->args[5]);
2179        label->refs--;
2180        break;
2181    default:
2182        break;
2183    }
2184
2185    QTAILQ_REMOVE(&s->ops, op, link);
2186    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2187    s->nb_ops--;
2188
2189#ifdef CONFIG_PROFILER
2190    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2191#endif
2192}
2193
2194void tcg_remove_ops_after(TCGOp *op)
2195{
2196    TCGContext *s = tcg_ctx;
2197
2198    while (true) {
2199        TCGOp *last = tcg_last_op();
2200        if (last == op) {
2201            return;
2202        }
2203        tcg_op_remove(s, last);
2204    }
2205}
2206
2207static TCGOp *tcg_op_alloc(TCGOpcode opc)
2208{
2209    TCGContext *s = tcg_ctx;
2210    TCGOp *op;
2211
2212    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2213        op = tcg_malloc(sizeof(TCGOp));
2214    } else {
2215        op = QTAILQ_FIRST(&s->free_ops);
2216        QTAILQ_REMOVE(&s->free_ops, op, link);
2217    }
2218    memset(op, 0, offsetof(TCGOp, link));
2219    op->opc = opc;
2220    s->nb_ops++;
2221
2222    return op;
2223}
2224
2225TCGOp *tcg_emit_op(TCGOpcode opc)
2226{
2227    TCGOp *op = tcg_op_alloc(opc);
2228    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2229    return op;
2230}
2231
2232TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2233{
2234    TCGOp *new_op = tcg_op_alloc(opc);
2235    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2236    return new_op;
2237}
2238
2239TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2240{
2241    TCGOp *new_op = tcg_op_alloc(opc);
2242    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2243    return new_op;
2244}
2245
2246/* Reachability analysis: remove unreachable code.  */
2247static void reachable_code_pass(TCGContext *s)
2248{
2249    TCGOp *op, *op_next;
2250    bool dead = false;
2251
2252    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2253        bool remove = dead;
2254        TCGLabel *label;
2255
2256        switch (op->opc) {
2257        case INDEX_op_set_label:
2258            label = arg_label(op->args[0]);
2259            if (label->refs == 0) {
2260                /*
2261                 * While there is an occasional backward branch, virtually
2262                 * all branches generated by the translators are forward.
2263                 * This means that by the time we see a label with no
2264                 * references, we will generally have already removed every
2265                 * branch to it, and there is little to be gained by iterating.
2266                 */
2267                remove = true;
2268            } else {
2269                /* Once we see a label, insns become live again.  */
2270                dead = false;
2271                remove = false;
2272
2273                /*
2274                 * Optimization can fold conditional branches to unconditional.
2275                 * If we find a label with one reference which is preceded by
2276                 * an unconditional branch to it, remove both.  This had to
2277                 * wait until the dead code in between them was removed.
2278                 */
2279                if (label->refs == 1) {
2280                    TCGOp *op_prev = QTAILQ_PREV(op, link);
2281                    if (op_prev->opc == INDEX_op_br &&
2282                        label == arg_label(op_prev->args[0])) {
2283                        tcg_op_remove(s, op_prev);
2284                        remove = true;
2285                    }
2286                }
2287            }
2288            break;
2289
2290        case INDEX_op_br:
2291        case INDEX_op_exit_tb:
2292        case INDEX_op_goto_ptr:
2293            /* Unconditional branches; everything following is dead.  */
2294            dead = true;
2295            break;
2296
2297        case INDEX_op_call:
2298            /* Notice noreturn helper calls, which raise exceptions.  */
2299            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2300                dead = true;
2301            }
2302            break;
2303
2304        case INDEX_op_insn_start:
2305            /* Never remove -- we need to keep these for unwind.  */
2306            remove = false;
2307            break;
2308
2309        default:
2310            break;
2311        }
2312
2313        if (remove) {
2314            tcg_op_remove(s, op);
2315        }
2316    }
2317}
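
/*
 * Illustrative effect of the pass above on a hypothetical stream:
 *
 *     br $L1
 *     mov_i32 t1,t2        <- dead, removed
 *     exit_tb $0x0         <- dead, removed
 *     set_label $L1
 *
 * Once the dead ops are gone, $L1 has a single reference coming from
 * the br immediately before it, so the br/set_label pair is removed
 * as well.
 */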
2318
2319#define TS_DEAD  1
2320#define TS_MEM   2
2321
2322#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2323#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
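
/*
 * Assuming the encoding from tcg.h -- sync bits for the first two
 * outputs in bits 0-1, one dead bit per argument starting at bit 2 --
 * an arg_life of (SYNC_ARG | (DEAD_ARG << 2)) records that output 0
 * must be synced back to memory and that argument 2 dies at this op.
 */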
2324
2325/* For liveness_pass_1, the register preferences for a given temp.  */
2326static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2327{
2328    return ts->state_ptr;
2329}
2330
2331/* For liveness_pass_1, reset the preferences for a given temp to the
2332 * maximal regset for its type.
2333 */
2334static inline void la_reset_pref(TCGTemp *ts)
2335{
2336    *la_temp_pref(ts)
2337        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2338}
2339
2340/* liveness analysis: end of function: all temps are dead, and globals
2341   should be in memory. */
2342static void la_func_end(TCGContext *s, int ng, int nt)
2343{
2344    int i;
2345
2346    for (i = 0; i < ng; ++i) {
2347        s->temps[i].state = TS_DEAD | TS_MEM;
2348        la_reset_pref(&s->temps[i]);
2349    }
2350    for (i = ng; i < nt; ++i) {
2351        s->temps[i].state = TS_DEAD;
2352        la_reset_pref(&s->temps[i]);
2353    }
2354}
2355
2356/* liveness analysis: end of basic block: all temps are dead, globals
2357   and local temps should be in memory. */
2358static void la_bb_end(TCGContext *s, int ng, int nt)
2359{
2360    int i;
2361
2362    for (i = 0; i < nt; ++i) {
2363        TCGTemp *ts = &s->temps[i];
2364        int state;
2365
2366        switch (ts->kind) {
2367        case TEMP_FIXED:
2368        case TEMP_GLOBAL:
2369        case TEMP_LOCAL:
2370            state = TS_DEAD | TS_MEM;
2371            break;
2372        case TEMP_NORMAL:
2373        case TEMP_CONST:
2374            state = TS_DEAD;
2375            break;
2376        default:
2377            g_assert_not_reached();
2378        }
2379        ts->state = state;
2380        la_reset_pref(ts);
2381    }
2382}
2383
2384/* liveness analysis: sync globals back to memory.  */
2385static void la_global_sync(TCGContext *s, int ng)
2386{
2387    int i;
2388
2389    for (i = 0; i < ng; ++i) {
2390        int state = s->temps[i].state;
2391        s->temps[i].state = state | TS_MEM;
2392        if (state == TS_DEAD) {
2393            /* If the global was previously dead, reset prefs.  */
2394            la_reset_pref(&s->temps[i]);
2395        }
2396    }
2397}
2398
2399/*
2400 * liveness analysis: conditional branch: all temps are dead,
2401 * globals and local temps should be synced.
2402 */
2403static void la_bb_sync(TCGContext *s, int ng, int nt)
2404{
2405    la_global_sync(s, ng);
2406
2407    for (int i = ng; i < nt; ++i) {
2408        TCGTemp *ts = &s->temps[i];
2409        int state;
2410
2411        switch (ts->kind) {
2412        case TEMP_LOCAL:
2413            state = ts->state;
2414            ts->state = state | TS_MEM;
2415            if (state != TS_DEAD) {
2416                continue;
2417            }
2418            break;
2419        case TEMP_NORMAL:
2420            s->temps[i].state = TS_DEAD;
2421            break;
2422        case TEMP_CONST:
2423            continue;
2424        default:
2425            g_assert_not_reached();
2426        }
2427        la_reset_pref(&s->temps[i]);
2428    }
2429}
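
/*
 * Summary of the end-of-region states established by the la_* helpers,
 * per temp kind:
 *
 *                  la_func_end    la_bb_end     la_bb_sync
 *   fixed/global   DEAD|MEM       DEAD|MEM      state|MEM (stays live)
 *   TEMP_LOCAL     DEAD           DEAD|MEM      state|MEM (stays live)
 *   TEMP_NORMAL    DEAD           DEAD          DEAD
 *   TEMP_CONST     DEAD           DEAD          unchanged
 */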
2430
2431/* liveness analysis: sync globals back to memory and kill.  */
2432static void la_global_kill(TCGContext *s, int ng)
2433{
2434    int i;
2435
2436    for (i = 0; i < ng; i++) {
2437        s->temps[i].state = TS_DEAD | TS_MEM;
2438        la_reset_pref(&s->temps[i]);
2439    }
2440}
2441
2442/* liveness analysis: note live globals crossing calls.  */
2443static void la_cross_call(TCGContext *s, int nt)
2444{
2445    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2446    int i;
2447
2448    for (i = 0; i < nt; i++) {
2449        TCGTemp *ts = &s->temps[i];
2450        if (!(ts->state & TS_DEAD)) {
2451            TCGRegSet *pset = la_temp_pref(ts);
2452            TCGRegSet set = *pset;
2453
2454            set &= mask;
2455            /* If the combination is not possible, restart.  */
2456            if (set == 0) {
2457                set = tcg_target_available_regs[ts->type] & mask;
2458            }
2459            *pset = set;
2460        }
2461    }
2462}
2463
2464/* Liveness analysis: update the opc_arg_life array to tell if a
2465   given input argument is dead. Instructions updating dead
2466   temporaries are removed. */
2467static void liveness_pass_1(TCGContext *s)
2468{
2469    int nb_globals = s->nb_globals;
2470    int nb_temps = s->nb_temps;
2471    TCGOp *op, *op_prev;
2472    TCGRegSet *prefs;
2473    int i;
2474
2475    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2476    for (i = 0; i < nb_temps; ++i) {
2477        s->temps[i].state_ptr = prefs + i;
2478    }
2479
2480    /* ??? Should be redundant with the exit_tb that ends the TB.  */
2481    la_func_end(s, nb_globals, nb_temps);
2482
2483    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2484        int nb_iargs, nb_oargs;
2485        TCGOpcode opc_new, opc_new2;
2486        bool have_opc_new2;
2487        TCGLifeData arg_life = 0;
2488        TCGTemp *ts;
2489        TCGOpcode opc = op->opc;
2490        const TCGOpDef *def = &tcg_op_defs[opc];
2491
2492        switch (opc) {
2493        case INDEX_op_call:
2494            {
2495                int call_flags;
2496                int nb_call_regs;
2497
2498                nb_oargs = TCGOP_CALLO(op);
2499                nb_iargs = TCGOP_CALLI(op);
2500                call_flags = tcg_call_flags(op);
2501
2502                /* pure functions can be removed if their result is unused */
2503                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2504                    for (i = 0; i < nb_oargs; i++) {
2505                        ts = arg_temp(op->args[i]);
2506                        if (ts->state != TS_DEAD) {
2507                            goto do_not_remove_call;
2508                        }
2509                    }
2510                    goto do_remove;
2511                }
2512            do_not_remove_call:
2513
2514                /* Output args are dead.  */
2515                for (i = 0; i < nb_oargs; i++) {
2516                    ts = arg_temp(op->args[i]);
2517                    if (ts->state & TS_DEAD) {
2518                        arg_life |= DEAD_ARG << i;
2519                    }
2520                    if (ts->state & TS_MEM) {
2521                        arg_life |= SYNC_ARG << i;
2522                    }
2523                    ts->state = TS_DEAD;
2524                    la_reset_pref(ts);
2525
2526                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2527                    op->output_pref[i] = 0;
2528                }
2529
2530                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2531                                    TCG_CALL_NO_READ_GLOBALS))) {
2532                    la_global_kill(s, nb_globals);
2533                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2534                    la_global_sync(s, nb_globals);
2535                }
2536
2537                /* Record arguments that die in this helper.  */
2538                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2539                    ts = arg_temp(op->args[i]);
2540                    if (ts && ts->state & TS_DEAD) {
2541                        arg_life |= DEAD_ARG << i;
2542                    }
2543                }
2544
2545                /* For all live registers, remove call-clobbered prefs.  */
2546                la_cross_call(s, nb_temps);
2547
2548                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2549
2550                /* Input arguments are live for preceding opcodes.  */
2551                for (i = 0; i < nb_iargs; i++) {
2552                    ts = arg_temp(op->args[i + nb_oargs]);
2553                    if (ts && ts->state & TS_DEAD) {
2554                        /* For those arguments that die, and will be allocated
2555                         * in registers, clear the register set for that arg,
2556                         * to be filled in below.  For args that will be on
2557                         * the stack, reset to any available reg.
2558                         */
2559                        *la_temp_pref(ts)
2560                            = (i < nb_call_regs ? 0 :
2561                               tcg_target_available_regs[ts->type]);
2562                        ts->state &= ~TS_DEAD;
2563                    }
2564                }
2565
2566                /* For each input argument, add its input register to prefs.
2567                   If a temp is used once, this produces a single set bit.  */
2568                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2569                    ts = arg_temp(op->args[i + nb_oargs]);
2570                    if (ts) {
2571                        tcg_regset_set_reg(*la_temp_pref(ts),
2572                                           tcg_target_call_iarg_regs[i]);
2573                    }
2574                }
2575            }
2576            break;
2577        case INDEX_op_insn_start:
2578            break;
2579        case INDEX_op_discard:
2580            /* mark the temporary as dead */
2581            ts = arg_temp(op->args[0]);
2582            ts->state = TS_DEAD;
2583            la_reset_pref(ts);
2584            break;
2585
2586        case INDEX_op_add2_i32:
2587            opc_new = INDEX_op_add_i32;
2588            goto do_addsub2;
2589        case INDEX_op_sub2_i32:
2590            opc_new = INDEX_op_sub_i32;
2591            goto do_addsub2;
2592        case INDEX_op_add2_i64:
2593            opc_new = INDEX_op_add_i64;
2594            goto do_addsub2;
2595        case INDEX_op_sub2_i64:
2596            opc_new = INDEX_op_sub_i64;
2597        do_addsub2:
2598            nb_iargs = 4;
2599            nb_oargs = 2;
2600            /* Test if the high part of the operation is dead, but not
2601               the low part.  The result can be optimized to a simple
2602               add or sub.  This happens often for an x86_64 guest when
2603               the CPU mode is set to 32-bit.  */
2604            if (arg_temp(op->args[1])->state == TS_DEAD) {
2605                if (arg_temp(op->args[0])->state == TS_DEAD) {
2606                    goto do_remove;
2607                }
2608                /* Replace the opcode and adjust the args in place,
2609                   leaving 3 unused args at the end.  */
2610                op->opc = opc = opc_new;
2611                op->args[1] = op->args[2];
2612                op->args[2] = op->args[4];
2613                /* Fall through and mark the single-word operation live.  */
2614                nb_iargs = 2;
2615                nb_oargs = 1;
2616            }
2617            goto do_not_remove;
2618
2619        case INDEX_op_mulu2_i32:
2620            opc_new = INDEX_op_mul_i32;
2621            opc_new2 = INDEX_op_muluh_i32;
2622            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2623            goto do_mul2;
2624        case INDEX_op_muls2_i32:
2625            opc_new = INDEX_op_mul_i32;
2626            opc_new2 = INDEX_op_mulsh_i32;
2627            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2628            goto do_mul2;
2629        case INDEX_op_mulu2_i64:
2630            opc_new = INDEX_op_mul_i64;
2631            opc_new2 = INDEX_op_muluh_i64;
2632            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2633            goto do_mul2;
2634        case INDEX_op_muls2_i64:
2635            opc_new = INDEX_op_mul_i64;
2636            opc_new2 = INDEX_op_mulsh_i64;
2637            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2638            goto do_mul2;
2639        do_mul2:
2640            nb_iargs = 2;
2641            nb_oargs = 2;
2642            if (arg_temp(op->args[1])->state == TS_DEAD) {
2643                if (arg_temp(op->args[0])->state == TS_DEAD) {
2644                    /* Both parts of the operation are dead.  */
2645                    goto do_remove;
2646                }
2647                /* The high part of the operation is dead; generate the low. */
2648                op->opc = opc = opc_new;
2649                op->args[1] = op->args[2];
2650                op->args[2] = op->args[3];
2651            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2652                /* The low part of the operation is dead; generate the high. */
2653                op->opc = opc = opc_new2;
2654                op->args[0] = op->args[1];
2655                op->args[1] = op->args[2];
2656                op->args[2] = op->args[3];
2657            } else {
2658                goto do_not_remove;
2659            }
2660            /* Mark the single-word operation live.  */
2661            nb_oargs = 1;
2662            goto do_not_remove;
2663
2664        default:
2665            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2666            nb_iargs = def->nb_iargs;
2667            nb_oargs = def->nb_oargs;
2668
2669            /* Test if the operation can be removed because all
2670               its outputs are dead. We assume that nb_oargs == 0
2671               implies side effects.  */
2672            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2673                for (i = 0; i < nb_oargs; i++) {
2674                    if (arg_temp(op->args[i])->state != TS_DEAD) {
2675                        goto do_not_remove;
2676                    }
2677                }
2678                goto do_remove;
2679            }
2680            goto do_not_remove;
2681
2682        do_remove:
2683            tcg_op_remove(s, op);
2684            break;
2685
2686        do_not_remove:
2687            for (i = 0; i < nb_oargs; i++) {
2688                ts = arg_temp(op->args[i]);
2689
2690                /* Remember the preference of the uses that followed.  */
2691                op->output_pref[i] = *la_temp_pref(ts);
2692
2693                /* Output args are dead.  */
2694                if (ts->state & TS_DEAD) {
2695                    arg_life |= DEAD_ARG << i;
2696                }
2697                if (ts->state & TS_MEM) {
2698                    arg_life |= SYNC_ARG << i;
2699                }
2700                ts->state = TS_DEAD;
2701                la_reset_pref(ts);
2702            }
2703
2704            /* If end of basic block, update.  */
2705            if (def->flags & TCG_OPF_BB_EXIT) {
2706                la_func_end(s, nb_globals, nb_temps);
2707            } else if (def->flags & TCG_OPF_COND_BRANCH) {
2708                la_bb_sync(s, nb_globals, nb_temps);
2709            } else if (def->flags & TCG_OPF_BB_END) {
2710                la_bb_end(s, nb_globals, nb_temps);
2711            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2712                la_global_sync(s, nb_globals);
2713                if (def->flags & TCG_OPF_CALL_CLOBBER) {
2714                    la_cross_call(s, nb_temps);
2715                }
2716            }
2717
2718            /* Record arguments that die in this opcode.  */
2719            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2720                ts = arg_temp(op->args[i]);
2721                if (ts->state & TS_DEAD) {
2722                    arg_life |= DEAD_ARG << i;
2723                }
2724            }
2725
2726            /* Input arguments are live for preceding opcodes.  */
2727            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2728                ts = arg_temp(op->args[i]);
2729                if (ts->state & TS_DEAD) {
2730                    /* For operands that were dead, initially allow
2731                       all regs for the type.  */
2732                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2733                    ts->state &= ~TS_DEAD;
2734                }
2735            }
2736
2737            /* Incorporate constraints for this operand.  */
2738            switch (opc) {
2739            case INDEX_op_mov_i32:
2740            case INDEX_op_mov_i64:
2741                /* Note that these are TCG_OPF_NOT_PRESENT and do not
2742                   have proper constraints.  That said, special case
2743                   moves to propagate preferences backward.  */
2744                if (IS_DEAD_ARG(1)) {
2745                    *la_temp_pref(arg_temp(op->args[0]))
2746                        = *la_temp_pref(arg_temp(op->args[1]));
2747                }
2748                break;
2749
2750            default:
2751                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2752                    const TCGArgConstraint *ct = &def->args_ct[i];
2753                    TCGRegSet set, *pset;
2754
2755                    ts = arg_temp(op->args[i]);
2756                    pset = la_temp_pref(ts);
2757                    set = *pset;
2758
2759                    set &= ct->regs;
2760                    if (ct->ialias) {
2761                        set &= op->output_pref[ct->alias_index];
2762                    }
2763                    /* If the combination is not possible, restart.  */
2764                    if (set == 0) {
2765                        set = ct->regs;
2766                    }
2767                    *pset = set;
2768                }
2769                break;
2770            }
2771            break;
2772        }
2773        op->life = arg_life;
2774    }
2775}
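
/*
 * Example of the opcode narrowing performed above (hypothetical temps):
 * given
 *
 *     add2_i32 lo,hi,al,ah,bl,bh
 *
 * where hi is dead but lo is not, the op is rewritten in place to
 *
 *     add_i32 lo,al,bl
 *
 * and mulu2/muls2 similarly degrade to mul, or to mul[us]h when only
 * the high half of the product is live and the target provides it.
 */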
2776
2777/* Liveness analysis: Convert indirect regs to direct temporaries.  */
2778static bool liveness_pass_2(TCGContext *s)
2779{
2780    int nb_globals = s->nb_globals;
2781    int nb_temps, i;
2782    bool changes = false;
2783    TCGOp *op, *op_next;
2784
2785    /* Create a temporary for each indirect global.  */
2786    for (i = 0; i < nb_globals; ++i) {
2787        TCGTemp *its = &s->temps[i];
2788        if (its->indirect_reg) {
2789            TCGTemp *dts = tcg_temp_alloc(s);
2790            dts->type = its->type;
2791            dts->base_type = its->base_type;
2792            its->state_ptr = dts;
2793        } else {
2794            its->state_ptr = NULL;
2795        }
2796        /* All globals begin dead.  */
2797        its->state = TS_DEAD;
2798    }
2799    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2800        TCGTemp *its = &s->temps[i];
2801        its->state_ptr = NULL;
2802        its->state = TS_DEAD;
2803    }
2804
2805    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2806        TCGOpcode opc = op->opc;
2807        const TCGOpDef *def = &tcg_op_defs[opc];
2808        TCGLifeData arg_life = op->life;
2809        int nb_iargs, nb_oargs, call_flags;
2810        TCGTemp *arg_ts, *dir_ts;
2811
2812        if (opc == INDEX_op_call) {
2813            nb_oargs = TCGOP_CALLO(op);
2814            nb_iargs = TCGOP_CALLI(op);
2815            call_flags = tcg_call_flags(op);
2816        } else {
2817            nb_iargs = def->nb_iargs;
2818            nb_oargs = def->nb_oargs;
2819
2820            /* Set flags similar to those that calls require.  */
2821            if (def->flags & TCG_OPF_COND_BRANCH) {
2822                /* Like reading globals: sync_globals */
2823                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2824            } else if (def->flags & TCG_OPF_BB_END) {
2825                /* Like writing globals: save_globals */
2826                call_flags = 0;
2827            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2828                /* Like reading globals: sync_globals */
2829                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2830            } else {
2831                /* No effect on globals.  */
2832                call_flags = (TCG_CALL_NO_READ_GLOBALS |
2833                              TCG_CALL_NO_WRITE_GLOBALS);
2834            }
2835        }
2836
2837        /* Make sure that input arguments are available.  */
2838        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2839            arg_ts = arg_temp(op->args[i]);
2840            if (arg_ts) {
2841                dir_ts = arg_ts->state_ptr;
2842                if (dir_ts && arg_ts->state == TS_DEAD) {
2843                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2844                                      ? INDEX_op_ld_i32
2845                                      : INDEX_op_ld_i64);
2846                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2847
2848                    lop->args[0] = temp_arg(dir_ts);
2849                    lop->args[1] = temp_arg(arg_ts->mem_base);
2850                    lop->args[2] = arg_ts->mem_offset;
2851
2852                    /* Loaded, but synced with memory.  */
2853                    arg_ts->state = TS_MEM;
2854                }
2855            }
2856        }
2857
2858        /* Perform input replacement, and mark inputs that became dead.
2859           No action is required except keeping temp_state up to date
2860           so that we reload when needed.  */
2861        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2862            arg_ts = arg_temp(op->args[i]);
2863            if (arg_ts) {
2864                dir_ts = arg_ts->state_ptr;
2865                if (dir_ts) {
2866                    op->args[i] = temp_arg(dir_ts);
2867                    changes = true;
2868                    if (IS_DEAD_ARG(i)) {
2869                        arg_ts->state = TS_DEAD;
2870                    }
2871                }
2872            }
2873        }
2874
2875        /* Liveness analysis should ensure that the following are
2876           all correct, for call sites and basic block end points.  */
2877        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2878            /* Nothing to do */
2879        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2880            for (i = 0; i < nb_globals; ++i) {
2881                /* Liveness should see that globals are synced back,
2882                   that is, either TS_DEAD or TS_MEM.  */
2883                arg_ts = &s->temps[i];
2884                tcg_debug_assert(arg_ts->state_ptr == 0
2885                                 || arg_ts->state != 0);
2886            }
2887        } else {
2888            for (i = 0; i < nb_globals; ++i) {
2889                /* Liveness should see that globals are saved back,
2890                   that is, TS_DEAD, waiting to be reloaded.  */
2891                arg_ts = &s->temps[i];
2892                tcg_debug_assert(arg_ts->state_ptr == 0
2893                                 || arg_ts->state == TS_DEAD);
2894            }
2895        }
2896
2897        /* Outputs become available.  */
2898        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2899            arg_ts = arg_temp(op->args[0]);
2900            dir_ts = arg_ts->state_ptr;
2901            if (dir_ts) {
2902                op->args[0] = temp_arg(dir_ts);
2903                changes = true;
2904
2905                /* The output is now live and modified.  */
2906                arg_ts->state = 0;
2907
2908                if (NEED_SYNC_ARG(0)) {
2909                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2910                                      ? INDEX_op_st_i32
2911                                      : INDEX_op_st_i64);
2912                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2913                    TCGTemp *out_ts = dir_ts;
2914
2915                    if (IS_DEAD_ARG(0)) {
2916                        out_ts = arg_temp(op->args[1]);
2917                        arg_ts->state = TS_DEAD;
2918                        tcg_op_remove(s, op);
2919                    } else {
2920                        arg_ts->state = TS_MEM;
2921                    }
2922
2923                    sop->args[0] = temp_arg(out_ts);
2924                    sop->args[1] = temp_arg(arg_ts->mem_base);
2925                    sop->args[2] = arg_ts->mem_offset;
2926                } else {
2927                    tcg_debug_assert(!IS_DEAD_ARG(0));
2928                }
2929            }
2930        } else {
2931            for (i = 0; i < nb_oargs; i++) {
2932                arg_ts = arg_temp(op->args[i]);
2933                dir_ts = arg_ts->state_ptr;
2934                if (!dir_ts) {
2935                    continue;
2936                }
2937                op->args[i] = temp_arg(dir_ts);
2938                changes = true;
2939
2940                /* The output is now live and modified.  */
2941                arg_ts->state = 0;
2942
2943                /* Sync outputs upon their last write.  */
2944                if (NEED_SYNC_ARG(i)) {
2945                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2946                                      ? INDEX_op_st_i32
2947                                      : INDEX_op_st_i64);
2948                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2949
2950                    sop->args[0] = temp_arg(dir_ts);
2951                    sop->args[1] = temp_arg(arg_ts->mem_base);
2952                    sop->args[2] = arg_ts->mem_offset;
2953
2954                    arg_ts->state = TS_MEM;
2955                }
2956                /* Drop outputs that are dead.  */
2957                if (IS_DEAD_ARG(i)) {
2958                    arg_ts->state = TS_DEAD;
2959                }
2960            }
2961        }
2962    }
2963
2964    return changes;
2965}
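
/*
 * Net effect, illustrated on a hypothetical indirect global x whose
 * canonical slot lives at some offset off(x) from env: a use of x while
 * its direct temp is dead gains a reload, and a synced write gains a
 * store:
 *
 *     ld_i32  x_dir,env,$off(x)    <- inserted before the use
 *     add_i32 x_dir,x_dir,t0       <- op rewritten to use x_dir
 *     st_i32  x_dir,env,$off(x)    <- inserted after the final write
 *
 * where x_dir is the direct temporary created at the top of the pass.
 */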
2966
2967#ifdef CONFIG_DEBUG_TCG
2968static void dump_regs(TCGContext *s)
2969{
2970    TCGTemp *ts;
2971    int i;
2972    char buf[64];
2973
2974    for(i = 0; i < s->nb_temps; i++) {
2975        ts = &s->temps[i];
2976        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2977        switch(ts->val_type) {
2978        case TEMP_VAL_REG:
2979            printf("%s", tcg_target_reg_names[ts->reg]);
2980            break;
2981        case TEMP_VAL_MEM:
2982            printf("%d(%s)", (int)ts->mem_offset,
2983                   tcg_target_reg_names[ts->mem_base->reg]);
2984            break;
2985        case TEMP_VAL_CONST:
2986            printf("$0x%" PRIx64, ts->val);
2987            break;
2988        case TEMP_VAL_DEAD:
2989            printf("D");
2990            break;
2991        default:
2992            printf("???");
2993            break;
2994        }
2995        printf("\n");
2996    }
2997
2998    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2999        if (s->reg_to_temp[i] != NULL) {
3000            printf("%s: %s\n", 
3001                   tcg_target_reg_names[i], 
3002                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3003        }
3004    }
3005}
3006
3007static void check_regs(TCGContext *s)
3008{
3009    int reg;
3010    int k;
3011    TCGTemp *ts;
3012    char buf[64];
3013
3014    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3015        ts = s->reg_to_temp[reg];
3016        if (ts != NULL) {
3017            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3018                printf("Inconsistency for register %s:\n", 
3019                       tcg_target_reg_names[reg]);
3020                goto fail;
3021            }
3022        }
3023    }
3024    for (k = 0; k < s->nb_temps; k++) {
3025        ts = &s->temps[k];
3026        if (ts->val_type == TEMP_VAL_REG
3027            && ts->kind != TEMP_FIXED
3028            && s->reg_to_temp[ts->reg] != ts) {
3029            printf("Inconsistency for temp %s:\n",
3030                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3031        fail:
3032            printf("reg state:\n");
3033            dump_regs(s);
3034            tcg_abort();
3035        }
3036    }
3037}
3038#endif
3039
3040static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3041{
3042    intptr_t off, size, align;
3043
3044    switch (ts->type) {
3045    case TCG_TYPE_I32:
3046        size = align = 4;
3047        break;
3048    case TCG_TYPE_I64:
3049    case TCG_TYPE_V64:
3050        size = align = 8;
3051        break;
3052    case TCG_TYPE_V128:
3053        size = align = 16;
3054        break;
3055    case TCG_TYPE_V256:
3056        /* Note that we do not require aligned storage for V256. */
3057        size = 32, align = 16;
3058        break;
3059    default:
3060        g_assert_not_reached();
3061    }
3062
3063    /*
3064     * Assume the stack is sufficiently aligned.
3065     * This affects e.g. ARM NEON, where we have 8-byte stack alignment
3066     * and do not require 16-byte vector alignment.  This seems slightly
3067     * easier than fully parameterizing the above switch statement.
3068     */
3069    align = MIN(TCG_TARGET_STACK_ALIGN, align);
3070    off = ROUND_UP(s->current_frame_offset, align);
3071
3072    /* If we've exhausted the stack frame, restart with a smaller TB. */
3073    if (off + size > s->frame_end) {
3074        tcg_raise_tb_overflow(s);
3075    }
3076    s->current_frame_offset = off + size;
3077
3078    ts->mem_offset = off;
3079#if defined(__sparc__)
3080    ts->mem_offset += TCG_TARGET_STACK_BIAS;
3081#endif
3082    ts->mem_base = s->frame_temp;
3083    ts->mem_allocated = 1;
3084}
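
/*
 * Worked example of the layout math above: with current_frame_offset
 * = 20 and an I64 temp (size = align = 8), ROUND_UP(20, 8) = 24, so
 * the temp occupies bytes [24, 32) and current_frame_offset advances
 * to 32.  On a host with only 8-byte stack alignment, a V128 temp has
 * its align clamped from 16 down to TCG_TARGET_STACK_ALIGN = 8.
 */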
3085
3086static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3087
3088/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3089   mark it free; otherwise mark it dead.  */
3090static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3091{
3092    TCGTempVal new_type;
3093
3094    switch (ts->kind) {
3095    case TEMP_FIXED:
3096        return;
3097    case TEMP_GLOBAL:
3098    case TEMP_LOCAL:
3099        new_type = TEMP_VAL_MEM;
3100        break;
3101    case TEMP_NORMAL:
3102        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3103        break;
3104    case TEMP_CONST:
3105        new_type = TEMP_VAL_CONST;
3106        break;
3107    default:
3108        g_assert_not_reached();
3109    }
3110    if (ts->val_type == TEMP_VAL_REG) {
3111        s->reg_to_temp[ts->reg] = NULL;
3112    }
3113    ts->val_type = new_type;
3114}
3115
3116/* Mark a temporary as dead.  */
3117static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3118{
3119    temp_free_or_dead(s, ts, 1);
3120}
3121
3122/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3123   register needs to be allocated to store a constant.  If 'free_or_dead'
3124   is non-zero, subsequently release the temporary; if it is positive, the
3125   temp is dead; if it is negative, the temp is free.  */
3126static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3127                      TCGRegSet preferred_regs, int free_or_dead)
3128{
3129    if (!temp_readonly(ts) && !ts->mem_coherent) {
3130        if (!ts->mem_allocated) {
3131            temp_allocate_frame(s, ts);
3132        }
3133        switch (ts->val_type) {
3134        case TEMP_VAL_CONST:
3135            /* If we're going to free the temp immediately, then we won't
3136               require it later in a register, so attempt to store the
3137               constant to memory directly.  */
3138            if (free_or_dead
3139                && tcg_out_sti(s, ts->type, ts->val,
3140                               ts->mem_base->reg, ts->mem_offset)) {
3141                break;
3142            }
3143            temp_load(s, ts, tcg_target_available_regs[ts->type],
3144                      allocated_regs, preferred_regs);
3145            /* fallthrough */
3146
3147        case TEMP_VAL_REG:
3148            tcg_out_st(s, ts->type, ts->reg,
3149                       ts->mem_base->reg, ts->mem_offset);
3150            break;
3151
3152        case TEMP_VAL_MEM:
3153            break;
3154
3155        case TEMP_VAL_DEAD:
3156        default:
3157            tcg_abort();
3158        }
3159        ts->mem_coherent = 1;
3160    }
3161    if (free_or_dead) {
3162        temp_free_or_dead(s, ts, free_or_dead);
3163    }
3164}
3165
3166/* free register 'reg' by spilling the corresponding temporary if necessary */
3167static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3168{
3169    TCGTemp *ts = s->reg_to_temp[reg];
3170    if (ts != NULL) {
3171        temp_sync(s, ts, allocated_regs, 0, -1);
3172    }
3173}
3174
3175/**
3176 * tcg_reg_alloc:
3177 * @required_regs: Set of registers in which we must allocate.
3178 * @allocated_regs: Set of registers which must be avoided.
3179 * @preferred_regs: Set of registers we should prefer.
3180 * @rev: True if we search the registers in "indirect" order.
3181 *
3182 * The allocated register must be in @required_regs & ~@allocated_regs,
3183 * but if we can put it in @preferred_regs we may save a move later.
3184 */
3185static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3186                            TCGRegSet allocated_regs,
3187                            TCGRegSet preferred_regs, bool rev)
3188{
3189    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3190    TCGRegSet reg_ct[2];
3191    const int *order;
3192
3193    reg_ct[1] = required_regs & ~allocated_regs;
3194    tcg_debug_assert(reg_ct[1] != 0);
3195    reg_ct[0] = reg_ct[1] & preferred_regs;
3196
3197    /* Skip the preferred_regs option if it cannot be satisfied,
3198       or if the preference made no difference.  */
3199    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3200
3201    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3202
3203    /* Try free registers, preferences first.  */
3204    for (j = f; j < 2; j++) {
3205        TCGRegSet set = reg_ct[j];
3206
3207        if (tcg_regset_single(set)) {
3208            /* One register in the set.  */
3209            TCGReg reg = tcg_regset_first(set);
3210            if (s->reg_to_temp[reg] == NULL) {
3211                return reg;
3212            }
3213        } else {
3214            for (i = 0; i < n; i++) {
3215                TCGReg reg = order[i];
3216                if (s->reg_to_temp[reg] == NULL &&
3217                    tcg_regset_test_reg(set, reg)) {
3218                    return reg;
3219                }
3220            }
3221        }
3222    }
3223
3224    /* We must spill something.  */
3225    for (j = f; j < 2; j++) {
3226        TCGRegSet set = reg_ct[j];
3227
3228        if (tcg_regset_single(set)) {
3229            /* One register in the set.  */
3230            TCGReg reg = tcg_regset_first(set);
3231            tcg_reg_free(s, reg, allocated_regs);
3232            return reg;
3233        } else {
3234            for (i = 0; i < n; i++) {
3235                TCGReg reg = order[i];
3236                if (tcg_regset_test_reg(set, reg)) {
3237                    tcg_reg_free(s, reg, allocated_regs);
3238                    return reg;
3239                }
3240            }
3241        }
3242    }
3243
3244    tcg_abort();
3245}
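
/*
 * Worked example for the two-pass search above (editorial sketch).
 * With required = {r0,r1,r2}, allocated = {r0} and preferred = {r1}:
 * reg_ct[1] = {r1,r2} and reg_ct[0] = {r1}, so a free r1 is taken
 * first, then a free r2, and only if neither is free do we spill,
 * again preferring r1.  A typical call shape from this file:
 */
static inline TCGReg example_reg_alloc_for(TCGContext *s, TCGTemp *ts,
                                           TCGRegSet avoid)
{
    return tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                         avoid | s->reserved_regs, 0, ts->indirect_base);
}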
3246
3247/* Make sure the temporary is in a register.  If needed, allocate the register
3248   from DESIRED while avoiding ALLOCATED, preferring one in PREFERRED.  */
3249static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3250                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3251{
3252    TCGReg reg;
3253
3254    switch (ts->val_type) {
3255    case TEMP_VAL_REG:
3256        return;
3257    case TEMP_VAL_CONST:
3258        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3259                            preferred_regs, ts->indirect_base);
3260        if (ts->type <= TCG_TYPE_I64) {
3261            tcg_out_movi(s, ts->type, reg, ts->val);
3262        } else {
3263            uint64_t val = ts->val;
3264            MemOp vece = MO_64;
3265
3266            /*
3267             * Find the minimal vector element that matches the constant.
3268             * The targets will, in general, have to do this search
3269             * anyway, so do it generically here.
3270             */
3271            if (val == dup_const(MO_8, val)) {
3272                vece = MO_8;
3273            } else if (val == dup_const(MO_16, val)) {
3274                vece = MO_16;
3275            } else if (val == dup_const(MO_32, val)) {
3276                vece = MO_32;
3277            }
3278
3279            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3280        }
3281        ts->mem_coherent = 0;
3282        break;
3283    case TEMP_VAL_MEM:
3284        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3285                            preferred_regs, ts->indirect_base);
3286        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3287        ts->mem_coherent = 1;
3288        break;
3289    case TEMP_VAL_DEAD:
3290    default:
3291        tcg_abort();
3292    }
3293    ts->reg = reg;
3294    ts->val_type = TEMP_VAL_REG;
3295    s->reg_to_temp[reg] = ts;
3296}
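
/*
 * The minimal-VECE search above, factored into a hypothetical helper
 * for clarity (editorial sketch; the file itself open-codes it here
 * and again in tcg_reg_alloc_dup2 below):
 */
static inline MemOp example_min_vece_for_const(uint64_t val)
{
    if (val == dup_const(MO_8, val)) {
        return MO_8;            /* e.g. 0xabababababababab */
    } else if (val == dup_const(MO_16, val)) {
        return MO_16;           /* e.g. 0xa1b2a1b2a1b2a1b2 */
    } else if (val == dup_const(MO_32, val)) {
        return MO_32;           /* e.g. 0x0102030401020304 */
    }
    return MO_64;
}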
3297
3298/* Save a temporary to memory. 'allocated_regs' is used in case a
3299   temporary register needs to be allocated to store a constant.  */
3300static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3301{
3302    /* The liveness analysis already ensures that globals are back
3303       in memory. Keep a tcg_debug_assert for safety. */
3304    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3305}
3306
3307/* save globals to their canonical location and assume they can be
3308   modified by the following code. 'allocated_regs' is used in case a
3309   temporary register needs to be allocated to store a constant. */
3310static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3311{
3312    int i, n;
3313
3314    for (i = 0, n = s->nb_globals; i < n; i++) {
3315        temp_save(s, &s->temps[i], allocated_regs);
3316    }
3317}
3318
3319/* sync globals to their canonical location and assume they can be
3320   read by the following code. 'allocated_regs' is used in case a
3321   temporary register needs to be allocated to store a constant. */
3322static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3323{
3324    int i, n;
3325
3326    for (i = 0, n = s->nb_globals; i < n; i++) {
3327        TCGTemp *ts = &s->temps[i];
3328        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3329                         || ts->kind == TEMP_FIXED
3330                         || ts->mem_coherent);
3331    }
3332}
3333
3334/* at the end of a basic block, we assume all temporaries are dead and
3335   all globals are stored at their canonical location. */
3336static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3337{
3338    int i;
3339
3340    for (i = s->nb_globals; i < s->nb_temps; i++) {
3341        TCGTemp *ts = &s->temps[i];
3342
3343        switch (ts->kind) {
3344        case TEMP_LOCAL:
3345            temp_save(s, ts, allocated_regs);
3346            break;
3347        case TEMP_NORMAL:
3348            /* The liveness analysis already ensures that temps are dead.
3349               Keep a tcg_debug_assert for safety. */
3350            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3351            break;
3352        case TEMP_CONST:
3353            /* Similarly, we should have freed any allocated register. */
3354            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3355            break;
3356        default:
3357            g_assert_not_reached();
3358        }
3359    }
3360
3361    save_globals(s, allocated_regs);
3362}
3363
3364/*
3365 * At a conditional branch, we assume all temporaries are dead and
3366 * all globals and local temps are synced to their location.
3367 */
3368static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3369{
3370    sync_globals(s, allocated_regs);
3371
3372    for (int i = s->nb_globals; i < s->nb_temps; i++) {
3373        TCGTemp *ts = &s->temps[i];
3374        /*
3375         * The liveness analysis already ensures that temps are dead.
3376         * Keep tcg_debug_asserts for safety.
3377         */
3378        switch (ts->kind) {
3379        case TEMP_LOCAL:
3380            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3381            break;
3382        case TEMP_NORMAL:
3383            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3384            break;
3385        case TEMP_CONST:
3386            break;
3387        default:
3388            g_assert_not_reached();
3389        }
3390    }
3391}
3392
3393/*
3394 * Specialized code generation for INDEX_op_mov_* with a constant.
3395 */
3396static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3397                                  tcg_target_ulong val, TCGLifeData arg_life,
3398                                  TCGRegSet preferred_regs)
3399{
3400    /* ENV should not be modified.  */
3401    tcg_debug_assert(!temp_readonly(ots));
3402
3403    /* The movi is not explicitly generated here.  */
3404    if (ots->val_type == TEMP_VAL_REG) {
3405        s->reg_to_temp[ots->reg] = NULL;
3406    }
3407    ots->val_type = TEMP_VAL_CONST;
3408    ots->val = val;
3409    ots->mem_coherent = 0;
3410    if (NEED_SYNC_ARG(0)) {
3411        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3412    } else if (IS_DEAD_ARG(0)) {
3413        temp_dead(s, ots);
3414    }
3415}
3416
3417/*
3418 * Specialized code generation for INDEX_op_mov_*.
3419 */
3420static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3421{
3422    const TCGLifeData arg_life = op->life;
3423    TCGRegSet allocated_regs, preferred_regs;
3424    TCGTemp *ts, *ots;
3425    TCGType otype, itype;
3426
3427    allocated_regs = s->reserved_regs;
3428    preferred_regs = op->output_pref[0];
3429    ots = arg_temp(op->args[0]);
3430    ts = arg_temp(op->args[1]);
3431
3432    /* ENV should not be modified.  */
3433    tcg_debug_assert(!temp_readonly(ots));
3434
3435    /* Note that otype != itype for no-op truncation.  */
3436    otype = ots->type;
3437    itype = ts->type;
3438
3439    if (ts->val_type == TEMP_VAL_CONST) {
3440        /* propagate constant or generate sti */
3441        tcg_target_ulong val = ts->val;
3442        if (IS_DEAD_ARG(1)) {
3443            temp_dead(s, ts);
3444        }
3445        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3446        return;
3447    }
3448
3449    /* If the source value is in memory we're going to be forced
3450       to have it in a register in order to perform the copy.  Copy
3451       the SOURCE value into its own register first, that way we
3452       don't have to reload SOURCE the next time it is used. */
3453    if (ts->val_type == TEMP_VAL_MEM) {
3454        temp_load(s, ts, tcg_target_available_regs[itype],
3455                  allocated_regs, preferred_regs);
3456    }
3457
3458    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3459    if (IS_DEAD_ARG(0)) {
3460        /* mov to a non-saved dead register makes no sense (even with
3461           liveness analysis disabled). */
3462        tcg_debug_assert(NEED_SYNC_ARG(0));
3463        if (!ots->mem_allocated) {
3464            temp_allocate_frame(s, ots);
3465        }
3466        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3467        if (IS_DEAD_ARG(1)) {
3468            temp_dead(s, ts);
3469        }
3470        temp_dead(s, ots);
3471    } else {
3472        if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3473            /* the mov can be suppressed */
3474            if (ots->val_type == TEMP_VAL_REG) {
3475                s->reg_to_temp[ots->reg] = NULL;
3476            }
3477            ots->reg = ts->reg;
3478            temp_dead(s, ts);
3479        } else {
3480            if (ots->val_type != TEMP_VAL_REG) {
3481                /* When allocating a new register, make sure to not spill the
3482                   input one. */
3483                tcg_regset_set_reg(allocated_regs, ts->reg);
3484                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3485                                         allocated_regs, preferred_regs,
3486                                         ots->indirect_base);
3487            }
3488            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3489                /*
3490                 * Cross register class move not supported.
3491                 * Store the source register into the destination slot
3492                 * and leave the destination temp as TEMP_VAL_MEM.
3493                 */
3494                assert(!temp_readonly(ots));
3495                if (!ots->mem_allocated) {
3496                    temp_allocate_frame(s, ots);
3497                }
3498                tcg_out_st(s, ts->type, ts->reg,
3499                           ots->mem_base->reg, ots->mem_offset);
3500                ots->mem_coherent = 1;
3501                temp_free_or_dead(s, ots, -1);
3502                return;
3503            }
3504        }
3505        ots->val_type = TEMP_VAL_REG;
3506        ots->mem_coherent = 0;
3507        s->reg_to_temp[ots->reg] = ots;
3508        if (NEED_SYNC_ARG(0)) {
3509            temp_sync(s, ots, allocated_regs, 0, 0);
3510        }
3511    }
3512}
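
/*
 * Editorial sketch of the "suppressed mov" case above: when the input
 * dies and is not a fixed register, the output simply inherits the
 * input's register and no host instruction is emitted at all.
 */
static inline void example_take_over_reg(TCGContext *s,
                                         TCGTemp *ots, TCGTemp *ts)
{
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;    /* drop the old binding */
    }
    ots->reg = ts->reg;                     /* inherit the register */
    temp_dead(s, ts);
    ots->val_type = TEMP_VAL_REG;
    ots->mem_coherent = 0;
    s->reg_to_temp[ots->reg] = ots;
}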
3513
3514/*
3515 * Specialized code generation for INDEX_op_dup_vec.
3516 */
3517static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3518{
3519    const TCGLifeData arg_life = op->life;
3520    TCGRegSet dup_out_regs, dup_in_regs;
3521    TCGTemp *its, *ots;
3522    TCGType itype, vtype;
3523    intptr_t endian_fixup;
3524    unsigned vece;
3525    bool ok;
3526
3527    ots = arg_temp(op->args[0]);
3528    its = arg_temp(op->args[1]);
3529
3530    /* ENV should not be modified.  */
3531    tcg_debug_assert(!temp_readonly(ots));
3532
3533    itype = its->type;
3534    vece = TCGOP_VECE(op);
3535    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3536
3537    if (its->val_type == TEMP_VAL_CONST) {
3538        /* Propagate constant via movi -> dupi.  */
3539        tcg_target_ulong val = its->val;
3540        if (IS_DEAD_ARG(1)) {
3541            temp_dead(s, its);
3542        }
3543        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3544        return;
3545    }
3546
3547    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3548    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3549
3550    /* Allocate the output register now.  */
3551    if (ots->val_type != TEMP_VAL_REG) {
3552        TCGRegSet allocated_regs = s->reserved_regs;
3553
3554        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3555            /* Make sure to not spill the input register. */
3556            tcg_regset_set_reg(allocated_regs, its->reg);
3557        }
3558        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3559                                 op->output_pref[0], ots->indirect_base);
3560        ots->val_type = TEMP_VAL_REG;
3561        ots->mem_coherent = 0;
3562        s->reg_to_temp[ots->reg] = ots;
3563    }
3564
3565    switch (its->val_type) {
3566    case TEMP_VAL_REG:
3567        /*
3568         * The dup constraints must be broad, covering all possible VECE.
3569         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3570         * to fail, indicating that extra moves are required for that case.
3571         */
3572        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3573            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3574                goto done;
3575            }
3576            /* Try again from memory or a vector input register.  */
3577        }
3578        if (!its->mem_coherent) {
3579            /*
3580             * The input register is not synced, and so an extra store
3581             * would be required to use memory.  Attempt an integer-vector
3582             * register move first.  We do not have a TCGRegSet for this.
3583             */
3584            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3585                break;
3586            }
3587            /* Sync the temp back to its slot and load from there.  */
3588            temp_sync(s, its, s->reserved_regs, 0, 0);
3589        }
3590        /* fall through */
3591
3592    case TEMP_VAL_MEM:
3593#ifdef HOST_WORDS_BIGENDIAN
3594        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3595        endian_fixup -= 1 << vece;
3596#else
3597        endian_fixup = 0;
3598#endif
3599        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3600                             its->mem_offset + endian_fixup)) {
3601            goto done;
3602        }
3603        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3604        break;
3605
3606    default:
3607        g_assert_not_reached();
3608    }
3609
3610    /* We now have a vector input register, so dup must succeed. */
3611    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3612    tcg_debug_assert(ok);
3613
3614 done:
3615    if (IS_DEAD_ARG(1)) {
3616        temp_dead(s, its);
3617    }
3618    if (NEED_SYNC_ARG(0)) {
3619        temp_sync(s, ots, s->reserved_regs, 0, 0);
3620    }
3621    if (IS_DEAD_ARG(0)) {
3622        temp_dead(s, ots);
3623    }
3624}
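
/*
 * Worked example for the big-endian fixup above (editorial sketch):
 * loading one MO_16 element out of a TCG_TYPE_I64 slot on a
 * big-endian host needs offset 8 - (1 << MO_16) = 6, since the least
 * significant element sits at the high end of the slot.
 */
static inline intptr_t example_dup_endian_fixup(TCGType itype,
                                                unsigned vece)
{
#ifdef HOST_WORDS_BIGENDIAN
    return (itype == TCG_TYPE_I32 ? 4 : 8) - (1 << vece);
#else
    return 0;
#endif
}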
3625
3626static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3627{
3628    const TCGLifeData arg_life = op->life;
3629    const TCGOpDef * const def = &tcg_op_defs[op->opc];
3630    TCGRegSet i_allocated_regs;
3631    TCGRegSet o_allocated_regs;
3632    int i, k, nb_iargs, nb_oargs;
3633    TCGReg reg;
3634    TCGArg arg;
3635    const TCGArgConstraint *arg_ct;
3636    TCGTemp *ts;
3637    TCGArg new_args[TCG_MAX_OP_ARGS];
3638    int const_args[TCG_MAX_OP_ARGS];
3639
3640    nb_oargs = def->nb_oargs;
3641    nb_iargs = def->nb_iargs;
3642
3643    /* copy constants */
3644    memcpy(new_args + nb_oargs + nb_iargs, 
3645           op->args + nb_oargs + nb_iargs,
3646           sizeof(TCGArg) * def->nb_cargs);
3647
3648    i_allocated_regs = s->reserved_regs;
3649    o_allocated_regs = s->reserved_regs;
3650
3651    /* satisfy input constraints */ 
3652    for (k = 0; k < nb_iargs; k++) {
3653        TCGRegSet i_preferred_regs, o_preferred_regs;
3654
3655        i = def->args_ct[nb_oargs + k].sort_index;
3656        arg = op->args[i];
3657        arg_ct = &def->args_ct[i];
3658        ts = arg_temp(arg);
3659
3660        if (ts->val_type == TEMP_VAL_CONST
3661            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3662            /* constant is OK for instruction */
3663            const_args[i] = 1;
3664            new_args[i] = ts->val;
3665            continue;
3666        }
3667
3668        i_preferred_regs = o_preferred_regs = 0;
3669        if (arg_ct->ialias) {
3670            o_preferred_regs = op->output_pref[arg_ct->alias_index];
3671
3672            /*
3673             * If the input is readonly, then it cannot also be an
3674             * output and aliased to itself.  If the input is not
3675             * dead after the instruction, we must allocate a new
3676             * register and move it.
3677             */
3678            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3679                goto allocate_in_reg;
3680            }
3681
3682            /*
3683             * Check if the current register has already been allocated
3684             * for another input aliased to an output.
3685             */
3686            if (ts->val_type == TEMP_VAL_REG) {
3687                reg = ts->reg;
3688                for (int k2 = 0; k2 < k; k2++) {
3689                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
3690                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3691                        goto allocate_in_reg;
3692                    }
3693                }
3694            }
3695            i_preferred_regs = o_preferred_regs;
3696        }
3697
3698        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3699        reg = ts->reg;
3700
3701        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3702 allocate_in_reg:
3703            /*
3704             * Allocate a new register matching the constraint
3705             * and move the temporary register into it.
3706             */
3707            temp_load(s, ts, tcg_target_available_regs[ts->type],
3708                      i_allocated_regs, 0);
3709            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3710                                o_preferred_regs, ts->indirect_base);
3711            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3712                /*
3713                 * Cross register class move not supported.  Sync the
3714                 * temp back to its slot and load from there.
3715                 */
3716                temp_sync(s, ts, i_allocated_regs, 0, 0);
3717                tcg_out_ld(s, ts->type, reg,
3718                           ts->mem_base->reg, ts->mem_offset);
3719            }
3720        }
3721        new_args[i] = reg;
3722        const_args[i] = 0;
3723        tcg_regset_set_reg(i_allocated_regs, reg);
3724    }
3725    
3726    /* mark dead temporaries and free the associated registers */
3727    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3728        if (IS_DEAD_ARG(i)) {
3729            temp_dead(s, arg_temp(op->args[i]));
3730        }
3731    }
3732
3733    if (def->flags & TCG_OPF_COND_BRANCH) {
3734        tcg_reg_alloc_cbranch(s, i_allocated_regs);
3735    } else if (def->flags & TCG_OPF_BB_END) {
3736        tcg_reg_alloc_bb_end(s, i_allocated_regs);
3737    } else {
3738        if (def->flags & TCG_OPF_CALL_CLOBBER) {
3739            /* XXX: permit a generic clobber register list? */
3740            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3741                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3742                    tcg_reg_free(s, i, i_allocated_regs);
3743                }
3744            }
3745        }
3746        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3747            /* sync globals if the op has side effects and might trigger
3748               an exception. */
3749            sync_globals(s, i_allocated_regs);
3750        }
3751        
3752        /* satisfy the output constraints */
3753        for (k = 0; k < nb_oargs; k++) {
3754            i = def->args_ct[k].sort_index;
3755            arg = op->args[i];
3756            arg_ct = &def->args_ct[i];
3757            ts = arg_temp(arg);
3758
3759            /* ENV should not be modified.  */
3760            tcg_debug_assert(!temp_readonly(ts));
3761
3762            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3763                reg = new_args[arg_ct->alias_index];
3764            } else if (arg_ct->newreg) {
3765                reg = tcg_reg_alloc(s, arg_ct->regs,
3766                                    i_allocated_regs | o_allocated_regs,
3767                                    op->output_pref[k], ts->indirect_base);
3768            } else {
3769                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3770                                    op->output_pref[k], ts->indirect_base);
3771            }
3772            tcg_regset_set_reg(o_allocated_regs, reg);
3773            if (ts->val_type == TEMP_VAL_REG) {
3774                s->reg_to_temp[ts->reg] = NULL;
3775            }
3776            ts->val_type = TEMP_VAL_REG;
3777            ts->reg = reg;
3778            /*
3779             * Temp value is modified, so the value kept in memory is
3780             * potentially not the same.
3781             */
3782            ts->mem_coherent = 0;
3783            s->reg_to_temp[reg] = ts;
3784            new_args[i] = reg;
3785        }
3786    }
3787
3788    /* emit instruction */
3789    if (def->flags & TCG_OPF_VECTOR) {
3790        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3791                       new_args, const_args);
3792    } else {
3793        tcg_out_op(s, op->opc, new_args, const_args);
3794    }
3795
3796    /* move the outputs in the correct register if needed */
3797    for (i = 0; i < nb_oargs; i++) {
3798        ts = arg_temp(op->args[i]);
3799
3800        /* ENV should not be modified.  */
3801        tcg_debug_assert(!temp_readonly(ts));
3802
3803        if (NEED_SYNC_ARG(i)) {
3804            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3805        } else if (IS_DEAD_ARG(i)) {
3806            temp_dead(s, ts);
3807        }
3808    }
3809}
3810
3811static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3812{
3813    const TCGLifeData arg_life = op->life;
3814    TCGTemp *ots, *itsl, *itsh;
3815    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3816
3817    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3818    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3819    tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3820
3821    ots = arg_temp(op->args[0]);
3822    itsl = arg_temp(op->args[1]);
3823    itsh = arg_temp(op->args[2]);
3824
3825    /* ENV should not be modified.  */
3826    tcg_debug_assert(!temp_readonly(ots));
3827
3828    /* Allocate the output register now.  */
3829    if (ots->val_type != TEMP_VAL_REG) {
3830        TCGRegSet allocated_regs = s->reserved_regs;
3831        TCGRegSet dup_out_regs =
3832            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3833
3834        /* Make sure to not spill the input registers. */
3835        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3836            tcg_regset_set_reg(allocated_regs, itsl->reg);
3837        }
3838        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3839            tcg_regset_set_reg(allocated_regs, itsh->reg);
3840        }
3841
3842        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3843                                 op->output_pref[0], ots->indirect_base);
3844        ots->val_type = TEMP_VAL_REG;
3845        ots->mem_coherent = 0;
3846        s->reg_to_temp[ots->reg] = ots;
3847    }
3848
3849    /* Promote dup2 of immediates to dupi_vec. */
3850    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3851        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3852        MemOp vece = MO_64;
3853
3854        if (val == dup_const(MO_8, val)) {
3855            vece = MO_8;
3856        } else if (val == dup_const(MO_16, val)) {
3857            vece = MO_16;
3858        } else if (val == dup_const(MO_32, val)) {
3859            vece = MO_32;
3860        }
3861
3862        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3863        goto done;
3864    }
3865
3866    /* If the two inputs form one 64-bit value, try dupm_vec. */
3867    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3868        if (!itsl->mem_coherent) {
3869            temp_sync(s, itsl, s->reserved_regs, 0, 0);
3870        }
3871        if (!itsh->mem_coherent) {
3872            temp_sync(s, itsh, s->reserved_regs, 0, 0);
3873        }
3874#ifdef HOST_WORDS_BIGENDIAN
3875        TCGTemp *its = itsh;
3876#else
3877        TCGTemp *its = itsl;
3878#endif
3879        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3880                             its->mem_base->reg, its->mem_offset)) {
3881            goto done;
3882        }
3883    }
3884
3885    /* Fall back to generic expansion. */
3886    return false;
3887
3888 done:
3889    if (IS_DEAD_ARG(1)) {
3890        temp_dead(s, itsl);
3891    }
3892    if (IS_DEAD_ARG(2)) {
3893        temp_dead(s, itsh);
3894    }
3895    if (NEED_SYNC_ARG(0)) {
3896        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3897    } else if (IS_DEAD_ARG(0)) {
3898        temp_dead(s, ots);
3899    }
3900    return true;
3901}
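
/*
 * Worked example for the immediate promotion above (editorial
 * sketch): deposit64() packs the two 32-bit halves into the 64-bit
 * dup constant.  lo = 0x01020304, hi = 0x0a0b0c0d yields
 * 0x0a0b0c0d01020304, for which the dup_const() search keeps MO_64;
 * lo == hi == 0x01020304 would instead be demoted to MO_32.
 */
static inline uint64_t example_dup2_const(uint32_t lo, uint32_t hi)
{
    return deposit64(lo, 32, 32, hi);
}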
3902
3903#ifdef TCG_TARGET_STACK_GROWSUP
3904#define STACK_DIR(x) (-(x))
3905#else
3906#define STACK_DIR(x) (x)
3907#endif
3908
3909static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3910{
3911    const int nb_oargs = TCGOP_CALLO(op);
3912    const int nb_iargs = TCGOP_CALLI(op);
3913    const TCGLifeData arg_life = op->life;
3914    const TCGHelperInfo *info;
3915    int flags, nb_regs, i;
3916    TCGReg reg;
3917    TCGArg arg;
3918    TCGTemp *ts;
3919    intptr_t stack_offset;
3920    size_t call_stack_size;
3921    tcg_insn_unit *func_addr;
3922    int allocate_args;
3923    TCGRegSet allocated_regs;
3924
3925    func_addr = tcg_call_func(op);
3926    info = tcg_call_info(op);
3927    flags = info->flags;
3928
3929    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3930    if (nb_regs > nb_iargs) {
3931        nb_regs = nb_iargs;
3932    }
3933
3934    /* assign stack slots first */
3935    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3936    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
3937        ~(TCG_TARGET_STACK_ALIGN - 1);
3938    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3939    if (allocate_args) {
3940        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3941           preallocate call stack */
3942        tcg_abort();
3943    }
3944
3945    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3946    for (i = nb_regs; i < nb_iargs; i++) {
3947        arg = op->args[nb_oargs + i];
3948#ifdef TCG_TARGET_STACK_GROWSUP
3949        stack_offset -= sizeof(tcg_target_long);
3950#endif
3951        if (arg != TCG_CALL_DUMMY_ARG) {
3952            ts = arg_temp(arg);
3953            temp_load(s, ts, tcg_target_available_regs[ts->type],
3954                      s->reserved_regs, 0);
3955            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3956        }
3957#ifndef TCG_TARGET_STACK_GROWSUP
3958        stack_offset += sizeof(tcg_target_long);
3959#endif
3960    }
3961    
3962    /* assign input registers */
3963    allocated_regs = s->reserved_regs;
3964    for (i = 0; i < nb_regs; i++) {
3965        arg = op->args[nb_oargs + i];
3966        if (arg != TCG_CALL_DUMMY_ARG) {
3967            ts = arg_temp(arg);
3968            reg = tcg_target_call_iarg_regs[i];
3969
3970            if (ts->val_type == TEMP_VAL_REG) {
3971                if (ts->reg != reg) {
3972                    tcg_reg_free(s, reg, allocated_regs);
3973                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3974                        /*
3975                         * Cross register class move not supported.  Sync the
3976                         * temp back to its slot and load from there.
3977                         */
3978                        temp_sync(s, ts, allocated_regs, 0, 0);
3979                        tcg_out_ld(s, ts->type, reg,
3980                                   ts->mem_base->reg, ts->mem_offset);
3981                    }
3982                }
3983            } else {
3984                TCGRegSet arg_set = 0;
3985
3986                tcg_reg_free(s, reg, allocated_regs);
3987                tcg_regset_set_reg(arg_set, reg);
3988                temp_load(s, ts, arg_set, allocated_regs, 0);
3989            }
3990
3991            tcg_regset_set_reg(allocated_regs, reg);
3992        }
3993    }
3994    
3995    /* mark dead temporaries and free the associated registers */
3996    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3997        if (IS_DEAD_ARG(i)) {
3998            temp_dead(s, arg_temp(op->args[i]));
3999        }
4000    }
4001    
4002    /* clobber call registers */
4003    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4004        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4005            tcg_reg_free(s, i, allocated_regs);
4006        }
4007    }
4008
4009    /* Save globals if they might be written by the helper, sync them if
4010       they might be read. */
4011    if (flags & TCG_CALL_NO_READ_GLOBALS) {
4012        /* Nothing to do */
4013    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4014        sync_globals(s, allocated_regs);
4015    } else {
4016        save_globals(s, allocated_regs);
4017    }
4018
4019#ifdef CONFIG_TCG_INTERPRETER
4020    {
4021        gpointer hash = (gpointer)(uintptr_t)info->typemask;
4022        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4023        assert(cif != NULL);
4024        tcg_out_call(s, func_addr, cif);
4025    }
4026#else
4027    tcg_out_call(s, func_addr);
4028#endif
4029
4030    /* assign output registers and emit moves if needed */
4031    for (i = 0; i < nb_oargs; i++) {
4032        arg = op->args[i];
4033        ts = arg_temp(arg);
4034
4035        /* ENV should not be modified.  */
4036        tcg_debug_assert(!temp_readonly(ts));
4037
4038        reg = tcg_target_call_oarg_regs[i];
4039        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4040        if (ts->val_type == TEMP_VAL_REG) {
4041            s->reg_to_temp[ts->reg] = NULL;
4042        }
4043        ts->val_type = TEMP_VAL_REG;
4044        ts->reg = reg;
4045        ts->mem_coherent = 0;
4046        s->reg_to_temp[reg] = ts;
4047        if (NEED_SYNC_ARG(i)) {
4048            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4049        } else if (IS_DEAD_ARG(i)) {
4050            temp_dead(s, ts);
4051        }
4052    }
4053}
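
/*
 * The stack-size round-up used above, in one line (editorial sketch):
 * align the spilled-argument area up to TCG_TARGET_STACK_ALIGN, e.g.
 * 12 bytes become 16 with 16-byte alignment.
 */
static inline size_t example_align_call_stack(size_t bytes)
{
    return (bytes + TCG_TARGET_STACK_ALIGN - 1)
           & ~(size_t)(TCG_TARGET_STACK_ALIGN - 1);
}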
4054
4055#ifdef CONFIG_PROFILER
4056
4057/* avoid copy/paste errors */
4058#define PROF_ADD(to, from, field)                       \
4059    do {                                                \
4060        (to)->field += qatomic_read(&((from)->field));  \
4061    } while (0)
4062
4063#define PROF_MAX(to, from, field)                                       \
4064    do {                                                                \
4065        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4066        if (val__ > (to)->field) {                                      \
4067            (to)->field = val__;                                        \
4068        }                                                               \
4069    } while (0)
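
/*
 * Expansion sketch (editorial): PROF_ADD(prof, orig, tb_count) is
 * equivalent to the open-coded form below; PROF_MAX keeps the larger
 * of the two values instead of summing.
 */
static inline void example_prof_add_tb_count(TCGProfile *to,
                                             const TCGProfile *from)
{
    to->tb_count += qatomic_read(&from->tb_count);
}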
4070
4071/* Pass in a zeroed @prof */
4072static inline
4073void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4074{
4075    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4076    unsigned int i;
4077
4078    for (i = 0; i < n_ctxs; i++) {
4079        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4080        const TCGProfile *orig = &s->prof;
4081
4082        if (counters) {
4083            PROF_ADD(prof, orig, cpu_exec_time);
4084            PROF_ADD(prof, orig, tb_count1);
4085            PROF_ADD(prof, orig, tb_count);
4086            PROF_ADD(prof, orig, op_count);
4087            PROF_MAX(prof, orig, op_count_max);
4088            PROF_ADD(prof, orig, temp_count);
4089            PROF_MAX(prof, orig, temp_count_max);
4090            PROF_ADD(prof, orig, del_op_count);
4091            PROF_ADD(prof, orig, code_in_len);
4092            PROF_ADD(prof, orig, code_out_len);
4093            PROF_ADD(prof, orig, search_out_len);
4094            PROF_ADD(prof, orig, interm_time);
4095            PROF_ADD(prof, orig, code_time);
4096            PROF_ADD(prof, orig, la_time);
4097            PROF_ADD(prof, orig, opt_time);
4098            PROF_ADD(prof, orig, restore_count);
4099            PROF_ADD(prof, orig, restore_time);
4100        }
4101        if (table) {
4102            int i;
4103
4104            for (i = 0; i < NB_OPS; i++) {
4105                PROF_ADD(prof, orig, table_op_count[i]);
4106            }
4107        }
4108    }
4109}
4110
4111#undef PROF_ADD
4112#undef PROF_MAX
4113
4114static void tcg_profile_snapshot_counters(TCGProfile *prof)
4115{
4116    tcg_profile_snapshot(prof, true, false);
4117}
4118
4119static void tcg_profile_snapshot_table(TCGProfile *prof)
4120{
4121    tcg_profile_snapshot(prof, false, true);
4122}
4123
4124void tcg_dump_op_count(void)
4125{
4126    TCGProfile prof = {};
4127    int i;
4128
4129    tcg_profile_snapshot_table(&prof);
4130    for (i = 0; i < NB_OPS; i++) {
4131        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4132                    prof.table_op_count[i]);
4133    }
4134}
4135
4136int64_t tcg_cpu_exec_time(void)
4137{
4138    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4139    unsigned int i;
4140    int64_t ret = 0;
4141
4142    for (i = 0; i < n_ctxs; i++) {
4143        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4144        const TCGProfile *prof = &s->prof;
4145
4146        ret += qatomic_read(&prof->cpu_exec_time);
4147    }
4148    return ret;
4149}
4150#else
4151void tcg_dump_op_count(void)
4152{
4153    qemu_printf("[TCG profiler not compiled]\n");
4154}
4155
4156int64_t tcg_cpu_exec_time(void)
4157{
4158    error_report("%s: TCG profiler not compiled", __func__);
4159    exit(EXIT_FAILURE);
4160}
4161#endif
4162
4163
4164int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4165{
4166#ifdef CONFIG_PROFILER
4167    TCGProfile *prof = &s->prof;
4168#endif
4169    int i, num_insns;
4170    TCGOp *op;
4171
4172#ifdef CONFIG_PROFILER
4173    {
4174        int n = 0;
4175
4176        QTAILQ_FOREACH(op, &s->ops, link) {
4177            n++;
4178        }
4179        qatomic_set(&prof->op_count, prof->op_count + n);
4180        if (n > prof->op_count_max) {
4181            qatomic_set(&prof->op_count_max, n);
4182        }
4183
4184        n = s->nb_temps;
4185        qatomic_set(&prof->temp_count, prof->temp_count + n);
4186        if (n > prof->temp_count_max) {
4187            qatomic_set(&prof->temp_count_max, n);
4188        }
4189    }
4190#endif
4191
4192#ifdef DEBUG_DISAS
4193    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4194                 && qemu_log_in_addr_range(tb->pc))) {
4195        FILE *logfile = qemu_log_lock();
4196        qemu_log("OP:\n");
4197        tcg_dump_ops(s, false);
4198        qemu_log("\n");
4199        qemu_log_unlock(logfile);
4200    }
4201#endif
4202
4203#ifdef CONFIG_DEBUG_TCG
4204    /* Ensure all labels referenced have been emitted.  */
4205    {
4206        TCGLabel *l;
4207        bool error = false;
4208
4209        QSIMPLEQ_FOREACH(l, &s->labels, next) {
4210            if (unlikely(!l->present) && l->refs) {
4211                qemu_log_mask(CPU_LOG_TB_OP,
4212                              "$L%d referenced but not present.\n", l->id);
4213                error = true;
4214            }
4215        }
4216        assert(!error);
4217    }
4218#endif
4219
4220#ifdef CONFIG_PROFILER
4221    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4222#endif
4223
4224#ifdef USE_TCG_OPTIMIZATIONS
4225    tcg_optimize(s);
4226#endif
4227
4228#ifdef CONFIG_PROFILER
4229    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4230    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4231#endif
4232
4233    reachable_code_pass(s);
4234    liveness_pass_1(s);
4235
4236    if (s->nb_indirects > 0) {
4237#ifdef DEBUG_DISAS
4238        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4239                     && qemu_log_in_addr_range(tb->pc))) {
4240            FILE *logfile = qemu_log_lock();
4241            qemu_log("OP before indirect lowering:\n");
4242            tcg_dump_ops(s, false);
4243            qemu_log("\n");
4244            qemu_log_unlock(logfile);
4245        }
4246#endif
4247        /* Replace indirect temps with direct temps.  */
4248        if (liveness_pass_2(s)) {
4249            /* If changes were made, re-run liveness.  */
4250            liveness_pass_1(s);
4251        }
4252    }
4253
4254#ifdef CONFIG_PROFILER
4255    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4256#endif
4257
4258#ifdef DEBUG_DISAS
4259    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4260                 && qemu_log_in_addr_range(tb->pc))) {
4261        FILE *logfile = qemu_log_lock();
4262        qemu_log("OP after optimization and liveness analysis:\n");
4263        tcg_dump_ops(s, true);
4264        qemu_log("\n");
4265        qemu_log_unlock(logfile);
4266    }
4267#endif
4268
4269    tcg_reg_alloc_start(s);
4270
4271    /*
4272     * Reset the buffer pointers when restarting after overflow.
4273     * TODO: Move this into translate-all.c with the rest of the
4274     * buffer management.  Having only this done here is confusing.
4275     */
4276    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4277    s->code_ptr = s->code_buf;
4278
4279#ifdef TCG_TARGET_NEED_LDST_LABELS
4280    QSIMPLEQ_INIT(&s->ldst_labels);
4281#endif
4282#ifdef TCG_TARGET_NEED_POOL_LABELS
4283    s->pool_labels = NULL;
4284#endif
4285
4286    num_insns = -1;
4287    QTAILQ_FOREACH(op, &s->ops, link) {
4288        TCGOpcode opc = op->opc;
4289
4290#ifdef CONFIG_PROFILER
4291        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4292#endif
4293
4294        switch (opc) {
4295        case INDEX_op_mov_i32:
4296        case INDEX_op_mov_i64:
4297        case INDEX_op_mov_vec:
4298            tcg_reg_alloc_mov(s, op);
4299            break;
4300        case INDEX_op_dup_vec:
4301            tcg_reg_alloc_dup(s, op);
4302            break;
4303        case INDEX_op_insn_start:
4304            if (num_insns >= 0) {
4305                size_t off = tcg_current_code_size(s);
4306                s->gen_insn_end_off[num_insns] = off;
4307                /* Assert that we do not overflow our stored offset.  */
4308                assert(s->gen_insn_end_off[num_insns] == off);
4309            }
4310            num_insns++;
4311            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4312                target_ulong a;
4313#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4314                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4315#else
4316                a = op->args[i];
4317#endif
4318                s->gen_insn_data[num_insns][i] = a;
4319            }
4320            break;
4321        case INDEX_op_discard:
4322            temp_dead(s, arg_temp(op->args[0]));
4323            break;
4324        case INDEX_op_set_label:
4325            tcg_reg_alloc_bb_end(s, s->reserved_regs);
4326            tcg_out_label(s, arg_label(op->args[0]));
4327            break;
4328        case INDEX_op_call:
4329            tcg_reg_alloc_call(s, op);
4330            break;
4331        case INDEX_op_dup2_vec:
4332            if (tcg_reg_alloc_dup2(s, op)) {
4333                break;
4334            }
4335            /* fall through */
4336        default:
4337            /* Sanity check that we've not introduced any unhandled opcodes. */
4338            tcg_debug_assert(tcg_op_supported(opc));
4339            /* Note: it would be much faster to have specialized
4340               register allocator functions for some common argument
4341               patterns. */
4342            tcg_reg_alloc_op(s, op);
4343            break;
4344        }
4345#ifdef CONFIG_DEBUG_TCG
4346        check_regs(s);
4347#endif
4348        /* Test for (pending) buffer overflow.  The assumption is that any
4349           one operation beginning below the high water mark cannot overrun
4350           the buffer completely.  Thus we can test for overflow after
4351           generating code without having to check during generation.  */
4352        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4353            return -1;
4354        }
4355        /* Test for TB overflow, as seen by gen_insn_end_off.  */
4356        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4357            return -2;
4358        }
4359    }
4360    tcg_debug_assert(num_insns >= 0);
4361    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4362
4363    /* Generate TB finalization at the end of block */
4364#ifdef TCG_TARGET_NEED_LDST_LABELS
4365    i = tcg_out_ldst_finalize(s);
4366    if (i < 0) {
4367        return i;
4368    }
4369#endif
4370#ifdef TCG_TARGET_NEED_POOL_LABELS
4371    i = tcg_out_pool_finalize(s);
4372    if (i < 0) {
4373        return i;
4374    }
4375#endif
4376    if (!tcg_resolve_relocs(s)) {
4377        return -2;
4378    }
4379
4380#ifndef CONFIG_TCG_INTERPRETER
4381    /* flush instruction cache */
4382    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4383                        (uintptr_t)s->code_buf,
4384                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4385#endif
4386
4387    return tcg_current_code_size(s);
4388}
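
/*
 * Editorial sketch of how a caller is expected to treat the return
 * value of tcg_gen_code(), inferred from the return paths above (the
 * real caller lives in accel/tcg/translate-all.c): a negative value
 * means the TB must be regenerated, -1 after making room in the code
 * buffer, -2 after splitting the TB so its search data fits.
 */
static inline bool example_gen_code_ok(int gen_code_size)
{
    return gen_code_size >= 0;  /* otherwise: retry per above */
}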
4389
4390#ifdef CONFIG_PROFILER
4391void tcg_dump_info(void)
4392{
4393    TCGProfile prof = {};
4394    const TCGProfile *s;
4395    int64_t tb_count;
4396    int64_t tb_div_count;
4397    int64_t tot;
4398
4399    tcg_profile_snapshot_counters(&prof);
4400    s = &prof;
4401    tb_count = s->tb_count;
4402    tb_div_count = tb_count ? tb_count : 1;
4403    tot = s->interm_time + s->code_time;
4404
4405    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4406                tot, tot / 2.4e9);
4407    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4408                " %0.1f%%)\n",
4409                tb_count, s->tb_count1 - tb_count,
4410                (double)(s->tb_count1 - s->tb_count)
4411                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4412    qemu_printf("avg ops/TB          %0.1f max=%d\n",
4413                (double)s->op_count / tb_div_count, s->op_count_max);
4414    qemu_printf("deleted ops/TB      %0.2f\n",
4415                (double)s->del_op_count / tb_div_count);
4416    qemu_printf("avg temps/TB        %0.2f max=%d\n",
4417                (double)s->temp_count / tb_div_count, s->temp_count_max);
4418    qemu_printf("avg host code/TB    %0.1f\n",
4419                (double)s->code_out_len / tb_div_count);
4420    qemu_printf("avg search data/TB  %0.1f\n",
4421                (double)s->search_out_len / tb_div_count);
4422    
4423    qemu_printf("cycles/op           %0.1f\n",
4424                s->op_count ? (double)tot / s->op_count : 0);
4425    qemu_printf("cycles/in byte      %0.1f\n",
4426                s->code_in_len ? (double)tot / s->code_in_len : 0);
4427    qemu_printf("cycles/out byte     %0.1f\n",
4428                s->code_out_len ? (double)tot / s->code_out_len : 0);
4429    qemu_printf("cycles/search byte  %0.1f\n",
4430                s->search_out_len ? (double)tot / s->search_out_len : 0);
4431    if (tot == 0) {
4432        tot = 1;
4433    }
4434    qemu_printf("  gen_interm time   %0.1f%%\n",
4435                (double)s->interm_time / tot * 100.0);
4436    qemu_printf("  gen_code time     %0.1f%%\n",
4437                (double)s->code_time / tot * 100.0);
4438    qemu_printf("optim./code time    %0.1f%%\n",
4439                (double)s->opt_time / (s->code_time ? s->code_time : 1)
4440                * 100.0);
4441    qemu_printf("liveness/code time  %0.1f%%\n",
4442                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4443    qemu_printf("cpu_restore count   %" PRId64 "\n",
4444                s->restore_count);
4445    qemu_printf("  avg cycles        %0.1f\n",
4446                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4447}
4448#else
4449void tcg_dump_info(void)
4450{
4451    qemu_printf("[TCG profiler not compiled]\n");
4452}
4453#endif
4454
4455#ifdef ELF_HOST_MACHINE
4456/* In order to use this feature, the backend needs to do three things:
4457
4458   (1) Define ELF_HOST_MACHINE to indicate both what value to
4459       put into the ELF image and to indicate support for the feature.
4460
4461   (2) Define tcg_register_jit.  This should create a buffer containing
4462       the contents of a .debug_frame section that describes the post-
4463       prologue unwind info for the tcg machine.
4464
4465   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4466*/
4467
4468/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4469typedef enum {
4470    JIT_NOACTION = 0,
4471    JIT_REGISTER_FN,
4472    JIT_UNREGISTER_FN
4473} jit_actions_t;
4474
4475struct jit_code_entry {
4476    struct jit_code_entry *next_entry;
4477    struct jit_code_entry *prev_entry;
4478    const void *symfile_addr;
4479    uint64_t symfile_size;
4480};
4481
4482struct jit_descriptor {
4483    uint32_t version;
4484    uint32_t action_flag;
4485    struct jit_code_entry *relevant_entry;
4486    struct jit_code_entry *first_entry;
4487};
4488
4489void __jit_debug_register_code(void) __attribute__((noinline));
4490void __jit_debug_register_code(void)
4491{
4492    asm("");
4493}
4494
4495/* Must statically initialize the version, because GDB may check
4496   the version before we can set it.  */
4497struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4498
4499/* End GDB interface.  */
4500
4501static int find_string(const char *strtab, const char *str)
4502{
4503    const char *p = strtab + 1;
4504
4505    while (1) {
4506        if (strcmp(p, str) == 0) {
4507            return p - strtab;
4508        }
4509        p += strlen(p) + 1;
4510    }
4511}
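
/*
 * Example (editorial): with the string table laid out below,
 * "\0.text\0.debug_info\0...", find_string() returns the byte offset
 * that ELF headers store in sh_name/st_name: 1 for ".text", 7 for
 * ".debug_info", and so on.
 */
static inline int example_sh_name_of_text(void)
{
    static const char strtab[] = "\0.text\0.debug_info";
    return find_string(strtab, ".text");    /* returns 1 */
}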
4512
4513static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4514                                 const void *debug_frame,
4515                                 size_t debug_frame_size)
4516{
4517    struct __attribute__((packed)) DebugInfo {
4518        uint32_t  len;
4519        uint16_t  version;
4520        uint32_t  abbrev;
4521        uint8_t   ptr_size;
4522        uint8_t   cu_die;
4523        uint16_t  cu_lang;
4524        uintptr_t cu_low_pc;
4525        uintptr_t cu_high_pc;
4526        uint8_t   fn_die;
4527        char      fn_name[16];
4528        uintptr_t fn_low_pc;
4529        uintptr_t fn_high_pc;
4530        uint8_t   cu_eoc;
4531    };
4532
4533    struct ElfImage {
4534        ElfW(Ehdr) ehdr;
4535        ElfW(Phdr) phdr;
4536        ElfW(Shdr) shdr[7];
4537        ElfW(Sym)  sym[2];
4538        struct DebugInfo di;
4539        uint8_t    da[24];
4540        char       str[80];
4541    };
4542
4543    struct ElfImage *img;
4544
4545    static const struct ElfImage img_template = {
4546        .ehdr = {
4547            .e_ident[EI_MAG0] = ELFMAG0,
4548            .e_ident[EI_MAG1] = ELFMAG1,
4549            .e_ident[EI_MAG2] = ELFMAG2,
4550            .e_ident[EI_MAG3] = ELFMAG3,
4551            .e_ident[EI_CLASS] = ELF_CLASS,
4552            .e_ident[EI_DATA] = ELF_DATA,
4553            .e_ident[EI_VERSION] = EV_CURRENT,
4554            .e_type = ET_EXEC,
4555            .e_machine = ELF_HOST_MACHINE,
4556            .e_version = EV_CURRENT,
4557            .e_phoff = offsetof(struct ElfImage, phdr),
4558            .e_shoff = offsetof(struct ElfImage, shdr),
4559            .e_ehsize = sizeof(ElfW(Ehdr)),
4560            .e_phentsize = sizeof(ElfW(Phdr)),
4561            .e_phnum = 1,
4562            .e_shentsize = sizeof(ElfW(Shdr)),
4563            .e_shnum = ARRAY_SIZE(img->shdr),
4564            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4565#ifdef ELF_HOST_FLAGS
4566            .e_flags = ELF_HOST_FLAGS,
4567#endif
4568#ifdef ELF_OSABI
4569            .e_ident[EI_OSABI] = ELF_OSABI,
4570#endif
4571        },
4572        .phdr = {
4573            .p_type = PT_LOAD,
4574            .p_flags = PF_X,
4575        },
4576        .shdr = {
4577            [0] = { .sh_type = SHT_NULL },
4578            /* Trick: The contents of code_gen_buffer are not present in
4579               this fake ELF file; that got allocated elsewhere.  Therefore
4580               we mark .text as SHT_NOBITS (similar to .bss) so that readers
4581               will not look for contents.  We can record any address.  */
4582            [1] = { /* .text */
4583                .sh_type = SHT_NOBITS,
4584                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4585            },
4586            [2] = { /* .debug_info */
4587                .sh_type = SHT_PROGBITS,
4588                .sh_offset = offsetof(struct ElfImage, di),
4589                .sh_size = sizeof(struct DebugInfo),
4590            },
4591            [3] = { /* .debug_abbrev */
4592                .sh_type = SHT_PROGBITS,
4593                .sh_offset = offsetof(struct ElfImage, da),
4594                .sh_size = sizeof(img->da),
4595            },
4596            [4] = { /* .debug_frame */
4597                .sh_type = SHT_PROGBITS,
4598                .sh_offset = sizeof(struct ElfImage),
4599            },
4600            [5] = { /* .symtab */
4601                .sh_type = SHT_SYMTAB,
4602                .sh_offset = offsetof(struct ElfImage, sym),
4603                .sh_size = sizeof(img->sym),
4604                .sh_info = 1,
4605                .sh_link = ARRAY_SIZE(img->shdr) - 1,
4606                .sh_entsize = sizeof(ElfW(Sym)),
4607            },
4608            [6] = { /* .strtab */
4609                .sh_type = SHT_STRTAB,
4610                .sh_offset = offsetof(struct ElfImage, str),
4611                .sh_size = sizeof(img->str),
4612            }
4613        },
4614        .sym = {
4615            [1] = { /* code_gen_buffer */
4616                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4617                .st_shndx = 1,
4618            }
4619        },
4620        .di = {
4621            .len = sizeof(struct DebugInfo) - 4,
4622            .version = 2,
4623            .ptr_size = sizeof(void *),
4624            .cu_die = 1,
4625            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4626            .fn_die = 2,
4627            .fn_name = "code_gen_buffer"
4628        },
4629        .da = {
4630            1,          /* abbrev number (the cu) */
4631            0x11, 1,    /* DW_TAG_compile_unit, has children */
4632            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4633            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4634            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4635            0, 0,       /* end of abbrev */
4636            2,          /* abbrev number (the fn) */
4637            0x2e, 0,    /* DW_TAG_subprogram, no children */
4638            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4639            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4640            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4641            0, 0,       /* end of abbrev */
4642            0           /* no more abbrev */
4643        },
4644        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4645               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4646    };
4647
4648    /* We only need a single jit entry; statically allocate it.  */
4649    static struct jit_code_entry one_entry;
4650
4651    uintptr_t buf = (uintptr_t)buf_ptr;
4652    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4653    DebugFrameHeader *dfh;
4654
4655    img = g_malloc(img_size);
4656    *img = img_template;
4657
4658    img->phdr.p_vaddr = buf;
4659    img->phdr.p_paddr = buf;
4660    img->phdr.p_memsz = buf_size;
4661
4662    img->shdr[1].sh_name = find_string(img->str, ".text");
4663    img->shdr[1].sh_addr = buf;
4664    img->shdr[1].sh_size = buf_size;
4665
4666    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4667    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4668
4669    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4670    img->shdr[4].sh_size = debug_frame_size;
4671
4672    img->shdr[5].sh_name = find_string(img->str, ".symtab");
4673    img->shdr[6].sh_name = find_string(img->str, ".strtab");
4674
4675    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4676    img->sym[1].st_value = buf;
4677    img->sym[1].st_size = buf_size;
4678
4679    img->di.cu_low_pc = buf;
4680    img->di.cu_high_pc = buf + buf_size;
4681    img->di.fn_low_pc = buf;
4682    img->di.fn_high_pc = buf + buf_size;
4683
4684    dfh = (DebugFrameHeader *)(img + 1);
4685    memcpy(dfh, debug_frame, debug_frame_size);
4686    dfh->fde.func_start = buf;
4687    dfh->fde.func_len = buf_size;
4688
4689#ifdef DEBUG_JIT
4690    /* Enable this block to debug the ELF image file creation.
4691       One can use readelf, objdump, or other inspection utilities.  */
4692    {
4693        FILE *f = fopen("/tmp/qemu.jit", "w+b");
4694        if (f) {
4695            if (fwrite(img, img_size, 1, f) != 1) {
4696                /* Ignore errors; this only affects the debug dump.  */
4697            }
4698            fclose(f);
4699        }
4700    }
4701#endif
4702
4703    one_entry.symfile_addr = img;
4704    one_entry.symfile_size = img_size;
4705
4706    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4707    __jit_debug_descriptor.relevant_entry = &one_entry;
4708    __jit_debug_descriptor.first_entry = &one_entry;
4709    __jit_debug_register_code();
4710}
4711#else
4712/* No support for the feature.  Provide the entry point expected by exec.c,
4713   and implement the internal function we declared earlier.  */
4714
4715static void tcg_register_jit_int(const void *buf, size_t size,
4716                                 const void *debug_frame,
4717                                 size_t debug_frame_size)
4718{
4719}
4720
4721void tcg_register_jit(const void *buf, size_t buf_size)
4722{
4723}
4724#endif /* ELF_HOST_MACHINE */
4725
4726#if !TCG_TARGET_MAYBE_vec
4727void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4728{
4729    g_assert_not_reached();
4730}
4731#endif
4732