qemu/tcg/tcg.c
<<
>>
Prefs
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25/* define it to use liveness analysis (better code) */
  26#define USE_TCG_OPTIMIZATIONS
  27
  28#include "qemu/osdep.h"
  29
/* Define to dump the ELF file used to communicate with GDB.  */
  31#undef DEBUG_JIT
  32
  33#include "qemu/error-report.h"
  34#include "qemu/cutils.h"
  35#include "qemu/host-utils.h"
  36#include "qemu/qemu-print.h"
  37#include "qemu/timer.h"
  38#include "qemu/cacheflush.h"
  39#include "qemu/cacheinfo.h"
  40
  41/* Note: the long term plan is to reduce the dependencies on the QEMU
  42   CPU definitions. Currently they are used for qemu_ld/st
  43   instructions */
  44#define NO_CPU_IO_DEFS
  45
  46#include "exec/exec-all.h"
  47#include "tcg/tcg-op.h"
  48
  49#if UINTPTR_MAX == UINT32_MAX
  50# define ELF_CLASS  ELFCLASS32
  51#else
  52# define ELF_CLASS  ELFCLASS64
  53#endif
  54#ifdef HOST_WORDS_BIGENDIAN
  55# define ELF_DATA   ELFDATA2MSB
  56#else
  57# define ELF_DATA   ELFDATA2LSB
  58#endif
  59
  60#include "elf.h"
  61#include "exec/log.h"
  62#include "tcg/tcg-ldst.h"
  63#include "tcg-internal.h"
  64
  65#ifdef CONFIG_TCG_INTERPRETER
  66#include <ffi.h>
  67#endif
  68
  69/* Forward declarations for functions declared in tcg-target.c.inc and
  70   used here. */
  71static void tcg_target_init(TCGContext *s);
  72static void tcg_target_qemu_prologue(TCGContext *s);
  73static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
  74                        intptr_t value, intptr_t addend);
  75
  76/* The CIE and FDE header definitions will be common to all hosts.  */
  77typedef struct {
  78    uint32_t len __attribute__((aligned((sizeof(void *)))));
  79    uint32_t id;
  80    uint8_t version;
  81    char augmentation[1];
  82    uint8_t code_align;
  83    uint8_t data_align;
  84    uint8_t return_column;
  85} DebugFrameCIE;
  86
  87typedef struct QEMU_PACKED {
  88    uint32_t len __attribute__((aligned((sizeof(void *)))));
  89    uint32_t cie_offset;
  90    uintptr_t func_start;
  91    uintptr_t func_len;
  92} DebugFrameFDEHeader;
  93
/* A CIE immediately followed by the header of one FDE, packed together.  */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
  98
  99static void tcg_register_jit_int(const void *buf, size_t size,
 100                                 const void *debug_frame,
 101                                 size_t debug_frame_size)
 102    __attribute__((unused));
 103
 104/* Forward declarations for functions declared and used in tcg-target.c.inc. */
 105static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
 106                       intptr_t arg2);
 107static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 108static void tcg_out_movi(TCGContext *s, TCGType type,
 109                         TCGReg ret, tcg_target_long arg);
 110static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 111                       const TCGArg args[TCG_MAX_OP_ARGS],
 112                       const int const_args[TCG_MAX_OP_ARGS]);
 113#if TCG_TARGET_MAYBE_vec
 114static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 115                            TCGReg dst, TCGReg src);
 116static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 117                             TCGReg dst, TCGReg base, intptr_t offset);
 118static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
 119                             TCGReg dst, int64_t arg);
 120static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 121                           unsigned vecl, unsigned vece,
 122                           const TCGArg args[TCG_MAX_OP_ARGS],
 123                           const int const_args[TCG_MAX_OP_ARGS]);
 124#else
 125static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 126                                   TCGReg dst, TCGReg src)
 127{
 128    g_assert_not_reached();
 129}
 130static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 131                                    TCGReg dst, TCGReg base, intptr_t offset)
 132{
 133    g_assert_not_reached();
 134}
 135static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
 136                                    TCGReg dst, int64_t arg)
 137{
 138    g_assert_not_reached();
 139}
 140static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 141                                  unsigned vecl, unsigned vece,
 142                                  const TCGArg args[TCG_MAX_OP_ARGS],
 143                                  const int const_args[TCG_MAX_OP_ARGS])
 144{
 145    g_assert_not_reached();
 146}
 147#endif
 148static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
 149                       intptr_t arg2);
 150static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 151                        TCGReg base, intptr_t ofs);
 152#ifdef CONFIG_TCG_INTERPRETER
 153static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
 154                         ffi_cif *cif);
 155#else
 156static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
 157#endif
 158static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
 159#ifdef TCG_TARGET_NEED_LDST_LABELS
 160static int tcg_out_ldst_finalize(TCGContext *s);
 161#endif
 162
/*
 * The initial context, filled in by tcg_context_init().  User-mode threads
 * use it directly; softmmu threads start from a copy of it (see
 * tcg_register_thread()).
 */
TCGContext tcg_init_ctx;
/* Context used by the current thread.  */
__thread TCGContext *tcg_ctx;

/* Registered contexts, the number of entries claimed so far, and the
   capacity of the array.  */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
/* Set by tcg_context_init() to the fixed global created on TCG_AREG0.  */
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;
 172
 173#ifndef CONFIG_TCG_INTERPRETER
 174tcg_prologue_fn *tcg_qemu_tb_exec;
 175#endif
 176
 177static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
 178static TCGRegSet tcg_target_call_clobber_regs;
 179
 180#if TCG_TARGET_INSN_UNIT_SIZE == 1
 181static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
 182{
 183    *s->code_ptr++ = v;
 184}
 185
 186static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
 187                                                      uint8_t v)
 188{
 189    *p = v;
 190}
 191#endif
 192
 193#if TCG_TARGET_INSN_UNIT_SIZE <= 2
 194static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
 195{
 196    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
 197        *s->code_ptr++ = v;
 198    } else {
 199        tcg_insn_unit *p = s->code_ptr;
 200        memcpy(p, &v, sizeof(v));
 201        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
 202    }
 203}
 204
 205static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
 206                                                       uint16_t v)
 207{
 208    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
 209        *p = v;
 210    } else {
 211        memcpy(p, &v, sizeof(v));
 212    }
 213}
 214#endif
 215
 216#if TCG_TARGET_INSN_UNIT_SIZE <= 4
 217static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
 218{
 219    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
 220        *s->code_ptr++ = v;
 221    } else {
 222        tcg_insn_unit *p = s->code_ptr;
 223        memcpy(p, &v, sizeof(v));
 224        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
 225    }
 226}
 227
 228static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
 229                                                       uint32_t v)
 230{
 231    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
 232        *p = v;
 233    } else {
 234        memcpy(p, &v, sizeof(v));
 235    }
 236}
 237#endif
 238
 239#if TCG_TARGET_INSN_UNIT_SIZE <= 8
 240static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
 241{
 242    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
 243        *s->code_ptr++ = v;
 244    } else {
 245        tcg_insn_unit *p = s->code_ptr;
 246        memcpy(p, &v, sizeof(v));
 247        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
 248    }
 249}
 250
 251static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
 252                                                       uint64_t v)
 253{
 254    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
 255        *p = v;
 256    } else {
 257        memcpy(p, &v, sizeof(v));
 258    }
 259}
 260#endif
 261
 262/* label relocation processing */
 263
 264static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
 265                          TCGLabel *l, intptr_t addend)
 266{
 267    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 268
 269    r->type = type;
 270    r->ptr = code_ptr;
 271    r->addend = addend;
 272    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
 273}
 274
 275static void tcg_out_label(TCGContext *s, TCGLabel *l)
 276{
 277    tcg_debug_assert(!l->has_value);
 278    l->has_value = 1;
 279    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
 280}
 281
 282TCGLabel *gen_new_label(void)
 283{
 284    TCGContext *s = tcg_ctx;
 285    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
 286
 287    memset(l, 0, sizeof(TCGLabel));
 288    l->id = s->nb_labels++;
 289    QSIMPLEQ_INIT(&l->relocs);
 290
 291    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
 292
 293    return l;
 294}
 295
 296static bool tcg_resolve_relocs(TCGContext *s)
 297{
 298    TCGLabel *l;
 299
 300    QSIMPLEQ_FOREACH(l, &s->labels, next) {
 301        TCGRelocation *r;
 302        uintptr_t value = l->u.value;
 303
 304        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
 305            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
 306                return false;
 307            }
 308        }
 309    }
 310    return true;
 311}
 312
/* Record the current code size as the reset offset of jump slot @which.  */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
 321
/*
 * Signal overflow, starting over with fewer guest insns.
 * Does not return: control is transferred with siglongjmp() to the
 * point saved in s->jmp_trans, which sees the value -2.
 */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}
 327
/*
 * Token-pasting helpers: glue the prefix P and the constraint names
 * A..F into one identifier, with '_' between consecutive names.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
 334
/*
 * Define an enumeration for the various combinations.
 * Each C_*() entry of tcg-target-con-set.h expands here to one
 * enumerator followed by a comma, named after its constraint arguments.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
 357
 358static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
 359
 360#undef C_O0_I1
 361#undef C_O0_I2
 362#undef C_O0_I3
 363#undef C_O0_I4
 364#undef C_O1_I1
 365#undef C_O1_I2
 366#undef C_O1_I3
 367#undef C_O1_I4
 368#undef C_N1_I2
 369#undef C_O2_I1
 370#undef C_O2_I2
 371#undef C_O2_I3
 372#undef C_O2_I4
 373
/*
 * Put all of the constraint sets into an array, indexed by the enum.
 * The same entries are expanded again, this time as initializers whose
 * args_ct_str holds the stringified constraints in argument order.
 * C_N1_I2 additionally prefixes its output constraint with "&".
 */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
 396
 397
 398#undef C_O0_I1
 399#undef C_O0_I2
 400#undef C_O0_I3
 401#undef C_O0_I4
 402#undef C_O1_I1
 403#undef C_O1_I2
 404#undef C_O1_I3
 405#undef C_O1_I4
 406#undef C_N1_I2
 407#undef C_O2_I1
 408#undef C_O2_I2
 409#undef C_O2_I3
 410#undef C_O2_I4
 411
/*
 * Expand the enumerator to be returned from tcg_target_op_def().
 * These are the same names as in the enumeration, but without the
 * trailing comma, so each macro use evaluates to a single enumerator.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
 430
 431#include "tcg-target.c.inc"
 432
 433static void alloc_tcg_plugin_context(TCGContext *s)
 434{
 435#ifdef CONFIG_PLUGIN
 436    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
 437    s->plugin_tb->insns =
 438        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
 439#endif
 440}
 441
 442/*
 443 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 444 * and registered the target's TCG globals) must register with this function
 445 * before initiating translation.
 446 *
 447 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 448 * of tcg_region_init() for the reasoning behind this.
 449 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 456 */
 457#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the one initial context.  */
    tcg_ctx = &tcg_init_ctx;
}
 462#else
 463void tcg_register_thread(void)
 464{
 465    TCGContext *s = g_malloc(sizeof(*s));
 466    unsigned int i, n;
 467
 468    *s = tcg_init_ctx;
 469
 470    /* Relink mem_base.  */
 471    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
 472        if (tcg_init_ctx.temps[i].mem_base) {
 473            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
 474            tcg_debug_assert(b >= 0 && b < n);
 475            s->temps[i].mem_base = &s->temps[b];
 476        }
 477    }
 478
 479    /* Claim an entry in tcg_ctxs */
 480    n = qatomic_fetch_inc(&tcg_cur_ctxs);
 481    g_assert(n < tcg_max_ctxs);
 482    qatomic_set(&tcg_ctxs[n], s);
 483
 484    if (n > 0) {
 485        alloc_tcg_plugin_context(s);
 486        tcg_region_initial_alloc(s);
 487    }
 488
 489    tcg_ctx = s;
 490}
 491#endif /* !CONFIG_USER_ONLY */
 492
 493/* pool based memory allocation */
 494void *tcg_malloc_internal(TCGContext *s, int size)
 495{
 496    TCGPool *p;
 497    int pool_size;
 498    
 499    if (size > TCG_POOL_CHUNK_SIZE) {
 500        /* big malloc: insert a new pool (XXX: could optimize) */
 501        p = g_malloc(sizeof(TCGPool) + size);
 502        p->size = size;
 503        p->next = s->pool_first_large;
 504        s->pool_first_large = p;
 505        return p->data;
 506    } else {
 507        p = s->pool_current;
 508        if (!p) {
 509            p = s->pool_first;
 510            if (!p)
 511                goto new_pool;
 512        } else {
 513            if (!p->next) {
 514            new_pool:
 515                pool_size = TCG_POOL_CHUNK_SIZE;
 516                p = g_malloc(sizeof(TCGPool) + pool_size);
 517                p->size = pool_size;
 518                p->next = NULL;
 519                if (s->pool_current) 
 520                    s->pool_current->next = p;
 521                else
 522                    s->pool_first = p;
 523            } else {
 524                p = p->next;
 525            }
 526        }
 527    }
 528    s->pool_current = p;
 529    s->pool_cur = p->data + size;
 530    s->pool_end = p->data + p->size;
 531    return p->data;
 532}
 533
 534void tcg_pool_reset(TCGContext *s)
 535{
 536    TCGPool *p, *t;
 537    for (p = s->pool_first_large; p; p = t) {
 538        t = p->next;
 539        g_free(p);
 540    }
 541    s->pool_first_large = NULL;
 542    s->pool_cur = s->pool_end = NULL;
 543    s->pool_current = NULL;
 544}
 545
 546#include "exec/helper-proto.h"
 547
/* One entry per helper, produced by expanding exec/helper-tcg.h.  */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry; filled in
   by tcg_context_init().  */
static GHashTable *helper_table;
 552
 553#ifdef CONFIG_TCG_INTERPRETER
 554static GHashTable *ffi_table;
 555
/*
 * libffi type for each helper typecode.  The table has 8 slots because
 * typecodes are extracted as 3-bit fields; slots without an initializer
 * are NULL.
 */
static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
 564#endif
 565
 566static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
 567static void process_op_defs(TCGContext *s);
 568static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
 569                                            TCGReg reg, const char *name);
 570
 571static void tcg_context_init(unsigned max_cpus)
 572{
 573    TCGContext *s = &tcg_init_ctx;
 574    int op, total_args, n, i;
 575    TCGOpDef *def;
 576    TCGArgConstraint *args_ct;
 577    TCGTemp *ts;
 578
 579    memset(s, 0, sizeof(*s));
 580    s->nb_globals = 0;
 581
 582    /* Count total number of arguments and allocate the corresponding
 583       space */
 584    total_args = 0;
 585    for(op = 0; op < NB_OPS; op++) {
 586        def = &tcg_op_defs[op];
 587        n = def->nb_iargs + def->nb_oargs;
 588        total_args += n;
 589    }
 590
 591    args_ct = g_new0(TCGArgConstraint, total_args);
 592
 593    for(op = 0; op < NB_OPS; op++) {
 594        def = &tcg_op_defs[op];
 595        def->args_ct = args_ct;
 596        n = def->nb_iargs + def->nb_oargs;
 597        args_ct += n;
 598    }
 599
 600    /* Register helpers.  */
 601    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
 602    helper_table = g_hash_table_new(NULL, NULL);
 603
 604    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
 605        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
 606                            (gpointer)&all_helpers[i]);
 607    }
 608
 609#ifdef CONFIG_TCG_INTERPRETER
 610    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
 611    ffi_table = g_hash_table_new(NULL, NULL);
 612    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
 613        struct {
 614            ffi_cif cif;
 615            ffi_type *args[];
 616        } *ca;
 617        uint32_t typemask = all_helpers[i].typemask;
 618        gpointer hash = (gpointer)(uintptr_t)typemask;
 619        ffi_status status;
 620        int nargs;
 621
 622        if (g_hash_table_lookup(ffi_table, hash)) {
 623            continue;
 624        }
 625
 626        /* Ignoring the return type, find the last non-zero field. */
 627        nargs = 32 - clz32(typemask >> 3);
 628        nargs = DIV_ROUND_UP(nargs, 3);
 629
 630        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
 631        ca->cif.rtype = typecode_to_ffi[typemask & 7];
 632        ca->cif.nargs = nargs;
 633
 634        if (nargs != 0) {
 635            ca->cif.arg_types = ca->args;
 636            for (i = 0; i < nargs; ++i) {
 637                int typecode = extract32(typemask, (i + 1) * 3, 3);
 638                ca->args[i] = typecode_to_ffi[typecode];
 639            }
 640        }
 641
 642        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
 643                              ca->cif.rtype, ca->cif.arg_types);
 644        assert(status == FFI_OK);
 645
 646        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
 647    }
 648#endif
 649
 650    tcg_target_init(s);
 651    process_op_defs(s);
 652
 653    /* Reverse the order of the saved registers, assuming they're all at
 654       the start of tcg_target_reg_alloc_order.  */
 655    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
 656        int r = tcg_target_reg_alloc_order[n];
 657        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
 658            break;
 659        }
 660    }
 661    for (i = 0; i < n; ++i) {
 662        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
 663    }
 664    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
 665        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
 666    }
 667
 668    alloc_tcg_plugin_context(s);
 669
 670    tcg_ctx = s;
 671    /*
 672     * In user-mode we simply share the init context among threads, since we
 673     * use a single region. See the documentation tcg_region_init() for the
 674     * reasoning behind this.
 675     * In softmmu we will have at most max_cpus TCG threads.
 676     */
 677#ifdef CONFIG_USER_ONLY
 678    tcg_ctxs = &tcg_ctx;
 679    tcg_cur_ctxs = 1;
 680    tcg_max_ctxs = 1;
 681#else
 682    tcg_max_ctxs = max_cpus;
 683    tcg_ctxs = g_new0(TCGContext *, max_cpus);
 684#endif
 685
 686    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
 687    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
 688    cpu_env = temp_tcgv_ptr(ts);
 689}
 690
/*
 * Initialise TCG: set up the initial context first, then the code
 * regions.  @tb_size and @splitwx are passed through to
 * tcg_region_init(); @max_cpus is passed to both steps.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
 696
 697/*
 698 * Allocate TBs right before their corresponding translated code, making
 699 * sure that TBs and code are on different cache lines.
 700 */
 701TranslationBlock *tcg_tb_alloc(TCGContext *s)
 702{
 703    uintptr_t align = qemu_icache_linesize;
 704    TranslationBlock *tb;
 705    void *next;
 706
 707 retry:
 708    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
 709    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
 710
 711    if (unlikely(next > s->code_gen_highwater)) {
 712        if (tcg_region_alloc(s)) {
 713            return NULL;
 714        }
 715        goto retry;
 716    }
 717    qatomic_set(&s->code_gen_ptr, next);
 718    s->data_gen_ptr = NULL;
 719    return tb;
 720}
 721
/*
 * Generate the host prologue at the current start of the code buffer of
 * @s, flush the caches for it, optionally log its disassembly, and hand
 * the result to tcg_region_prologue_set().
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    /* Emit the prologue starting at the current code_gen_ptr.  */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The prologue is the entry point used to execute TBs.  */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            /* Code comes first, followed by data starting at data_gen_ptr;
               disassemble the former and dump the latter word by word.  */
            size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(s->code_gen_ptr, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(s->code_gen_ptr, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
 799
 800void tcg_func_start(TCGContext *s)
 801{
 802    tcg_pool_reset(s);
 803    s->nb_temps = s->nb_globals;
 804
 805    /* No temps have been previously allocated for size or locality.  */
 806    memset(s->free_temps, 0, sizeof(s->free_temps));
 807
 808    /* No constant temps have been previously allocated. */
 809    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
 810        if (s->const_table[i]) {
 811            g_hash_table_remove_all(s->const_table[i]);
 812        }
 813    }
 814
 815    s->nb_ops = 0;
 816    s->nb_labels = 0;
 817    s->current_frame_offset = s->frame_start;
 818
 819#ifdef CONFIG_DEBUG_TCG
 820    s->goto_tb_issue_mask = 0;
 821#endif
 822
 823    QTAILQ_INIT(&s->ops);
 824    QTAILQ_INIT(&s->free_ops);
 825    QSIMPLEQ_INIT(&s->labels);
 826}
 827
 828static TCGTemp *tcg_temp_alloc(TCGContext *s)
 829{
 830    int n = s->nb_temps++;
 831
 832    if (n >= TCG_MAX_TEMPS) {
 833        tcg_raise_tb_overflow(s);
 834    }
 835    return memset(&s->temps[n], 0, sizeof(TCGTemp));
 836}
 837
 838static TCGTemp *tcg_global_alloc(TCGContext *s)
 839{
 840    TCGTemp *ts;
 841
 842    tcg_debug_assert(s->nb_globals == s->nb_temps);
 843    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
 844    s->nb_globals++;
 845    ts = tcg_temp_alloc(s);
 846    ts->kind = TEMP_GLOBAL;
 847
 848    return ts;
 849}
 850
 851static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
 852                                            TCGReg reg, const char *name)
 853{
 854    TCGTemp *ts;
 855
 856    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
 857        tcg_abort();
 858    }
 859
 860    ts = tcg_global_alloc(s);
 861    ts->base_type = type;
 862    ts->type = type;
 863    ts->kind = TEMP_FIXED;
 864    ts->reg = reg;
 865    ts->name = name;
 866    tcg_regset_set_reg(s->reserved_regs, reg);
 867
 868    return ts;
 869}
 870
 871void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
 872{
 873    s->frame_start = start;
 874    s->frame_end = start + size;
 875    s->frame_temp
 876        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
 877}
 878
 879TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
 880                                     intptr_t offset, const char *name)
 881{
 882    TCGContext *s = tcg_ctx;
 883    TCGTemp *base_ts = tcgv_ptr_temp(base);
 884    TCGTemp *ts = tcg_global_alloc(s);
 885    int indirect_reg = 0, bigendian = 0;
 886#ifdef HOST_WORDS_BIGENDIAN
 887    bigendian = 1;
 888#endif
 889
 890    switch (base_ts->kind) {
 891    case TEMP_FIXED:
 892        break;
 893    case TEMP_GLOBAL:
 894        /* We do not support double-indirect registers.  */
 895        tcg_debug_assert(!base_ts->indirect_reg);
 896        base_ts->indirect_base = 1;
 897        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
 898                            ? 2 : 1);
 899        indirect_reg = 1;
 900        break;
 901    default:
 902        g_assert_not_reached();
 903    }
 904
 905    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
 906        TCGTemp *ts2 = tcg_global_alloc(s);
 907        char buf[64];
 908
 909        ts->base_type = TCG_TYPE_I64;
 910        ts->type = TCG_TYPE_I32;
 911        ts->indirect_reg = indirect_reg;
 912        ts->mem_allocated = 1;
 913        ts->mem_base = base_ts;
 914        ts->mem_offset = offset + bigendian * 4;
 915        pstrcpy(buf, sizeof(buf), name);
 916        pstrcat(buf, sizeof(buf), "_0");
 917        ts->name = strdup(buf);
 918
 919        tcg_debug_assert(ts2 == ts + 1);
 920        ts2->base_type = TCG_TYPE_I64;
 921        ts2->type = TCG_TYPE_I32;
 922        ts2->indirect_reg = indirect_reg;
 923        ts2->mem_allocated = 1;
 924        ts2->mem_base = base_ts;
 925        ts2->mem_offset = offset + (1 - bigendian) * 4;
 926        pstrcpy(buf, sizeof(buf), name);
 927        pstrcat(buf, sizeof(buf), "_1");
 928        ts2->name = strdup(buf);
 929    } else {
 930        ts->base_type = type;
 931        ts->type = type;
 932        ts->indirect_reg = indirect_reg;
 933        ts->mem_allocated = 1;
 934        ts->mem_base = base_ts;
 935        ts->mem_offset = offset;
 936        ts->name = name;
 937    }
 938    return ts;
 939}
 940
 941TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
 942{
 943    TCGContext *s = tcg_ctx;
 944    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
 945    TCGTemp *ts;
 946    int idx, k;
 947
 948    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
 949    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
 950    if (idx < TCG_MAX_TEMPS) {
 951        /* There is already an available temp with the right type.  */
 952        clear_bit(idx, s->free_temps[k].l);
 953
 954        ts = &s->temps[idx];
 955        ts->temp_allocated = 1;
 956        tcg_debug_assert(ts->base_type == type);
 957        tcg_debug_assert(ts->kind == kind);
 958    } else {
 959        ts = tcg_temp_alloc(s);
 960        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
 961            TCGTemp *ts2 = tcg_temp_alloc(s);
 962
 963            ts->base_type = type;
 964            ts->type = TCG_TYPE_I32;
 965            ts->temp_allocated = 1;
 966            ts->kind = kind;
 967
 968            tcg_debug_assert(ts2 == ts + 1);
 969            ts2->base_type = TCG_TYPE_I64;
 970            ts2->type = TCG_TYPE_I32;
 971            ts2->temp_allocated = 1;
 972            ts2->kind = kind;
 973        } else {
 974            ts->base_type = type;
 975            ts->type = type;
 976            ts->temp_allocated = 1;
 977            ts->kind = kind;
 978        }
 979    }
 980
 981#if defined(CONFIG_DEBUG_TCG)
 982    s->temps_in_use++;
 983#endif
 984    return ts;
 985}
 986
 987TCGv_vec tcg_temp_new_vec(TCGType type)
 988{
 989    TCGTemp *t;
 990
 991#ifdef CONFIG_DEBUG_TCG
 992    switch (type) {
 993    case TCG_TYPE_V64:
 994        assert(TCG_TARGET_HAS_v64);
 995        break;
 996    case TCG_TYPE_V128:
 997        assert(TCG_TARGET_HAS_v128);
 998        break;
 999    case TCG_TYPE_V256:
1000        assert(TCG_TARGET_HAS_v256);
1001        break;
1002    default:
1003        g_assert_not_reached();
1004    }
1005#endif
1006
1007    t = tcg_temp_new_internal(type, 0);
1008    return temp_tcgv_vec(t);
1009}
1010
1011/* Create a new temp of the same type as an existing temp.  */
1012TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1013{
1014    TCGTemp *t = tcgv_vec_temp(match);
1015
1016    tcg_debug_assert(t->temp_allocated != 0);
1017
1018    t = tcg_temp_new_internal(t->base_type, 0);
1019    return temp_tcgv_vec(t);
1020}
1021
1022void tcg_temp_free_internal(TCGTemp *ts)
1023{
1024    TCGContext *s = tcg_ctx;
1025    int k, idx;
1026
1027    /* In order to simplify users of tcg_constant_*, silently ignore free. */
1028    if (ts->kind == TEMP_CONST) {
1029        return;
1030    }
1031
1032#if defined(CONFIG_DEBUG_TCG)
1033    s->temps_in_use--;
1034    if (s->temps_in_use < 0) {
1035        fprintf(stderr, "More temporaries freed than allocated!\n");
1036    }
1037#endif
1038
1039    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1040    tcg_debug_assert(ts->temp_allocated != 0);
1041    ts->temp_allocated = 0;
1042
1043    idx = temp_idx(ts);
1044    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1045    set_bit(idx, s->free_temps[k].l);
1046}
1047
1048TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1049{
1050    TCGContext *s = tcg_ctx;
1051    GHashTable *h = s->const_table[type];
1052    TCGTemp *ts;
1053
1054    if (h == NULL) {
1055        h = g_hash_table_new(g_int64_hash, g_int64_equal);
1056        s->const_table[type] = h;
1057    }
1058
1059    ts = g_hash_table_lookup(h, &val);
1060    if (ts == NULL) {
1061        ts = tcg_temp_alloc(s);
1062
1063        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1064            TCGTemp *ts2 = tcg_temp_alloc(s);
1065
1066            ts->base_type = TCG_TYPE_I64;
1067            ts->type = TCG_TYPE_I32;
1068            ts->kind = TEMP_CONST;
1069            ts->temp_allocated = 1;
1070            /*
1071             * Retain the full value of the 64-bit constant in the low
1072             * part, so that the hash table works.  Actual uses will
1073             * truncate the value to the low part.
1074             */
1075            ts->val = val;
1076
1077            tcg_debug_assert(ts2 == ts + 1);
1078            ts2->base_type = TCG_TYPE_I64;
1079            ts2->type = TCG_TYPE_I32;
1080            ts2->kind = TEMP_CONST;
1081            ts2->temp_allocated = 1;
1082            ts2->val = val >> 32;
1083        } else {
1084            ts->base_type = type;
1085            ts->type = type;
1086            ts->kind = TEMP_CONST;
1087            ts->temp_allocated = 1;
1088            ts->val = val;
1089        }
1090        g_hash_table_insert(h, &ts->val, ts);
1091    }
1092
1093    return ts;
1094}
1095
1096TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1097{
1098    val = dup_const(vece, val);
1099    return temp_tcgv_vec(tcg_constant_internal(type, val));
1100}
1101
1102TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1103{
1104    TCGTemp *t = tcgv_vec_temp(match);
1105
1106    tcg_debug_assert(t->temp_allocated != 0);
1107    return tcg_constant_vec(t->base_type, vece, val);
1108}
1109
1110TCGv_i32 tcg_const_i32(int32_t val)
1111{
1112    TCGv_i32 t0;
1113    t0 = tcg_temp_new_i32();
1114    tcg_gen_movi_i32(t0, val);
1115    return t0;
1116}
1117
1118TCGv_i64 tcg_const_i64(int64_t val)
1119{
1120    TCGv_i64 t0;
1121    t0 = tcg_temp_new_i64();
1122    tcg_gen_movi_i64(t0, val);
1123    return t0;
1124}
1125
1126TCGv_i32 tcg_const_local_i32(int32_t val)
1127{
1128    TCGv_i32 t0;
1129    t0 = tcg_temp_local_new_i32();
1130    tcg_gen_movi_i32(t0, val);
1131    return t0;
1132}
1133
1134TCGv_i64 tcg_const_local_i64(int64_t val)
1135{
1136    TCGv_i64 t0;
1137    t0 = tcg_temp_local_new_i64();
1138    tcg_gen_movi_i64(t0, val);
1139    return t0;
1140}
1141
1142#if defined(CONFIG_DEBUG_TCG)
1143void tcg_clear_temp_count(void)
1144{
1145    TCGContext *s = tcg_ctx;
1146    s->temps_in_use = 0;
1147}
1148
1149int tcg_check_temp_count(void)
1150{
1151    TCGContext *s = tcg_ctx;
1152    if (s->temps_in_use) {
1153        /* Clear the count so that we don't give another
1154         * warning immediately next time around.
1155         */
1156        s->temps_in_use = 0;
1157        return 1;
1158    }
1159    return 0;
1160}
1161#endif
1162
1163/* Return true if OP may appear in the opcode stream.
1164   Test the runtime variable that controls each opcode.  */
1165bool tcg_op_supported(TCGOpcode op)
1166{
1167    const bool have_vec
1168        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1169
1170    switch (op) {
1171    case INDEX_op_discard:
1172    case INDEX_op_set_label:
1173    case INDEX_op_call:
1174    case INDEX_op_br:
1175    case INDEX_op_mb:
1176    case INDEX_op_insn_start:
1177    case INDEX_op_exit_tb:
1178    case INDEX_op_goto_tb:
1179    case INDEX_op_goto_ptr:
1180    case INDEX_op_qemu_ld_i32:
1181    case INDEX_op_qemu_st_i32:
1182    case INDEX_op_qemu_ld_i64:
1183    case INDEX_op_qemu_st_i64:
1184        return true;
1185
1186    case INDEX_op_qemu_st8_i32:
1187        return TCG_TARGET_HAS_qemu_st8_i32;
1188
1189    case INDEX_op_mov_i32:
1190    case INDEX_op_setcond_i32:
1191    case INDEX_op_brcond_i32:
1192    case INDEX_op_ld8u_i32:
1193    case INDEX_op_ld8s_i32:
1194    case INDEX_op_ld16u_i32:
1195    case INDEX_op_ld16s_i32:
1196    case INDEX_op_ld_i32:
1197    case INDEX_op_st8_i32:
1198    case INDEX_op_st16_i32:
1199    case INDEX_op_st_i32:
1200    case INDEX_op_add_i32:
1201    case INDEX_op_sub_i32:
1202    case INDEX_op_mul_i32:
1203    case INDEX_op_and_i32:
1204    case INDEX_op_or_i32:
1205    case INDEX_op_xor_i32:
1206    case INDEX_op_shl_i32:
1207    case INDEX_op_shr_i32:
1208    case INDEX_op_sar_i32:
1209        return true;
1210
1211    case INDEX_op_movcond_i32:
1212        return TCG_TARGET_HAS_movcond_i32;
1213    case INDEX_op_div_i32:
1214    case INDEX_op_divu_i32:
1215        return TCG_TARGET_HAS_div_i32;
1216    case INDEX_op_rem_i32:
1217    case INDEX_op_remu_i32:
1218        return TCG_TARGET_HAS_rem_i32;
1219    case INDEX_op_div2_i32:
1220    case INDEX_op_divu2_i32:
1221        return TCG_TARGET_HAS_div2_i32;
1222    case INDEX_op_rotl_i32:
1223    case INDEX_op_rotr_i32:
1224        return TCG_TARGET_HAS_rot_i32;
1225    case INDEX_op_deposit_i32:
1226        return TCG_TARGET_HAS_deposit_i32;
1227    case INDEX_op_extract_i32:
1228        return TCG_TARGET_HAS_extract_i32;
1229    case INDEX_op_sextract_i32:
1230        return TCG_TARGET_HAS_sextract_i32;
1231    case INDEX_op_extract2_i32:
1232        return TCG_TARGET_HAS_extract2_i32;
1233    case INDEX_op_add2_i32:
1234        return TCG_TARGET_HAS_add2_i32;
1235    case INDEX_op_sub2_i32:
1236        return TCG_TARGET_HAS_sub2_i32;
1237    case INDEX_op_mulu2_i32:
1238        return TCG_TARGET_HAS_mulu2_i32;
1239    case INDEX_op_muls2_i32:
1240        return TCG_TARGET_HAS_muls2_i32;
1241    case INDEX_op_muluh_i32:
1242        return TCG_TARGET_HAS_muluh_i32;
1243    case INDEX_op_mulsh_i32:
1244        return TCG_TARGET_HAS_mulsh_i32;
1245    case INDEX_op_ext8s_i32:
1246        return TCG_TARGET_HAS_ext8s_i32;
1247    case INDEX_op_ext16s_i32:
1248        return TCG_TARGET_HAS_ext16s_i32;
1249    case INDEX_op_ext8u_i32:
1250        return TCG_TARGET_HAS_ext8u_i32;
1251    case INDEX_op_ext16u_i32:
1252        return TCG_TARGET_HAS_ext16u_i32;
1253    case INDEX_op_bswap16_i32:
1254        return TCG_TARGET_HAS_bswap16_i32;
1255    case INDEX_op_bswap32_i32:
1256        return TCG_TARGET_HAS_bswap32_i32;
1257    case INDEX_op_not_i32:
1258        return TCG_TARGET_HAS_not_i32;
1259    case INDEX_op_neg_i32:
1260        return TCG_TARGET_HAS_neg_i32;
1261    case INDEX_op_andc_i32:
1262        return TCG_TARGET_HAS_andc_i32;
1263    case INDEX_op_orc_i32:
1264        return TCG_TARGET_HAS_orc_i32;
1265    case INDEX_op_eqv_i32:
1266        return TCG_TARGET_HAS_eqv_i32;
1267    case INDEX_op_nand_i32:
1268        return TCG_TARGET_HAS_nand_i32;
1269    case INDEX_op_nor_i32:
1270        return TCG_TARGET_HAS_nor_i32;
1271    case INDEX_op_clz_i32:
1272        return TCG_TARGET_HAS_clz_i32;
1273    case INDEX_op_ctz_i32:
1274        return TCG_TARGET_HAS_ctz_i32;
1275    case INDEX_op_ctpop_i32:
1276        return TCG_TARGET_HAS_ctpop_i32;
1277
1278    case INDEX_op_brcond2_i32:
1279    case INDEX_op_setcond2_i32:
1280        return TCG_TARGET_REG_BITS == 32;
1281
1282    case INDEX_op_mov_i64:
1283    case INDEX_op_setcond_i64:
1284    case INDEX_op_brcond_i64:
1285    case INDEX_op_ld8u_i64:
1286    case INDEX_op_ld8s_i64:
1287    case INDEX_op_ld16u_i64:
1288    case INDEX_op_ld16s_i64:
1289    case INDEX_op_ld32u_i64:
1290    case INDEX_op_ld32s_i64:
1291    case INDEX_op_ld_i64:
1292    case INDEX_op_st8_i64:
1293    case INDEX_op_st16_i64:
1294    case INDEX_op_st32_i64:
1295    case INDEX_op_st_i64:
1296    case INDEX_op_add_i64:
1297    case INDEX_op_sub_i64:
1298    case INDEX_op_mul_i64:
1299    case INDEX_op_and_i64:
1300    case INDEX_op_or_i64:
1301    case INDEX_op_xor_i64:
1302    case INDEX_op_shl_i64:
1303    case INDEX_op_shr_i64:
1304    case INDEX_op_sar_i64:
1305    case INDEX_op_ext_i32_i64:
1306    case INDEX_op_extu_i32_i64:
1307        return TCG_TARGET_REG_BITS == 64;
1308
1309    case INDEX_op_movcond_i64:
1310        return TCG_TARGET_HAS_movcond_i64;
1311    case INDEX_op_div_i64:
1312    case INDEX_op_divu_i64:
1313        return TCG_TARGET_HAS_div_i64;
1314    case INDEX_op_rem_i64:
1315    case INDEX_op_remu_i64:
1316        return TCG_TARGET_HAS_rem_i64;
1317    case INDEX_op_div2_i64:
1318    case INDEX_op_divu2_i64:
1319        return TCG_TARGET_HAS_div2_i64;
1320    case INDEX_op_rotl_i64:
1321    case INDEX_op_rotr_i64:
1322        return TCG_TARGET_HAS_rot_i64;
1323    case INDEX_op_deposit_i64:
1324        return TCG_TARGET_HAS_deposit_i64;
1325    case INDEX_op_extract_i64:
1326        return TCG_TARGET_HAS_extract_i64;
1327    case INDEX_op_sextract_i64:
1328        return TCG_TARGET_HAS_sextract_i64;
1329    case INDEX_op_extract2_i64:
1330        return TCG_TARGET_HAS_extract2_i64;
1331    case INDEX_op_extrl_i64_i32:
1332        return TCG_TARGET_HAS_extrl_i64_i32;
1333    case INDEX_op_extrh_i64_i32:
1334        return TCG_TARGET_HAS_extrh_i64_i32;
1335    case INDEX_op_ext8s_i64:
1336        return TCG_TARGET_HAS_ext8s_i64;
1337    case INDEX_op_ext16s_i64:
1338        return TCG_TARGET_HAS_ext16s_i64;
1339    case INDEX_op_ext32s_i64:
1340        return TCG_TARGET_HAS_ext32s_i64;
1341    case INDEX_op_ext8u_i64:
1342        return TCG_TARGET_HAS_ext8u_i64;
1343    case INDEX_op_ext16u_i64:
1344        return TCG_TARGET_HAS_ext16u_i64;
1345    case INDEX_op_ext32u_i64:
1346        return TCG_TARGET_HAS_ext32u_i64;
1347    case INDEX_op_bswap16_i64:
1348        return TCG_TARGET_HAS_bswap16_i64;
1349    case INDEX_op_bswap32_i64:
1350        return TCG_TARGET_HAS_bswap32_i64;
1351    case INDEX_op_bswap64_i64:
1352        return TCG_TARGET_HAS_bswap64_i64;
1353    case INDEX_op_not_i64:
1354        return TCG_TARGET_HAS_not_i64;
1355    case INDEX_op_neg_i64:
1356        return TCG_TARGET_HAS_neg_i64;
1357    case INDEX_op_andc_i64:
1358        return TCG_TARGET_HAS_andc_i64;
1359    case INDEX_op_orc_i64:
1360        return TCG_TARGET_HAS_orc_i64;
1361    case INDEX_op_eqv_i64:
1362        return TCG_TARGET_HAS_eqv_i64;
1363    case INDEX_op_nand_i64:
1364        return TCG_TARGET_HAS_nand_i64;
1365    case INDEX_op_nor_i64:
1366        return TCG_TARGET_HAS_nor_i64;
1367    case INDEX_op_clz_i64:
1368        return TCG_TARGET_HAS_clz_i64;
1369    case INDEX_op_ctz_i64:
1370        return TCG_TARGET_HAS_ctz_i64;
1371    case INDEX_op_ctpop_i64:
1372        return TCG_TARGET_HAS_ctpop_i64;
1373    case INDEX_op_add2_i64:
1374        return TCG_TARGET_HAS_add2_i64;
1375    case INDEX_op_sub2_i64:
1376        return TCG_TARGET_HAS_sub2_i64;
1377    case INDEX_op_mulu2_i64:
1378        return TCG_TARGET_HAS_mulu2_i64;
1379    case INDEX_op_muls2_i64:
1380        return TCG_TARGET_HAS_muls2_i64;
1381    case INDEX_op_muluh_i64:
1382        return TCG_TARGET_HAS_muluh_i64;
1383    case INDEX_op_mulsh_i64:
1384        return TCG_TARGET_HAS_mulsh_i64;
1385
1386    case INDEX_op_mov_vec:
1387    case INDEX_op_dup_vec:
1388    case INDEX_op_dupm_vec:
1389    case INDEX_op_ld_vec:
1390    case INDEX_op_st_vec:
1391    case INDEX_op_add_vec:
1392    case INDEX_op_sub_vec:
1393    case INDEX_op_and_vec:
1394    case INDEX_op_or_vec:
1395    case INDEX_op_xor_vec:
1396    case INDEX_op_cmp_vec:
1397        return have_vec;
1398    case INDEX_op_dup2_vec:
1399        return have_vec && TCG_TARGET_REG_BITS == 32;
1400    case INDEX_op_not_vec:
1401        return have_vec && TCG_TARGET_HAS_not_vec;
1402    case INDEX_op_neg_vec:
1403        return have_vec && TCG_TARGET_HAS_neg_vec;
1404    case INDEX_op_abs_vec:
1405        return have_vec && TCG_TARGET_HAS_abs_vec;
1406    case INDEX_op_andc_vec:
1407        return have_vec && TCG_TARGET_HAS_andc_vec;
1408    case INDEX_op_orc_vec:
1409        return have_vec && TCG_TARGET_HAS_orc_vec;
1410    case INDEX_op_nand_vec:
1411        return have_vec && TCG_TARGET_HAS_nand_vec;
1412    case INDEX_op_nor_vec:
1413        return have_vec && TCG_TARGET_HAS_nor_vec;
1414    case INDEX_op_eqv_vec:
1415        return have_vec && TCG_TARGET_HAS_eqv_vec;
1416    case INDEX_op_mul_vec:
1417        return have_vec && TCG_TARGET_HAS_mul_vec;
1418    case INDEX_op_shli_vec:
1419    case INDEX_op_shri_vec:
1420    case INDEX_op_sari_vec:
1421        return have_vec && TCG_TARGET_HAS_shi_vec;
1422    case INDEX_op_shls_vec:
1423    case INDEX_op_shrs_vec:
1424    case INDEX_op_sars_vec:
1425        return have_vec && TCG_TARGET_HAS_shs_vec;
1426    case INDEX_op_shlv_vec:
1427    case INDEX_op_shrv_vec:
1428    case INDEX_op_sarv_vec:
1429        return have_vec && TCG_TARGET_HAS_shv_vec;
1430    case INDEX_op_rotli_vec:
1431        return have_vec && TCG_TARGET_HAS_roti_vec;
1432    case INDEX_op_rotls_vec:
1433        return have_vec && TCG_TARGET_HAS_rots_vec;
1434    case INDEX_op_rotlv_vec:
1435    case INDEX_op_rotrv_vec:
1436        return have_vec && TCG_TARGET_HAS_rotv_vec;
1437    case INDEX_op_ssadd_vec:
1438    case INDEX_op_usadd_vec:
1439    case INDEX_op_sssub_vec:
1440    case INDEX_op_ussub_vec:
1441        return have_vec && TCG_TARGET_HAS_sat_vec;
1442    case INDEX_op_smin_vec:
1443    case INDEX_op_umin_vec:
1444    case INDEX_op_smax_vec:
1445    case INDEX_op_umax_vec:
1446        return have_vec && TCG_TARGET_HAS_minmax_vec;
1447    case INDEX_op_bitsel_vec:
1448        return have_vec && TCG_TARGET_HAS_bitsel_vec;
1449    case INDEX_op_cmpsel_vec:
1450        return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1451
1452    default:
1453        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1454        return true;
1455    }
1456}
1457
1458/* Note: we convert the 64 bit args to 32 bit and do some alignment
1459   and endian swap. Maybe it would be better to do the alignment
1460   and endian swap in tcg_reg_alloc_call(). */
1461void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1462{
1463    int i, real_args, nb_rets, pi;
1464    unsigned typemask;
1465    const TCGHelperInfo *info;
1466    TCGOp *op;
1467
1468    info = g_hash_table_lookup(helper_table, (gpointer)func);
1469    typemask = info->typemask;
1470
1471#ifdef CONFIG_PLUGIN
1472    /* detect non-plugin helpers */
1473    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1474        tcg_ctx->plugin_insn->calls_helpers = true;
1475    }
1476#endif
1477
1478#if defined(__sparc__) && !defined(__arch64__) \
1479    && !defined(CONFIG_TCG_INTERPRETER)
1480    /* We have 64-bit values in one register, but need to pass as two
1481       separate parameters.  Split them.  */
1482    int orig_typemask = typemask;
1483    int orig_nargs = nargs;
1484    TCGv_i64 retl, reth;
1485    TCGTemp *split_args[MAX_OPC_PARAM];
1486
1487    retl = NULL;
1488    reth = NULL;
1489    typemask = 0;
1490    for (i = real_args = 0; i < nargs; ++i) {
1491        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1492        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1493
1494        if (is_64bit) {
1495            TCGv_i64 orig = temp_tcgv_i64(args[i]);
1496            TCGv_i32 h = tcg_temp_new_i32();
1497            TCGv_i32 l = tcg_temp_new_i32();
1498            tcg_gen_extr_i64_i32(l, h, orig);
1499            split_args[real_args++] = tcgv_i32_temp(h);
1500            typemask |= dh_typecode_i32 << (real_args * 3);
1501            split_args[real_args++] = tcgv_i32_temp(l);
1502            typemask |= dh_typecode_i32 << (real_args * 3);
1503        } else {
1504            split_args[real_args++] = args[i];
1505            typemask |= argtype << (real_args * 3);
1506        }
1507    }
1508    nargs = real_args;
1509    args = split_args;
1510#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1511    for (i = 0; i < nargs; ++i) {
1512        int argtype = extract32(typemask, (i + 1) * 3, 3);
1513        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1514        bool is_signed = argtype & 1;
1515
1516        if (is_32bit) {
1517            TCGv_i64 temp = tcg_temp_new_i64();
1518            TCGv_i32 orig = temp_tcgv_i32(args[i]);
1519            if (is_signed) {
1520                tcg_gen_ext_i32_i64(temp, orig);
1521            } else {
1522                tcg_gen_extu_i32_i64(temp, orig);
1523            }
1524            args[i] = tcgv_i64_temp(temp);
1525        }
1526    }
1527#endif /* TCG_TARGET_EXTEND_ARGS */
1528
1529    op = tcg_emit_op(INDEX_op_call);
1530
1531    pi = 0;
1532    if (ret != NULL) {
1533#if defined(__sparc__) && !defined(__arch64__) \
1534    && !defined(CONFIG_TCG_INTERPRETER)
1535        if ((typemask & 6) == dh_typecode_i64) {
1536            /* The 32-bit ABI is going to return the 64-bit value in
1537               the %o0/%o1 register pair.  Prepare for this by using
1538               two return temporaries, and reassemble below.  */
1539            retl = tcg_temp_new_i64();
1540            reth = tcg_temp_new_i64();
1541            op->args[pi++] = tcgv_i64_arg(reth);
1542            op->args[pi++] = tcgv_i64_arg(retl);
1543            nb_rets = 2;
1544        } else {
1545            op->args[pi++] = temp_arg(ret);
1546            nb_rets = 1;
1547        }
1548#else
1549        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1550#ifdef HOST_WORDS_BIGENDIAN
1551            op->args[pi++] = temp_arg(ret + 1);
1552            op->args[pi++] = temp_arg(ret);
1553#else
1554            op->args[pi++] = temp_arg(ret);
1555            op->args[pi++] = temp_arg(ret + 1);
1556#endif
1557            nb_rets = 2;
1558        } else {
1559            op->args[pi++] = temp_arg(ret);
1560            nb_rets = 1;
1561        }
1562#endif
1563    } else {
1564        nb_rets = 0;
1565    }
1566    TCGOP_CALLO(op) = nb_rets;
1567
1568    real_args = 0;
1569    for (i = 0; i < nargs; i++) {
1570        int argtype = extract32(typemask, (i + 1) * 3, 3);
1571        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1572        bool want_align = false;
1573
1574#if defined(CONFIG_TCG_INTERPRETER)
1575        /*
1576         * Align all arguments, so that they land in predictable places
1577         * for passing off to ffi_call.
1578         */
1579        want_align = true;
1580#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1581        /* Some targets want aligned 64 bit args */
1582        want_align = is_64bit;
1583#endif
1584
1585        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1586            op->args[pi++] = TCG_CALL_DUMMY_ARG;
1587            real_args++;
1588        }
1589
1590        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1591            /*
1592             * If stack grows up, then we will be placing successive
1593             * arguments at lower addresses, which means we need to
1594             * reverse the order compared to how we would normally
1595             * treat either big or little-endian.  For those arguments
1596             * that will wind up in registers, this still works for
1597             * HPPA (the only current STACK_GROWSUP target) since the
1598             * argument registers are *also* allocated in decreasing
1599             * order.  If another such target is added, this logic may
1600             * have to get more complicated to differentiate between
1601             * stack arguments and register arguments.
1602             */
1603#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1604            op->args[pi++] = temp_arg(args[i] + 1);
1605            op->args[pi++] = temp_arg(args[i]);
1606#else
1607            op->args[pi++] = temp_arg(args[i]);
1608            op->args[pi++] = temp_arg(args[i] + 1);
1609#endif
1610            real_args += 2;
1611            continue;
1612        }
1613
1614        op->args[pi++] = temp_arg(args[i]);
1615        real_args++;
1616    }
1617    op->args[pi++] = (uintptr_t)func;
1618    op->args[pi++] = (uintptr_t)info;
1619    TCGOP_CALLI(op) = real_args;
1620
1621    /* Make sure the fields didn't overflow.  */
1622    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1623    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1624
1625#if defined(__sparc__) && !defined(__arch64__) \
1626    && !defined(CONFIG_TCG_INTERPRETER)
1627    /* Free all of the parts we allocated above.  */
1628    for (i = real_args = 0; i < orig_nargs; ++i) {
1629        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1630        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1631
1632        if (is_64bit) {
1633            tcg_temp_free_internal(args[real_args++]);
1634            tcg_temp_free_internal(args[real_args++]);
1635        } else {
1636            real_args++;
1637        }
1638    }
1639    if ((orig_typemask & 6) == dh_typecode_i64) {
1640        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1641           Note that describing these as TCGv_i64 eliminates an unnecessary
1642           zero-extension that tcg_gen_concat_i32_i64 would create.  */
1643        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1644        tcg_temp_free_i64(retl);
1645        tcg_temp_free_i64(reth);
1646    }
1647#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1648    for (i = 0; i < nargs; ++i) {
1649        int argtype = extract32(typemask, (i + 1) * 3, 3);
1650        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1651
1652        if (is_32bit) {
1653            tcg_temp_free_internal(args[i]);
1654        }
1655    }
1656#endif /* TCG_TARGET_EXTEND_ARGS */
1657}
1658
1659static void tcg_reg_alloc_start(TCGContext *s)
1660{
1661    int i, n;
1662
1663    for (i = 0, n = s->nb_temps; i < n; i++) {
1664        TCGTemp *ts = &s->temps[i];
1665        TCGTempVal val = TEMP_VAL_MEM;
1666
1667        switch (ts->kind) {
1668        case TEMP_CONST:
1669            val = TEMP_VAL_CONST;
1670            break;
1671        case TEMP_FIXED:
1672            val = TEMP_VAL_REG;
1673            break;
1674        case TEMP_GLOBAL:
1675            break;
1676        case TEMP_NORMAL:
1677            val = TEMP_VAL_DEAD;
1678            /* fall through */
1679        case TEMP_LOCAL:
1680            ts->mem_allocated = 0;
1681            break;
1682        default:
1683            g_assert_not_reached();
1684        }
1685        ts->val_type = val;
1686    }
1687
1688    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1689}
1690
1691static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1692                                 TCGTemp *ts)
1693{
1694    int idx = temp_idx(ts);
1695
1696    switch (ts->kind) {
1697    case TEMP_FIXED:
1698    case TEMP_GLOBAL:
1699        pstrcpy(buf, buf_size, ts->name);
1700        break;
1701    case TEMP_LOCAL:
1702        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1703        break;
1704    case TEMP_NORMAL:
1705        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1706        break;
1707    case TEMP_CONST:
1708        switch (ts->type) {
1709        case TCG_TYPE_I32:
1710            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1711            break;
1712#if TCG_TARGET_REG_BITS > 32
1713        case TCG_TYPE_I64:
1714            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1715            break;
1716#endif
1717        case TCG_TYPE_V64:
1718        case TCG_TYPE_V128:
1719        case TCG_TYPE_V256:
1720            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1721                     64 << (ts->type - TCG_TYPE_V64), ts->val);
1722            break;
1723        default:
1724            g_assert_not_reached();
1725        }
1726        break;
1727    }
1728    return buf;
1729}
1730
1731static char *tcg_get_arg_str(TCGContext *s, char *buf,
1732                             int buf_size, TCGArg arg)
1733{
1734    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1735}
1736
1737static const char * const cond_name[] =
1738{
1739    [TCG_COND_NEVER] = "never",
1740    [TCG_COND_ALWAYS] = "always",
1741    [TCG_COND_EQ] = "eq",
1742    [TCG_COND_NE] = "ne",
1743    [TCG_COND_LT] = "lt",
1744    [TCG_COND_GE] = "ge",
1745    [TCG_COND_LE] = "le",
1746    [TCG_COND_GT] = "gt",
1747    [TCG_COND_LTU] = "ltu",
1748    [TCG_COND_GEU] = "geu",
1749    [TCG_COND_LEU] = "leu",
1750    [TCG_COND_GTU] = "gtu"
1751};
1752
1753static const char * const ldst_name[] =
1754{
1755    [MO_UB]   = "ub",
1756    [MO_SB]   = "sb",
1757    [MO_LEUW] = "leuw",
1758    [MO_LESW] = "lesw",
1759    [MO_LEUL] = "leul",
1760    [MO_LESL] = "lesl",
1761    [MO_LEUQ] = "leq",
1762    [MO_BEUW] = "beuw",
1763    [MO_BESW] = "besw",
1764    [MO_BEUL] = "beul",
1765    [MO_BESL] = "besl",
1766    [MO_BEUQ] = "beq",
1767};
1768
1769static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1770#ifdef TARGET_ALIGNED_ONLY
1771    [MO_UNALN >> MO_ASHIFT]    = "un+",
1772    [MO_ALIGN >> MO_ASHIFT]    = "",
1773#else
1774    [MO_UNALN >> MO_ASHIFT]    = "",
1775    [MO_ALIGN >> MO_ASHIFT]    = "al+",
1776#endif
1777    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1778    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1779    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1780    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1781    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1782    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1783};
1784
1785static const char bswap_flag_name[][6] = {
1786    [TCG_BSWAP_IZ] = "iz",
1787    [TCG_BSWAP_OZ] = "oz",
1788    [TCG_BSWAP_OS] = "os",
1789    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1790    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1791};
1792
1793static inline bool tcg_regset_single(TCGRegSet d)
1794{
1795    return (d & (d - 1)) == 0;
1796}
1797
1798static inline TCGReg tcg_regset_first(TCGRegSet d)
1799{
1800    if (TCG_TARGET_NB_REGS <= 32) {
1801        return ctz32(d);
1802    } else {
1803        return ctz64(d);
1804    }
1805}
1806
1807static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1808{
1809    char buf[128];
1810    TCGOp *op;
1811
1812    QTAILQ_FOREACH(op, &s->ops, link) {
1813        int i, k, nb_oargs, nb_iargs, nb_cargs;
1814        const TCGOpDef *def;
1815        TCGOpcode c;
1816        int col = 0;
1817
1818        c = op->opc;
1819        def = &tcg_op_defs[c];
1820
1821        if (c == INDEX_op_insn_start) {
1822            nb_oargs = 0;
1823            col += qemu_log("\n ----");
1824
1825            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1826                target_ulong a;
1827#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1828                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1829#else
1830                a = op->args[i];
1831#endif
1832                col += qemu_log(" " TARGET_FMT_lx, a);
1833            }
1834        } else if (c == INDEX_op_call) {
1835            const TCGHelperInfo *info = tcg_call_info(op);
1836            void *func = tcg_call_func(op);
1837
1838            /* variable number of arguments */
1839            nb_oargs = TCGOP_CALLO(op);
1840            nb_iargs = TCGOP_CALLI(op);
1841            nb_cargs = def->nb_cargs;
1842
1843            col += qemu_log(" %s ", def->name);
1844
1845            /*
1846             * Print the function name from TCGHelperInfo, if available.
1847             * Note that plugins have a template function for the info,
1848             * but the actual function pointer comes from the plugin.
1849             */
1850            if (func == info->func) {
1851                col += qemu_log("%s", info->name);
1852            } else {
1853                col += qemu_log("plugin(%p)", func);
1854            }
1855
1856            col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1857            for (i = 0; i < nb_oargs; i++) {
1858                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1859                                                       op->args[i]));
1860            }
1861            for (i = 0; i < nb_iargs; i++) {
1862                TCGArg arg = op->args[nb_oargs + i];
1863                const char *t = "<dummy>";
1864                if (arg != TCG_CALL_DUMMY_ARG) {
1865                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1866                }
1867                col += qemu_log(",%s", t);
1868            }
1869        } else {
1870            col += qemu_log(" %s ", def->name);
1871
1872            nb_oargs = def->nb_oargs;
1873            nb_iargs = def->nb_iargs;
1874            nb_cargs = def->nb_cargs;
1875
1876            if (def->flags & TCG_OPF_VECTOR) {
1877                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1878                                8 << TCGOP_VECE(op));
1879            }
1880
1881            k = 0;
1882            for (i = 0; i < nb_oargs; i++) {
1883                if (k != 0) {
1884                    col += qemu_log(",");
1885                }
1886                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1887                                                      op->args[k++]));
1888            }
1889            for (i = 0; i < nb_iargs; i++) {
1890                if (k != 0) {
1891                    col += qemu_log(",");
1892                }
1893                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1894                                                      op->args[k++]));
1895            }
1896            switch (c) {
1897            case INDEX_op_brcond_i32:
1898            case INDEX_op_setcond_i32:
1899            case INDEX_op_movcond_i32:
1900            case INDEX_op_brcond2_i32:
1901            case INDEX_op_setcond2_i32:
1902            case INDEX_op_brcond_i64:
1903            case INDEX_op_setcond_i64:
1904            case INDEX_op_movcond_i64:
1905            case INDEX_op_cmp_vec:
1906            case INDEX_op_cmpsel_vec:
1907                if (op->args[k] < ARRAY_SIZE(cond_name)
1908                    && cond_name[op->args[k]]) {
1909                    col += qemu_log(",%s", cond_name[op->args[k++]]);
1910                } else {
1911                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1912                }
1913                i = 1;
1914                break;
1915            case INDEX_op_qemu_ld_i32:
1916            case INDEX_op_qemu_st_i32:
1917            case INDEX_op_qemu_st8_i32:
1918            case INDEX_op_qemu_ld_i64:
1919            case INDEX_op_qemu_st_i64:
1920                {
1921                    MemOpIdx oi = op->args[k++];
1922                    MemOp op = get_memop(oi);
1923                    unsigned ix = get_mmuidx(oi);
1924
1925                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1926                        col += qemu_log(",$0x%x,%u", op, ix);
1927                    } else {
1928                        const char *s_al, *s_op;
1929                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1930                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1931                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1932                    }
1933                    i = 1;
1934                }
1935                break;
1936            case INDEX_op_bswap16_i32:
1937            case INDEX_op_bswap16_i64:
1938            case INDEX_op_bswap32_i32:
1939            case INDEX_op_bswap32_i64:
1940            case INDEX_op_bswap64_i64:
1941                {
1942                    TCGArg flags = op->args[k];
1943                    const char *name = NULL;
1944
1945                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
1946                        name = bswap_flag_name[flags];
1947                    }
1948                    if (name) {
1949                        col += qemu_log(",%s", name);
1950                    } else {
1951                        col += qemu_log(",$0x%" TCG_PRIlx, flags);
1952                    }
1953                    i = k = 1;
1954                }
1955                break;
1956            default:
1957                i = 0;
1958                break;
1959            }
1960            switch (c) {
1961            case INDEX_op_set_label:
1962            case INDEX_op_br:
1963            case INDEX_op_brcond_i32:
1964            case INDEX_op_brcond_i64:
1965            case INDEX_op_brcond2_i32:
1966                col += qemu_log("%s$L%d", k ? "," : "",
1967                                arg_label(op->args[k])->id);
1968                i++, k++;
1969                break;
1970            default:
1971                break;
1972            }
1973            for (; i < nb_cargs; i++, k++) {
1974                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1975            }
1976        }
1977
1978        if (have_prefs || op->life) {
1979
1980            QemuLogFile *logfile;
1981
1982            rcu_read_lock();
1983            logfile = qatomic_rcu_read(&qemu_logfile);
1984            if (logfile) {
1985                for (; col < 40; ++col) {
1986                    putc(' ', logfile->fd);
1987                }
1988            }
1989            rcu_read_unlock();
1990        }
1991
1992        if (op->life) {
1993            unsigned life = op->life;
1994
1995            if (life & (SYNC_ARG * 3)) {
1996                qemu_log("  sync:");
1997                for (i = 0; i < 2; ++i) {
1998                    if (life & (SYNC_ARG << i)) {
1999                        qemu_log(" %d", i);
2000                    }
2001                }
2002            }
2003            life /= DEAD_ARG;
2004            if (life) {
2005                qemu_log("  dead:");
2006                for (i = 0; life; ++i, life >>= 1) {
2007                    if (life & 1) {
2008                        qemu_log(" %d", i);
2009                    }
2010                }
2011            }
2012        }
2013
2014        if (have_prefs) {
2015            for (i = 0; i < nb_oargs; ++i) {
2016                TCGRegSet set = op->output_pref[i];
2017
2018                if (i == 0) {
2019                    qemu_log("  pref=");
2020                } else {
2021                    qemu_log(",");
2022                }
2023                if (set == 0) {
2024                    qemu_log("none");
2025                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2026                    qemu_log("all");
2027#ifdef CONFIG_DEBUG_TCG
2028                } else if (tcg_regset_single(set)) {
2029                    TCGReg reg = tcg_regset_first(set);
2030                    qemu_log("%s", tcg_target_reg_names[reg]);
2031#endif
2032                } else if (TCG_TARGET_NB_REGS <= 32) {
2033                    qemu_log("%#x", (uint32_t)set);
2034                } else {
2035                    qemu_log("%#" PRIx64, (uint64_t)set);
2036                }
2037            }
2038        }
2039
2040        qemu_log("\n");
2041    }
2042}
2043
2044/* we give more priority to constraints with less registers */
2045static int get_constraint_priority(const TCGOpDef *def, int k)
2046{
2047    const TCGArgConstraint *arg_ct = &def->args_ct[k];
2048    int n;
2049
2050    if (arg_ct->oalias) {
2051        /* an alias is equivalent to a single register */
2052        n = 1;
2053    } else {
2054        n = ctpop64(arg_ct->regs);
2055    }
2056    return TCG_TARGET_NB_REGS - n + 1;
2057}
2058
2059/* sort from highest priority to lowest */
2060static void sort_constraints(TCGOpDef *def, int start, int n)
2061{
2062    int i, j;
2063    TCGArgConstraint *a = def->args_ct;
2064
2065    for (i = 0; i < n; i++) {
2066        a[start + i].sort_index = start + i;
2067    }
2068    if (n <= 1) {
2069        return;
2070    }
2071    for (i = 0; i < n - 1; i++) {
2072        for (j = i + 1; j < n; j++) {
2073            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2074            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2075            if (p1 < p2) {
2076                int tmp = a[start + i].sort_index;
2077                a[start + i].sort_index = a[start + j].sort_index;
2078                a[start + j].sort_index = tmp;
2079            }
2080        }
2081    }
2082}
2083
2084static void process_op_defs(TCGContext *s)
2085{
2086    TCGOpcode op;
2087
2088    for (op = 0; op < NB_OPS; op++) {
2089        TCGOpDef *def = &tcg_op_defs[op];
2090        const TCGTargetOpDef *tdefs;
2091        int i, nb_args;
2092
2093        if (def->flags & TCG_OPF_NOT_PRESENT) {
2094            continue;
2095        }
2096
2097        nb_args = def->nb_iargs + def->nb_oargs;
2098        if (nb_args == 0) {
2099            continue;
2100        }
2101
2102        /*
2103         * Macro magic should make it impossible, but double-check that
2104         * the array index is in range.  Since the signness of an enum
2105         * is implementation defined, force the result to unsigned.
2106         */
2107        unsigned con_set = tcg_target_op_def(op);
2108        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2109        tdefs = &constraint_sets[con_set];
2110
2111        for (i = 0; i < nb_args; i++) {
2112            const char *ct_str = tdefs->args_ct_str[i];
2113            /* Incomplete TCGTargetOpDef entry. */
2114            tcg_debug_assert(ct_str != NULL);
2115
2116            while (*ct_str != '\0') {
2117                switch(*ct_str) {
2118                case '0' ... '9':
2119                    {
2120                        int oarg = *ct_str - '0';
2121                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2122                        tcg_debug_assert(oarg < def->nb_oargs);
2123                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
2124                        def->args_ct[i] = def->args_ct[oarg];
2125                        /* The output sets oalias.  */
2126                        def->args_ct[oarg].oalias = true;
2127                        def->args_ct[oarg].alias_index = i;
2128                        /* The input sets ialias. */
2129                        def->args_ct[i].ialias = true;
2130                        def->args_ct[i].alias_index = oarg;
2131                    }
2132                    ct_str++;
2133                    break;
2134                case '&':
2135                    def->args_ct[i].newreg = true;
2136                    ct_str++;
2137                    break;
2138                case 'i':
2139                    def->args_ct[i].ct |= TCG_CT_CONST;
2140                    ct_str++;
2141                    break;
2142
2143                /* Include all of the target-specific constraints. */
2144
2145#undef CONST
2146#define CONST(CASE, MASK) \
2147    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2148#define REGS(CASE, MASK) \
2149    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2150
2151#include "tcg-target-con-str.h"
2152
2153#undef REGS
2154#undef CONST
2155                default:
2156                    /* Typo in TCGTargetOpDef constraint. */
2157                    g_assert_not_reached();
2158                }
2159            }
2160        }
2161
2162        /* TCGTargetOpDef entry with too much information? */
2163        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2164
2165        /* sort the constraints (XXX: this is just an heuristic) */
2166        sort_constraints(def, 0, def->nb_oargs);
2167        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2168    }
2169}
2170
2171void tcg_op_remove(TCGContext *s, TCGOp *op)
2172{
2173    TCGLabel *label;
2174
2175    switch (op->opc) {
2176    case INDEX_op_br:
2177        label = arg_label(op->args[0]);
2178        label->refs--;
2179        break;
2180    case INDEX_op_brcond_i32:
2181    case INDEX_op_brcond_i64:
2182        label = arg_label(op->args[3]);
2183        label->refs--;
2184        break;
2185    case INDEX_op_brcond2_i32:
2186        label = arg_label(op->args[5]);
2187        label->refs--;
2188        break;
2189    default:
2190        break;
2191    }
2192
2193    QTAILQ_REMOVE(&s->ops, op, link);
2194    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2195    s->nb_ops--;
2196
2197#ifdef CONFIG_PROFILER
2198    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2199#endif
2200}
2201
2202void tcg_remove_ops_after(TCGOp *op)
2203{
2204    TCGContext *s = tcg_ctx;
2205
2206    while (true) {
2207        TCGOp *last = tcg_last_op();
2208        if (last == op) {
2209            return;
2210        }
2211        tcg_op_remove(s, last);
2212    }
2213}
2214
2215static TCGOp *tcg_op_alloc(TCGOpcode opc)
2216{
2217    TCGContext *s = tcg_ctx;
2218    TCGOp *op;
2219
2220    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2221        op = tcg_malloc(sizeof(TCGOp));
2222    } else {
2223        op = QTAILQ_FIRST(&s->free_ops);
2224        QTAILQ_REMOVE(&s->free_ops, op, link);
2225    }
2226    memset(op, 0, offsetof(TCGOp, link));
2227    op->opc = opc;
2228    s->nb_ops++;
2229
2230    return op;
2231}
2232
2233TCGOp *tcg_emit_op(TCGOpcode opc)
2234{
2235    TCGOp *op = tcg_op_alloc(opc);
2236    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2237    return op;
2238}
2239
2240TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2241{
2242    TCGOp *new_op = tcg_op_alloc(opc);
2243    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2244    return new_op;
2245}
2246
2247TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2248{
2249    TCGOp *new_op = tcg_op_alloc(opc);
2250    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2251    return new_op;
2252}
2253
2254/* Reachable analysis : remove unreachable code.  */
2255static void reachable_code_pass(TCGContext *s)
2256{
2257    TCGOp *op, *op_next;
2258    bool dead = false;
2259
2260    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2261        bool remove = dead;
2262        TCGLabel *label;
2263
2264        switch (op->opc) {
2265        case INDEX_op_set_label:
2266            label = arg_label(op->args[0]);
2267            if (label->refs == 0) {
2268                /*
2269                 * While there is an occasional backward branch, virtually
2270                 * all branches generated by the translators are forward.
2271                 * Which means that generally we will have already removed
2272                 * all references to the label that will be, and there is
2273                 * little to be gained by iterating.
2274                 */
2275                remove = true;
2276            } else {
2277                /* Once we see a label, insns become live again.  */
2278                dead = false;
2279                remove = false;
2280
2281                /*
2282                 * Optimization can fold conditional branches to unconditional.
2283                 * If we find a label with one reference which is preceded by
2284                 * an unconditional branch to it, remove both.  This needed to
2285                 * wait until the dead code in between them was removed.
2286                 */
2287                if (label->refs == 1) {
2288                    TCGOp *op_prev = QTAILQ_PREV(op, link);
2289                    if (op_prev->opc == INDEX_op_br &&
2290                        label == arg_label(op_prev->args[0])) {
2291                        tcg_op_remove(s, op_prev);
2292                        remove = true;
2293                    }
2294                }
2295            }
2296            break;
2297
2298        case INDEX_op_br:
2299        case INDEX_op_exit_tb:
2300        case INDEX_op_goto_ptr:
2301            /* Unconditional branches; everything following is dead.  */
2302            dead = true;
2303            break;
2304
2305        case INDEX_op_call:
2306            /* Notice noreturn helper calls, raising exceptions.  */
2307            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2308                dead = true;
2309            }
2310            break;
2311
2312        case INDEX_op_insn_start:
2313            /* Never remove -- we need to keep these for unwind.  */
2314            remove = false;
2315            break;
2316
2317        default:
2318            break;
2319        }
2320
2321        if (remove) {
2322            tcg_op_remove(s, op);
2323        }
2324    }
2325}
2326
/*
 * Bit flags combined in TCGTemp.state by the liveness helpers below
 * (e.g. TS_DEAD | TS_MEM).
 */
2327#define TS_DEAD  1
2328#define TS_MEM   2
2329
/*
 * Test the DEAD_ARG / SYNC_ARG bit for argument N.  Both macros expand to
 * an expression on a variable named 'arg_life', which must be in scope
 * wherever they are used.
 */
2330#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2331#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2332
2333/* For liveness_pass_1, the register preferences for a given temp.  */
2334static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2335{
2336    return ts->state_ptr;
2337}
2338
2339/* For liveness_pass_1, reset the preferences for a given temp to the
2340 * maximal regset for its type.
2341 */
2342static inline void la_reset_pref(TCGTemp *ts)
2343{
2344    *la_temp_pref(ts)
2345        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2346}
2347
2348/* liveness analysis: end of function: all temps are dead, and globals
2349   should be in memory. */
2350static void la_func_end(TCGContext *s, int ng, int nt)
2351{
2352    int i;
2353
2354    for (i = 0; i < ng; ++i) {
2355        s->temps[i].state = TS_DEAD | TS_MEM;
2356        la_reset_pref(&s->temps[i]);
2357    }
2358    for (i = ng; i < nt; ++i) {
2359        s->temps[i].state = TS_DEAD;
2360        la_reset_pref(&s->temps[i]);
2361    }
2362}
2363
2364/* liveness analysis: end of basic block: all temps are dead, globals
2365   and local temps should be in memory. */
2366static void la_bb_end(TCGContext *s, int ng, int nt)
2367{
2368    int i;
2369
2370    for (i = 0; i < nt; ++i) {
2371        TCGTemp *ts = &s->temps[i];
2372        int state;
2373
2374        switch (ts->kind) {
2375        case TEMP_FIXED:
2376        case TEMP_GLOBAL:
2377        case TEMP_LOCAL:
2378            state = TS_DEAD | TS_MEM;
2379            break;
2380        case TEMP_NORMAL:
2381        case TEMP_CONST:
2382            state = TS_DEAD;
2383            break;
2384        default:
2385            g_assert_not_reached();
2386        }
2387        ts->state = state;
2388        la_reset_pref(ts);
2389    }
2390}
2391
2392/* liveness analysis: sync globals back to memory.  */
2393static void la_global_sync(TCGContext *s, int ng)
2394{
2395    int i;
2396
2397    for (i = 0; i < ng; ++i) {
2398        int state = s->temps[i].state;
2399        s->temps[i].state = state | TS_MEM;
2400        if (state == TS_DEAD) {
2401            /* If the global was previously dead, reset prefs.  */
2402            la_reset_pref(&s->temps[i]);
2403        }
2404    }
2405}
2406
2407/*
2408 * liveness analysis: conditional branch: all temps are dead,
2409 * globals and local temps should be synced.
2410 */
2411static void la_bb_sync(TCGContext *s, int ng, int nt)
2412{
2413    la_global_sync(s, ng);
2414
2415    for (int i = ng; i < nt; ++i) {
2416        TCGTemp *ts = &s->temps[i];
2417        int state;
2418
2419        switch (ts->kind) {
2420        case TEMP_LOCAL:
2421            state = ts->state;
2422            ts->state = state | TS_MEM;
2423            if (state != TS_DEAD) {
2424                continue;
2425            }
2426            break;
2427        case TEMP_NORMAL:
2428            s->temps[i].state = TS_DEAD;
2429            break;
2430        case TEMP_CONST:
2431            continue;
2432        default:
2433            g_assert_not_reached();
2434        }
2435        la_reset_pref(&s->temps[i]);
2436    }
2437}
2438
2439/* liveness analysis: sync globals back to memory and kill.  */
2440static void la_global_kill(TCGContext *s, int ng)
2441{
2442    int i;
2443
2444    for (i = 0; i < ng; i++) {
2445        s->temps[i].state = TS_DEAD | TS_MEM;
2446        la_reset_pref(&s->temps[i]);
2447    }
2448}
2449
2450/* liveness analysis: note live globals crossing calls.  */
2451static void la_cross_call(TCGContext *s, int nt)
2452{
2453    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2454    int i;
2455
2456    for (i = 0; i < nt; i++) {
2457        TCGTemp *ts = &s->temps[i];
2458        if (!(ts->state & TS_DEAD)) {
2459            TCGRegSet *pset = la_temp_pref(ts);
2460            TCGRegSet set = *pset;
2461
2462            set &= mask;
2463            /* If the combination is not possible, restart.  */
2464            if (set == 0) {
2465                set = tcg_target_available_regs[ts->type] & mask;
2466            }
2467            *pset = set;
2468        }
2469    }
2470}
2471
2472/* Liveness analysis : update the opc_arg_life array to tell if a
2473   given input arguments is dead. Instructions updating dead
2474   temporaries are removed. */
/*
 * The op list is walked from last to first.  For each op the accumulated
 * DEAD_ARG / SYNC_ARG bits are stored in op->life.  For non-call ops,
 * op->output_pref[] records the register preference that the later uses
 * had accumulated for each output; for calls it is set to 0.
 */
2475static void liveness_pass_1(TCGContext *s)
2476{
2477    int nb_globals = s->nb_globals;
2478    int nb_temps = s->nb_temps;
2479    TCGOp *op, *op_prev;
2480    TCGRegSet *prefs;
2481    int i;
2482
    /* One preference set per temp; each temp's state_ptr addresses its slot. */
2483    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2484    for (i = 0; i < nb_temps; ++i) {
2485        s->temps[i].state_ptr = prefs + i;
2486    }
2487
2488    /* ??? Should be redundant with the exit_tb that ends the TB.  */
2489    la_func_end(s, nb_globals, nb_temps);
2490
    /* Reverse walk; the _SAFE form permits removing 'op' while iterating. */
2491    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2492        int nb_iargs, nb_oargs;
2493        TCGOpcode opc_new, opc_new2;
2494        bool have_opc_new2;
2495        TCGLifeData arg_life = 0;
2496        TCGTemp *ts;
2497        TCGOpcode opc = op->opc;
2498        const TCGOpDef *def = &tcg_op_defs[opc];
2499
2500        switch (opc) {
2501        case INDEX_op_call:
2502            {
2503                int call_flags;
2504                int nb_call_regs;
2505
2506                nb_oargs = TCGOP_CALLO(op);
2507                nb_iargs = TCGOP_CALLI(op);
2508                call_flags = tcg_call_flags(op);
2509
2510                /* pure functions can be removed if their result is unused */
2511                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2512                    for (i = 0; i < nb_oargs; i++) {
2513                        ts = arg_temp(op->args[i]);
2514                        if (ts->state != TS_DEAD) {
2515                            goto do_not_remove_call;
2516                        }
2517                    }
2518                    goto do_remove;
2519                }
2520            do_not_remove_call:
2521
2522                /* Output args are dead.  */
2523                for (i = 0; i < nb_oargs; i++) {
2524                    ts = arg_temp(op->args[i]);
2525                    if (ts->state & TS_DEAD) {
2526                        arg_life |= DEAD_ARG << i;
2527                    }
2528                    if (ts->state & TS_MEM) {
2529                        arg_life |= SYNC_ARG << i;
2530                    }
2531                    ts->state = TS_DEAD;
2532                    la_reset_pref(ts);
2533
2534                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2535                    op->output_pref[i] = 0;
2536                }
2537
                /*
                 * Neither NO_WRITE_GLOBALS nor NO_READ_GLOBALS set: kill
                 * the globals.  Otherwise, unless NO_READ_GLOBALS is set,
                 * only sync them.
                 */
2538                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2539                                    TCG_CALL_NO_READ_GLOBALS))) {
2540                    la_global_kill(s, nb_globals);
2541                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2542                    la_global_sync(s, nb_globals);
2543                }
2544
2545                /* Record arguments that die in this helper.  */
2546                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2547                    ts = arg_temp(op->args[i]);
                    /* arg_temp() may yield NULL here, hence the check. */
2548                    if (ts && ts->state & TS_DEAD) {
2549                        arg_life |= DEAD_ARG << i;
2550                    }
2551                }
2552
2553                /* For all live registers, remove call-clobbered prefs.  */
2554                la_cross_call(s, nb_temps);
2555
2556                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2557
2558                /* Input arguments are live for preceding opcodes.  */
2559                for (i = 0; i < nb_iargs; i++) {
2560                    ts = arg_temp(op->args[i + nb_oargs]);
2561                    if (ts && ts->state & TS_DEAD) {
2562                        /* For those arguments that die, and will be allocated
2563                         * in registers, clear the register set for that arg,
2564                         * to be filled in below.  For args that will be on
2565                         * the stack, reset to any available reg.
2566                         */
2567                        *la_temp_pref(ts)
2568                            = (i < nb_call_regs ? 0 :
2569                               tcg_target_available_regs[ts->type]);
2570                        ts->state &= ~TS_DEAD;
2571                    }
2572                }
2573
2574                /* For each input argument, add its input register to prefs.
2575                   If a temp is used once, this produces a single set bit.  */
2576                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2577                    ts = arg_temp(op->args[i + nb_oargs]);
2578                    if (ts) {
2579                        tcg_regset_set_reg(*la_temp_pref(ts),
2580                                           tcg_target_call_iarg_regs[i]);
2581                    }
2582                }
2583            }
2584            break;
2585        case INDEX_op_insn_start:
2586            break;
2587        case INDEX_op_discard:
2588            /* mark the temporary as dead */
2589            ts = arg_temp(op->args[0]);
2590            ts->state = TS_DEAD;
2591            la_reset_pref(ts);
2592            break;
2593
        /*
         * Double-word add/sub: select the single-word opcode to fall back
         * to when the high half of the result turns out to be dead.
         */
2594        case INDEX_op_add2_i32:
2595            opc_new = INDEX_op_add_i32;
2596            goto do_addsub2;
2597        case INDEX_op_sub2_i32:
2598            opc_new = INDEX_op_sub_i32;
2599            goto do_addsub2;
2600        case INDEX_op_add2_i64:
2601            opc_new = INDEX_op_add_i64;
2602            goto do_addsub2;
2603        case INDEX_op_sub2_i64:
2604            opc_new = INDEX_op_sub_i64;
2605        do_addsub2:
2606            nb_iargs = 4;
2607            nb_oargs = 2;
2608            /* Test if the high part of the operation is dead, but not
2609               the low part.  The result can be optimized to a simple
2610               add or sub.  This happens often for x86_64 guest when the
2611               cpu mode is set to 32 bit.  */
2612            if (arg_temp(op->args[1])->state == TS_DEAD) {
2613                if (arg_temp(op->args[0])->state == TS_DEAD) {
2614                    goto do_remove;
2615                }
2616                /* Replace the opcode and adjust the args in place,
2617                   leaving 3 unused args at the end.  */
2618                op->opc = opc = opc_new;
2619                op->args[1] = op->args[2];
2620                op->args[2] = op->args[4];
2621                /* Fall through and mark the single-word operation live.  */
2622                nb_iargs = 2;
2623                nb_oargs = 1;
2624            }
2625            goto do_not_remove;
2626
        /*
         * Double-word multiply: opc_new yields only the low half, opc_new2
         * only the high half; have_opc_new2 gates use of the latter.
         */
2627        case INDEX_op_mulu2_i32:
2628            opc_new = INDEX_op_mul_i32;
2629            opc_new2 = INDEX_op_muluh_i32;
2630            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2631            goto do_mul2;
2632        case INDEX_op_muls2_i32:
2633            opc_new = INDEX_op_mul_i32;
2634            opc_new2 = INDEX_op_mulsh_i32;
2635            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2636            goto do_mul2;
2637        case INDEX_op_mulu2_i64:
2638            opc_new = INDEX_op_mul_i64;
2639            opc_new2 = INDEX_op_muluh_i64;
2640            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2641            goto do_mul2;
2642        case INDEX_op_muls2_i64:
2643            opc_new = INDEX_op_mul_i64;
2644            opc_new2 = INDEX_op_mulsh_i64;
2645            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2646            goto do_mul2;
2647        do_mul2:
2648            nb_iargs = 2;
2649            nb_oargs = 2;
2650            if (arg_temp(op->args[1])->state == TS_DEAD) {
2651                if (arg_temp(op->args[0])->state == TS_DEAD) {
2652                    /* Both parts of the operation are dead.  */
2653                    goto do_remove;
2654                }
2655                /* The high part of the operation is dead; generate the low. */
2656                op->opc = opc = opc_new;
2657                op->args[1] = op->args[2];
2658                op->args[2] = op->args[3];
2659            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2660                /* The low part of the operation is dead; generate the high. */
2661                op->opc = opc = opc_new2;
2662                op->args[0] = op->args[1];
2663                op->args[1] = op->args[2];
2664                op->args[2] = op->args[3];
2665            } else {
2666                goto do_not_remove;
2667            }
2668            /* Mark the single-word operation live.  */
2669            nb_oargs = 1;
2670            goto do_not_remove;
2671
2672        default:
2673            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2674            nb_iargs = def->nb_iargs;
2675            nb_oargs = def->nb_oargs;
2676
2677            /* Test if the operation can be removed because all
2678               its outputs are dead. We assume that nb_oargs == 0
2679               implies side effects */
2680            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2681                for (i = 0; i < nb_oargs; i++) {
2682                    if (arg_temp(op->args[i])->state != TS_DEAD) {
2683                        goto do_not_remove;
2684                    }
2685                }
2686                goto do_remove;
2687            }
2688            goto do_not_remove;
2689
        /* Shared tail: drop the op; it is also the target of gotos above. */
2690        do_remove:
2691            tcg_op_remove(s, op);
2692            break;
2693
        /*
         * Shared tail for ops that are kept.  Note that 'def' still refers
         * to the opcode the op had on entry, even if 'opc' was replaced.
         */
2694        do_not_remove:
2695            for (i = 0; i < nb_oargs; i++) {
2696                ts = arg_temp(op->args[i]);
2697
2698                /* Remember the preference of the uses that followed.  */
2699                op->output_pref[i] = *la_temp_pref(ts);
2700
2701                /* Output args are dead.  */
2702                if (ts->state & TS_DEAD) {
2703                    arg_life |= DEAD_ARG << i;
2704                }
2705                if (ts->state & TS_MEM) {
2706                    arg_life |= SYNC_ARG << i;
2707                }
2708                ts->state = TS_DEAD;
2709                la_reset_pref(ts);
2710            }
2711
2712            /* If end of basic block, update.  */
2713            if (def->flags & TCG_OPF_BB_EXIT) {
2714                la_func_end(s, nb_globals, nb_temps);
2715            } else if (def->flags & TCG_OPF_COND_BRANCH) {
2716                la_bb_sync(s, nb_globals, nb_temps);
2717            } else if (def->flags & TCG_OPF_BB_END) {
2718                la_bb_end(s, nb_globals, nb_temps);
2719            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2720                la_global_sync(s, nb_globals);
2721                if (def->flags & TCG_OPF_CALL_CLOBBER) {
2722                    la_cross_call(s, nb_temps);
2723                }
2724            }
2725
2726            /* Record arguments that die in this opcode.  */
2727            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2728                ts = arg_temp(op->args[i]);
2729                if (ts->state & TS_DEAD) {
2730                    arg_life |= DEAD_ARG << i;
2731                }
2732            }
2733
2734            /* Input arguments are live for preceding opcodes.  */
2735            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2736                ts = arg_temp(op->args[i]);
2737                if (ts->state & TS_DEAD) {
2738                    /* For operands that were dead, initially allow
2739                       all regs for the type.  */
2740                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2741                    ts->state &= ~TS_DEAD;
2742                }
2743            }
2744
2745            /* Incorporate constraints for this operand.  */
2746            switch (opc) {
2747            case INDEX_op_mov_i32:
2748            case INDEX_op_mov_i64:
2749                /* Note that these are TCG_OPF_NOT_PRESENT and do not
2750                   have proper constraints.  That said, special case
2751                   moves to propagate preferences backward.  */
2752                if (IS_DEAD_ARG(1)) {
2753                    *la_temp_pref(arg_temp(op->args[0]))
2754                        = *la_temp_pref(arg_temp(op->args[1]));
2755                }
2756                break;
2757
2758            default:
2759                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2760                    const TCGArgConstraint *ct = &def->args_ct[i];
2761                    TCGRegSet set, *pset;
2762
2763                    ts = arg_temp(op->args[i]);
2764                    pset = la_temp_pref(ts);
2765                    set = *pset;
2766
2767                    set &= ct->regs;
                    /* An aliased input also honours its output's pref. */
2768                    if (ct->ialias) {
2769                        set &= op->output_pref[ct->alias_index];
2770                    }
2771                    /* If the combination is not possible, restart.  */
2772                    if (set == 0) {
2773                        set = ct->regs;
2774                    }
2775                    *pset = set;
2776                }
2777                break;
2778            }
2779            break;
2780        }
        /* Every path reaches here, including do_remove (arg_life is 0). */
2781        op->life = arg_life;
2782    }
2783}
2784
2785/* Liveness analysis: Convert indirect regs to direct temporaries.  */
2786static bool liveness_pass_2(TCGContext *s)
2787{
2788    int nb_globals = s->nb_globals;
2789    int nb_temps, i;
2790    bool changes = false;
2791    TCGOp *op, *op_next;
2792
2793    /* Create a temporary for each indirect global.  */
2794    for (i = 0; i < nb_globals; ++i) {
2795        TCGTemp *its = &s->temps[i];
2796        if (its->indirect_reg) {
2797            TCGTemp *dts = tcg_temp_alloc(s);
2798            dts->type = its->type;
2799            dts->base_type = its->base_type;
2800            its->state_ptr = dts;
2801        } else {
2802            its->state_ptr = NULL;
2803        }
2804        /* All globals begin dead.  */
2805        its->state = TS_DEAD;
2806    }
2807    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2808        TCGTemp *its = &s->temps[i];
2809        its->state_ptr = NULL;
2810        its->state = TS_DEAD;
2811    }
2812
2813    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2814        TCGOpcode opc = op->opc;
2815        const TCGOpDef *def = &tcg_op_defs[opc];
2816        TCGLifeData arg_life = op->life;
2817        int nb_iargs, nb_oargs, call_flags;
2818        TCGTemp *arg_ts, *dir_ts;
2819
2820        if (opc == INDEX_op_call) {
2821            nb_oargs = TCGOP_CALLO(op);
2822            nb_iargs = TCGOP_CALLI(op);
2823            call_flags = tcg_call_flags(op);
2824        } else {
2825            nb_iargs = def->nb_iargs;
2826            nb_oargs = def->nb_oargs;
2827
2828            /* Set flags similar to how calls require.  */
2829            if (def->flags & TCG_OPF_COND_BRANCH) {
2830                /* Like reading globals: sync_globals */
2831                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2832            } else if (def->flags & TCG_OPF_BB_END) {
2833                /* Like writing globals: save_globals */
2834                call_flags = 0;
2835            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2836                /* Like reading globals: sync_globals */
2837                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2838            } else {
2839                /* No effect on globals.  */
2840                call_flags = (TCG_CALL_NO_READ_GLOBALS |
2841                              TCG_CALL_NO_WRITE_GLOBALS);
2842            }
2843        }
2844
2845        /* Make sure that input arguments are available.  */
2846        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2847            arg_ts = arg_temp(op->args[i]);
2848            if (arg_ts) {
2849                dir_ts = arg_ts->state_ptr;
2850                if (dir_ts && arg_ts->state == TS_DEAD) {
2851                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2852                                      ? INDEX_op_ld_i32
2853                                      : INDEX_op_ld_i64);
2854                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2855
2856                    lop->args[0] = temp_arg(dir_ts);
2857                    lop->args[1] = temp_arg(arg_ts->mem_base);
2858                    lop->args[2] = arg_ts->mem_offset;
2859
2860                    /* Loaded, but synced with memory.  */
2861                    arg_ts->state = TS_MEM;
2862                }
2863            }
2864        }
2865
2866        /* Perform input replacement, and mark inputs that became dead.
2867           No action is required except keeping temp_state up to date
2868           so that we reload when needed.  */
2869        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2870            arg_ts = arg_temp(op->args[i]);
2871            if (arg_ts) {
2872                dir_ts = arg_ts->state_ptr;
2873                if (dir_ts) {
2874                    op->args[i] = temp_arg(dir_ts);
2875                    changes = true;
2876                    if (IS_DEAD_ARG(i)) {
2877                        arg_ts->state = TS_DEAD;
2878                    }
2879                }
2880            }
2881        }
2882
2883        /* Liveness analysis should ensure that the following are
2884           all correct, for call sites and basic block end points.  */
2885        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2886            /* Nothing to do */
2887        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2888            for (i = 0; i < nb_globals; ++i) {
2889                /* Liveness should see that globals are synced back,
2890                   that is, either TS_DEAD or TS_MEM.  */
2891                arg_ts = &s->temps[i];
2892                tcg_debug_assert(arg_ts->state_ptr == 0
2893                                 || arg_ts->state != 0);
2894            }
2895        } else {
2896            for (i = 0; i < nb_globals; ++i) {
2897                /* Liveness should see that globals are saved back,
2898                   that is, TS_DEAD, waiting to be reloaded.  */
2899                arg_ts = &s->temps[i];
2900                tcg_debug_assert(arg_ts->state_ptr == 0
2901                                 || arg_ts->state == TS_DEAD);
2902            }
2903        }
2904
2905        /* Outputs become available.  */
2906        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2907            arg_ts = arg_temp(op->args[0]);
2908            dir_ts = arg_ts->state_ptr;
2909            if (dir_ts) {
2910                op->args[0] = temp_arg(dir_ts);
2911                changes = true;
2912
2913                /* The output is now live and modified.  */
2914                arg_ts->state = 0;
2915
2916                if (NEED_SYNC_ARG(0)) {
2917                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2918                                      ? INDEX_op_st_i32
2919                                      : INDEX_op_st_i64);
2920                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2921                    TCGTemp *out_ts = dir_ts;
2922
2923                    if (IS_DEAD_ARG(0)) {
2924                        out_ts = arg_temp(op->args[1]);
2925                        arg_ts->state = TS_DEAD;
2926                        tcg_op_remove(s, op);
2927                    } else {
2928                        arg_ts->state = TS_MEM;
2929                    }
2930
2931                    sop->args[0] = temp_arg(out_ts);
2932                    sop->args[1] = temp_arg(arg_ts->mem_base);
2933                    sop->args[2] = arg_ts->mem_offset;
2934                } else {
2935                    tcg_debug_assert(!IS_DEAD_ARG(0));
2936                }
2937            }
2938        } else {
2939            for (i = 0; i < nb_oargs; i++) {
2940                arg_ts = arg_temp(op->args[i]);
2941                dir_ts = arg_ts->state_ptr;
2942                if (!dir_ts) {
2943                    continue;
2944                }
2945                op->args[i] = temp_arg(dir_ts);
2946                changes = true;
2947
2948                /* The output is now live and modified.  */
2949                arg_ts->state = 0;
2950
2951                /* Sync outputs upon their last write.  */
2952                if (NEED_SYNC_ARG(i)) {
2953                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2954                                      ? INDEX_op_st_i32
2955                                      : INDEX_op_st_i64);
2956                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2957
2958                    sop->args[0] = temp_arg(dir_ts);
2959                    sop->args[1] = temp_arg(arg_ts->mem_base);
2960                    sop->args[2] = arg_ts->mem_offset;
2961
2962                    arg_ts->state = TS_MEM;
2963                }
2964                /* Drop outputs that are dead.  */
2965                if (IS_DEAD_ARG(i)) {
2966                    arg_ts->state = TS_DEAD;
2967                }
2968            }
2969        }
2970    }
2971
2972    return changes;
2973}
2974
2975#ifdef CONFIG_DEBUG_TCG
2976static void dump_regs(TCGContext *s)
2977{
2978    TCGTemp *ts;
2979    int i;
2980    char buf[64];
2981
2982    for(i = 0; i < s->nb_temps; i++) {
2983        ts = &s->temps[i];
2984        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2985        switch(ts->val_type) {
2986        case TEMP_VAL_REG:
2987            printf("%s", tcg_target_reg_names[ts->reg]);
2988            break;
2989        case TEMP_VAL_MEM:
2990            printf("%d(%s)", (int)ts->mem_offset,
2991                   tcg_target_reg_names[ts->mem_base->reg]);
2992            break;
2993        case TEMP_VAL_CONST:
2994            printf("$0x%" PRIx64, ts->val);
2995            break;
2996        case TEMP_VAL_DEAD:
2997            printf("D");
2998            break;
2999        default:
3000            printf("???");
3001            break;
3002        }
3003        printf("\n");
3004    }
3005
3006    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3007        if (s->reg_to_temp[i] != NULL) {
3008            printf("%s: %s\n", 
3009                   tcg_target_reg_names[i], 
3010                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3011        }
3012    }
3013}
3014
3015static void check_regs(TCGContext *s)
3016{
3017    int reg;
3018    int k;
3019    TCGTemp *ts;
3020    char buf[64];
3021
3022    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3023        ts = s->reg_to_temp[reg];
3024        if (ts != NULL) {
3025            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3026                printf("Inconsistency for register %s:\n", 
3027                       tcg_target_reg_names[reg]);
3028                goto fail;
3029            }
3030        }
3031    }
3032    for (k = 0; k < s->nb_temps; k++) {
3033        ts = &s->temps[k];
3034        if (ts->val_type == TEMP_VAL_REG
3035            && ts->kind != TEMP_FIXED
3036            && s->reg_to_temp[ts->reg] != ts) {
3037            printf("Inconsistency for temp %s:\n",
3038                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3039        fail:
3040            printf("reg state:\n");
3041            dump_regs(s);
3042            tcg_abort();
3043        }
3044    }
3045}
3046#endif
3047
3048static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3049{
3050    intptr_t off, size, align;
3051
3052    switch (ts->type) {
3053    case TCG_TYPE_I32:
3054        size = align = 4;
3055        break;
3056    case TCG_TYPE_I64:
3057    case TCG_TYPE_V64:
3058        size = align = 8;
3059        break;
3060    case TCG_TYPE_V128:
3061        size = align = 16;
3062        break;
3063    case TCG_TYPE_V256:
3064        /* Note that we do not require aligned storage for V256. */
3065        size = 32, align = 16;
3066        break;
3067    default:
3068        g_assert_not_reached();
3069    }
3070
3071    /*
3072     * Assume the stack is sufficiently aligned.
3073     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3074     * and do not require 16 byte vector alignment.  This seems slightly
3075     * easier than fully parameterizing the above switch statement.
3076     */
3077    align = MIN(TCG_TARGET_STACK_ALIGN, align);
3078    off = ROUND_UP(s->current_frame_offset, align);
3079
3080    /* If we've exhausted the stack frame, restart with a smaller TB. */
3081    if (off + size > s->frame_end) {
3082        tcg_raise_tb_overflow(s);
3083    }
3084    s->current_frame_offset = off + size;
3085
3086    ts->mem_offset = off;
3087#if defined(__sparc__)
3088    ts->mem_offset += TCG_TARGET_STACK_BIAS;
3089#endif
3090    ts->mem_base = s->frame_temp;
3091    ts->mem_allocated = 1;
3092}
3093
3094static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3095
3096/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3097   mark it free; otherwise mark it dead.  */
3098static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3099{
3100    TCGTempVal new_type;
3101
3102    switch (ts->kind) {
3103    case TEMP_FIXED:
3104        return;
3105    case TEMP_GLOBAL:
3106    case TEMP_LOCAL:
3107        new_type = TEMP_VAL_MEM;
3108        break;
3109    case TEMP_NORMAL:
3110        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3111        break;
3112    case TEMP_CONST:
3113        new_type = TEMP_VAL_CONST;
3114        break;
3115    default:
3116        g_assert_not_reached();
3117    }
3118    if (ts->val_type == TEMP_VAL_REG) {
3119        s->reg_to_temp[ts->reg] = NULL;
3120    }
3121    ts->val_type = new_type;
3122}
3123
3124/* Mark a temporary as dead.  */
3125static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3126{
3127    temp_free_or_dead(s, ts, 1);
3128}
3129
3130/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3131   registers needs to be allocated to store a constant.  If 'free_or_dead'
3132   is non-zero, subsequently release the temporary; if it is positive, the
3133   temp is dead; if it is negative, the temp is free.  */
3134static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3135                      TCGRegSet preferred_regs, int free_or_dead)
3136{
3137    if (!temp_readonly(ts) && !ts->mem_coherent) {
3138        if (!ts->mem_allocated) {
3139            temp_allocate_frame(s, ts);
3140        }
3141        switch (ts->val_type) {
3142        case TEMP_VAL_CONST:
3143            /* If we're going to free the temp immediately, then we won't
3144               require it later in a register, so attempt to store the
3145               constant to memory directly.  */
3146            if (free_or_dead
3147                && tcg_out_sti(s, ts->type, ts->val,
3148                               ts->mem_base->reg, ts->mem_offset)) {
3149                break;
3150            }
3151            temp_load(s, ts, tcg_target_available_regs[ts->type],
3152                      allocated_regs, preferred_regs);
3153            /* fallthrough */
3154
3155        case TEMP_VAL_REG:
3156            tcg_out_st(s, ts->type, ts->reg,
3157                       ts->mem_base->reg, ts->mem_offset);
3158            break;
3159
3160        case TEMP_VAL_MEM:
3161            break;
3162
3163        case TEMP_VAL_DEAD:
3164        default:
3165            tcg_abort();
3166        }
3167        ts->mem_coherent = 1;
3168    }
3169    if (free_or_dead) {
3170        temp_free_or_dead(s, ts, free_or_dead);
3171    }
3172}
3173
3174/* free register 'reg' by spilling the corresponding temporary if necessary */
3175static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3176{
3177    TCGTemp *ts = s->reg_to_temp[reg];
3178    if (ts != NULL) {
3179        temp_sync(s, ts, allocated_regs, 0, -1);
3180    }
3181}
3182
3183/**
3184 * tcg_reg_alloc:
3185 * @required_regs: Set of registers in which we must allocate.
3186 * @allocated_regs: Set of registers which must be avoided.
3187 * @preferred_regs: Set of registers we should prefer.
3188 * @rev: True if we search the registers in "indirect" order.
3189 *
3190 * The allocated register must be in @required_regs & ~@allocated_regs,
3191 * but if we can put it in @preferred_regs we may save a move later.
3192 */
3193static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3194                            TCGRegSet allocated_regs,
3195                            TCGRegSet preferred_regs, bool rev)
3196{
3197    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3198    TCGRegSet reg_ct[2];
3199    const int *order;
3200
3201    reg_ct[1] = required_regs & ~allocated_regs;
3202    tcg_debug_assert(reg_ct[1] != 0);
3203    reg_ct[0] = reg_ct[1] & preferred_regs;
3204
3205    /* Skip the preferred_regs option if it cannot be satisfied,
3206       or if the preference made no difference.  */
3207    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3208
3209    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3210
3211    /* Try free registers, preferences first.  */
3212    for (j = f; j < 2; j++) {
3213        TCGRegSet set = reg_ct[j];
3214
3215        if (tcg_regset_single(set)) {
3216            /* One register in the set.  */
3217            TCGReg reg = tcg_regset_first(set);
3218            if (s->reg_to_temp[reg] == NULL) {
3219                return reg;
3220            }
3221        } else {
3222            for (i = 0; i < n; i++) {
3223                TCGReg reg = order[i];
3224                if (s->reg_to_temp[reg] == NULL &&
3225                    tcg_regset_test_reg(set, reg)) {
3226                    return reg;
3227                }
3228            }
3229        }
3230    }
3231
3232    /* We must spill something.  */
3233    for (j = f; j < 2; j++) {
3234        TCGRegSet set = reg_ct[j];
3235
3236        if (tcg_regset_single(set)) {
3237            /* One register in the set.  */
3238            TCGReg reg = tcg_regset_first(set);
3239            tcg_reg_free(s, reg, allocated_regs);
3240            return reg;
3241        } else {
3242            for (i = 0; i < n; i++) {
3243                TCGReg reg = order[i];
3244                if (tcg_regset_test_reg(set, reg)) {
3245                    tcg_reg_free(s, reg, allocated_regs);
3246                    return reg;
3247                }
3248            }
3249        }
3250    }
3251
3252    tcg_abort();
3253}
3254
3255/* Make sure the temporary is in a register.  If needed, allocate the register
3256   from DESIRED while avoiding ALLOCATED.  */
3257static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3258                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3259{
3260    TCGReg reg;
3261
3262    switch (ts->val_type) {
3263    case TEMP_VAL_REG:
3264        return;
3265    case TEMP_VAL_CONST:
3266        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3267                            preferred_regs, ts->indirect_base);
3268        if (ts->type <= TCG_TYPE_I64) {
3269            tcg_out_movi(s, ts->type, reg, ts->val);
3270        } else {
3271            uint64_t val = ts->val;
3272            MemOp vece = MO_64;
3273
3274            /*
3275             * Find the minimal vector element that matches the constant.
3276             * The targets will, in general, have to do this search anyway,
3277             * do this generically.
3278             */
3279            if (val == dup_const(MO_8, val)) {
3280                vece = MO_8;
3281            } else if (val == dup_const(MO_16, val)) {
3282                vece = MO_16;
3283            } else if (val == dup_const(MO_32, val)) {
3284                vece = MO_32;
3285            }
3286
3287            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3288        }
3289        ts->mem_coherent = 0;
3290        break;
3291    case TEMP_VAL_MEM:
3292        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3293                            preferred_regs, ts->indirect_base);
3294        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3295        ts->mem_coherent = 1;
3296        break;
3297    case TEMP_VAL_DEAD:
3298    default:
3299        tcg_abort();
3300    }
3301    ts->reg = reg;
3302    ts->val_type = TEMP_VAL_REG;
3303    s->reg_to_temp[reg] = ts;
3304}
3305
3306/* Save a temporary to memory. 'allocated_regs' is used in case a
3307   temporary registers needs to be allocated to store a constant.  */
3308static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3309{
3310    /* The liveness analysis already ensures that globals are back
3311       in memory. Keep an tcg_debug_assert for safety. */
3312    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3313}
3314
3315/* save globals to their canonical location and assume they can be
3316   modified be the following code. 'allocated_regs' is used in case a
3317   temporary registers needs to be allocated to store a constant. */
3318static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3319{
3320    int i, n;
3321
3322    for (i = 0, n = s->nb_globals; i < n; i++) {
3323        temp_save(s, &s->temps[i], allocated_regs);
3324    }
3325}
3326
3327/* sync globals to their canonical location and assume they can be
3328   read by the following code. 'allocated_regs' is used in case a
3329   temporary registers needs to be allocated to store a constant. */
3330static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3331{
3332    int i, n;
3333
3334    for (i = 0, n = s->nb_globals; i < n; i++) {
3335        TCGTemp *ts = &s->temps[i];
3336        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3337                         || ts->kind == TEMP_FIXED
3338                         || ts->mem_coherent);
3339    }
3340}
3341
3342/* at the end of a basic block, we assume all temporaries are dead and
3343   all globals are stored at their canonical location. */
3344static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3345{
3346    int i;
3347
3348    for (i = s->nb_globals; i < s->nb_temps; i++) {
3349        TCGTemp *ts = &s->temps[i];
3350
3351        switch (ts->kind) {
3352        case TEMP_LOCAL:
3353            temp_save(s, ts, allocated_regs);
3354            break;
3355        case TEMP_NORMAL:
3356            /* The liveness analysis already ensures that temps are dead.
3357               Keep an tcg_debug_assert for safety. */
3358            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3359            break;
3360        case TEMP_CONST:
3361            /* Similarly, we should have freed any allocated register. */
3362            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3363            break;
3364        default:
3365            g_assert_not_reached();
3366        }
3367    }
3368
3369    save_globals(s, allocated_regs);
3370}
3371
3372/*
3373 * At a conditional branch, we assume all temporaries are dead and
3374 * all globals and local temps are synced to their location.
3375 */
3376static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3377{
3378    sync_globals(s, allocated_regs);
3379
3380    for (int i = s->nb_globals; i < s->nb_temps; i++) {
3381        TCGTemp *ts = &s->temps[i];
3382        /*
3383         * The liveness analysis already ensures that temps are dead.
3384         * Keep tcg_debug_asserts for safety.
3385         */
3386        switch (ts->kind) {
3387        case TEMP_LOCAL:
3388            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3389            break;
3390        case TEMP_NORMAL:
3391            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3392            break;
3393        case TEMP_CONST:
3394            break;
3395        default:
3396            g_assert_not_reached();
3397        }
3398    }
3399}
3400
3401/*
3402 * Specialized code generation for INDEX_op_mov_* with a constant.
3403 */
3404static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3405                                  tcg_target_ulong val, TCGLifeData arg_life,
3406                                  TCGRegSet preferred_regs)
3407{
3408    /* ENV should not be modified.  */
3409    tcg_debug_assert(!temp_readonly(ots));
3410
3411    /* The movi is not explicitly generated here.  */
3412    if (ots->val_type == TEMP_VAL_REG) {
3413        s->reg_to_temp[ots->reg] = NULL;
3414    }
3415    ots->val_type = TEMP_VAL_CONST;
3416    ots->val = val;
3417    ots->mem_coherent = 0;
3418    if (NEED_SYNC_ARG(0)) {
3419        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3420    } else if (IS_DEAD_ARG(0)) {
3421        temp_dead(s, ots);
3422    }
3423}
3424
3425/*
3426 * Specialized code generation for INDEX_op_mov_*.
3427 */
3428static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3429{
3430    const TCGLifeData arg_life = op->life;
3431    TCGRegSet allocated_regs, preferred_regs;
3432    TCGTemp *ts, *ots;
3433    TCGType otype, itype;
3434
3435    allocated_regs = s->reserved_regs;
3436    preferred_regs = op->output_pref[0];
3437    ots = arg_temp(op->args[0]);
3438    ts = arg_temp(op->args[1]);
3439
3440    /* ENV should not be modified.  */
3441    tcg_debug_assert(!temp_readonly(ots));
3442
3443    /* Note that otype != itype for no-op truncation.  */
3444    otype = ots->type;
3445    itype = ts->type;
3446
3447    if (ts->val_type == TEMP_VAL_CONST) {
3448        /* propagate constant or generate sti */
3449        tcg_target_ulong val = ts->val;
3450        if (IS_DEAD_ARG(1)) {
3451            temp_dead(s, ts);
3452        }
3453        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3454        return;
3455    }
3456
3457    /* If the source value is in memory we're going to be forced
3458       to have it in a register in order to perform the copy.  Copy
3459       the SOURCE value into its own register first, that way we
3460       don't have to reload SOURCE the next time it is used. */
3461    if (ts->val_type == TEMP_VAL_MEM) {
3462        temp_load(s, ts, tcg_target_available_regs[itype],
3463                  allocated_regs, preferred_regs);
3464    }
3465
3466    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3467    if (IS_DEAD_ARG(0)) {
3468        /* mov to a non-saved dead register makes no sense (even with
3469           liveness analysis disabled). */
3470        tcg_debug_assert(NEED_SYNC_ARG(0));
3471        if (!ots->mem_allocated) {
3472            temp_allocate_frame(s, ots);
3473        }
3474        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3475        if (IS_DEAD_ARG(1)) {
3476            temp_dead(s, ts);
3477        }
3478        temp_dead(s, ots);
3479    } else {
3480        if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3481            /* the mov can be suppressed */
3482            if (ots->val_type == TEMP_VAL_REG) {
3483                s->reg_to_temp[ots->reg] = NULL;
3484            }
3485            ots->reg = ts->reg;
3486            temp_dead(s, ts);
3487        } else {
3488            if (ots->val_type != TEMP_VAL_REG) {
3489                /* When allocating a new register, make sure to not spill the
3490                   input one. */
3491                tcg_regset_set_reg(allocated_regs, ts->reg);
3492                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3493                                         allocated_regs, preferred_regs,
3494                                         ots->indirect_base);
3495            }
3496            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3497                /*
3498                 * Cross register class move not supported.
3499                 * Store the source register into the destination slot
3500                 * and leave the destination temp as TEMP_VAL_MEM.
3501                 */
3502                assert(!temp_readonly(ots));
3503                if (!ts->mem_allocated) {
3504                    temp_allocate_frame(s, ots);
3505                }
3506                tcg_out_st(s, ts->type, ts->reg,
3507                           ots->mem_base->reg, ots->mem_offset);
3508                ots->mem_coherent = 1;
3509                temp_free_or_dead(s, ots, -1);
3510                return;
3511            }
3512        }
3513        ots->val_type = TEMP_VAL_REG;
3514        ots->mem_coherent = 0;
3515        s->reg_to_temp[ots->reg] = ots;
3516        if (NEED_SYNC_ARG(0)) {
3517            temp_sync(s, ots, allocated_regs, 0, 0);
3518        }
3519    }
3520}
3521
3522/*
3523 * Specialized code generation for INDEX_op_dup_vec.
3524 */
3525static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3526{
3527    const TCGLifeData arg_life = op->life;
3528    TCGRegSet dup_out_regs, dup_in_regs;
3529    TCGTemp *its, *ots;
3530    TCGType itype, vtype;
3531    intptr_t endian_fixup;
3532    unsigned vece;
3533    bool ok;
3534
3535    ots = arg_temp(op->args[0]);
3536    its = arg_temp(op->args[1]);
3537
3538    /* ENV should not be modified.  */
3539    tcg_debug_assert(!temp_readonly(ots));
3540
3541    itype = its->type;
3542    vece = TCGOP_VECE(op);
3543    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3544
3545    if (its->val_type == TEMP_VAL_CONST) {
3546        /* Propagate constant via movi -> dupi.  */
3547        tcg_target_ulong val = its->val;
3548        if (IS_DEAD_ARG(1)) {
3549            temp_dead(s, its);
3550        }
3551        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3552        return;
3553    }
3554
3555    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3556    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3557
3558    /* Allocate the output register now.  */
3559    if (ots->val_type != TEMP_VAL_REG) {
3560        TCGRegSet allocated_regs = s->reserved_regs;
3561
3562        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3563            /* Make sure to not spill the input register. */
3564            tcg_regset_set_reg(allocated_regs, its->reg);
3565        }
3566        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3567                                 op->output_pref[0], ots->indirect_base);
3568        ots->val_type = TEMP_VAL_REG;
3569        ots->mem_coherent = 0;
3570        s->reg_to_temp[ots->reg] = ots;
3571    }
3572
3573    switch (its->val_type) {
3574    case TEMP_VAL_REG:
3575        /*
3576         * The dup constriaints must be broad, covering all possible VECE.
3577         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3578         * to fail, indicating that extra moves are required for that case.
3579         */
3580        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3581            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3582                goto done;
3583            }
3584            /* Try again from memory or a vector input register.  */
3585        }
3586        if (!its->mem_coherent) {
3587            /*
3588             * The input register is not synced, and so an extra store
3589             * would be required to use memory.  Attempt an integer-vector
3590             * register move first.  We do not have a TCGRegSet for this.
3591             */
3592            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3593                break;
3594            }
3595            /* Sync the temp back to its slot and load from there.  */
3596            temp_sync(s, its, s->reserved_regs, 0, 0);
3597        }
3598        /* fall through */
3599
3600    case TEMP_VAL_MEM:
3601#ifdef HOST_WORDS_BIGENDIAN
3602        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3603        endian_fixup -= 1 << vece;
3604#else
3605        endian_fixup = 0;
3606#endif
3607        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3608                             its->mem_offset + endian_fixup)) {
3609            goto done;
3610        }
3611        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3612        break;
3613
3614    default:
3615        g_assert_not_reached();
3616    }
3617
3618    /* We now have a vector input register, so dup must succeed. */
3619    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3620    tcg_debug_assert(ok);
3621
3622 done:
3623    if (IS_DEAD_ARG(1)) {
3624        temp_dead(s, its);
3625    }
3626    if (NEED_SYNC_ARG(0)) {
3627        temp_sync(s, ots, s->reserved_regs, 0, 0);
3628    }
3629    if (IS_DEAD_ARG(0)) {
3630        temp_dead(s, ots);
3631    }
3632}
3633
/*
 * Generic register allocation and code emission for a single op.
 *
 * The steps, in order, are:
 *   1. copy the op's constant arguments into new_args[];
 *   2. for each input (visited in args_ct[].sort_index order), either
 *      accept it as an immediate or get it into a register that matches
 *      the argument constraint;
 *   3. mark dead input temporaries as dead;
 *   4. handle conditional-branch / basic-block-end ops, or otherwise
 *      free call-clobbered registers, sync globals for ops with side
 *      effects, and pick a register for each output;
 *   5. emit the host instruction via tcg_out_vec_op() or tcg_out_op();
 *   6. sync and/or kill the outputs as requested by the liveness data.
 *
 * @s:  the TCG context holding the register allocator state.
 * @op: the op to allocate and emit; op->life supplies the liveness bits.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    /*
     * Not referenced by name below; presumably consumed by the
     * IS_DEAD_ARG() / NEED_SYNC_ARG() macros -- confirm at their definition.
     */
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    /* Register sets passed as "already allocated" for inputs / outputs. */
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /* Final argument values (registers or immediates) handed to the backend. */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    /* const_args[i] != 0 means new_args[i] holds an immediate, not a register. */
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs, 
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* Both sets start out as the reserved registers. */
    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */ 
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Inputs are visited in the order recorded in sort_index. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            /* This input is aliased to an output: prefer that output's regs. */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                /* Only inputs processed earlier in this loop are examined. */
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        /*
         * The temp may still sit in a register outside the constraint set;
         * in that case fall into the copy path below.
         */
        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        /* Keep later inputs from being given this register. */
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */ 
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            /* Outputs are also visited in sort_index order. */
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Reuse the register chosen for the aliased input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* Must not overlap any input register nor any earlier output. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* Detach the temp from whatever register it occupied before. */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        /* A synced output is also released when it is dead (last argument). */
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3818
3819static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3820{
3821    const TCGLifeData arg_life = op->life;
3822    TCGTemp *ots, *itsl, *itsh;
3823    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3824
3825    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3826    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3827    tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3828
3829    ots = arg_temp(op->args[0]);
3830    itsl = arg_temp(op->args[1]);
3831    itsh = arg_temp(op->args[2]);
3832
3833    /* ENV should not be modified.  */
3834    tcg_debug_assert(!temp_readonly(ots));
3835
3836    /* Allocate the output register now.  */
3837    if (ots->val_type != TEMP_VAL_REG) {
3838        TCGRegSet allocated_regs = s->reserved_regs;
3839        TCGRegSet dup_out_regs =
3840            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3841
3842        /* Make sure to not spill the input registers. */
3843        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3844            tcg_regset_set_reg(allocated_regs, itsl->reg);
3845        }
3846        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3847            tcg_regset_set_reg(allocated_regs, itsh->reg);
3848        }
3849
3850        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3851                                 op->output_pref[0], ots->indirect_base);
3852        ots->val_type = TEMP_VAL_REG;
3853        ots->mem_coherent = 0;
3854        s->reg_to_temp[ots->reg] = ots;
3855    }
3856
3857    /* Promote dup2 of immediates to dupi_vec. */
3858    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3859        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3860        MemOp vece = MO_64;
3861
3862        if (val == dup_const(MO_8, val)) {
3863            vece = MO_8;
3864        } else if (val == dup_const(MO_16, val)) {
3865            vece = MO_16;
3866        } else if (val == dup_const(MO_32, val)) {
3867            vece = MO_32;
3868        }
3869
3870        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3871        goto done;
3872    }
3873
3874    /* If the two inputs form one 64-bit value, try dupm_vec. */
3875    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3876        if (!itsl->mem_coherent) {
3877            temp_sync(s, itsl, s->reserved_regs, 0, 0);
3878        }
3879        if (!itsh->mem_coherent) {
3880            temp_sync(s, itsh, s->reserved_regs, 0, 0);
3881        }
3882#ifdef HOST_WORDS_BIGENDIAN
3883        TCGTemp *its = itsh;
3884#else
3885        TCGTemp *its = itsl;
3886#endif
3887        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3888                             its->mem_base->reg, its->mem_offset)) {
3889            goto done;
3890        }
3891    }
3892
3893    /* Fall back to generic expansion. */
3894    return false;
3895
3896 done:
3897    if (IS_DEAD_ARG(1)) {
3898        temp_dead(s, itsl);
3899    }
3900    if (IS_DEAD_ARG(2)) {
3901        temp_dead(s, itsh);
3902    }
3903    if (NEED_SYNC_ARG(0)) {
3904        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3905    } else if (IS_DEAD_ARG(0)) {
3906        temp_dead(s, ots);
3907    }
3908    return true;
3909}
3910
/*
 * STACK_DIR(x): x negated when TCG_TARGET_STACK_GROWSUP is defined,
 * x unchanged otherwise.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3916
/*
 * Register allocation and code emission for a helper call op.
 *
 * The steps, in order, are:
 *   1. store the arguments that do not fit in argument registers to the
 *      call stack area;
 *   2. move/load the remaining arguments into tcg_target_call_iarg_regs[];
 *   3. mark dead input temporaries as dead;
 *   4. free every call-clobbered register;
 *   5. save or sync globals according to the helper's flags;
 *   6. emit the call;
 *   7. bind each output temp to tcg_target_call_oarg_regs[] and sync/kill
 *      it as requested by the liveness data.
 *
 * @s:  the TCG context holding the register allocator state.
 * @op: the call op; argument counts come from TCGOP_CALLO()/TCGOP_CALLI().
 *
 * Aborts via tcg_abort() if the stack arguments need more than
 * TCG_STATIC_CALL_ARGS_SIZE bytes.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    /*
     * Not referenced by name below; presumably consumed by the
     * IS_DEAD_ARG() / NEED_SYNC_ARG() macros -- confirm at their definition.
     */
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = tcg_call_func(op);
    info = tcg_call_info(op);
    flags = info->flags;

    /* Number of arguments passed in registers: capped by the input count. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
        /*
         * With TCG_TARGET_STACK_GROWSUP the offset is decremented before
         * the store; otherwise it is incremented after it.
         */
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* Dummy arguments still consume a slot but store nothing. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                /* Already in a register: copy only if it is the wrong one. */
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Not in a register: load with the argument register as the only candidate. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            /* Protect this argument register while the remaining ones are set up. */
            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

#ifdef CONFIG_TCG_INTERPRETER
    {
        /* The interpreter build also passes the ffi_cif looked up by typemask. */
        gpointer hash = (gpointer)(uintptr_t)info->typemask;
        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
        assert(cif != NULL);
        tcg_out_call(s, func_addr, cif);
    }
#else
    tcg_out_call(s, func_addr);
#endif

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        /* The return register is expected to be unowned at this point. */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        /* Detach the temp from whatever register it occupied before. */
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        /* The register now holds a new value that memory does not have. */
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4062
4063#ifdef CONFIG_PROFILER
4064
/* avoid copy/paste errors */
/*
 * PROF_ADD(to, from, field): read (from)->field with qatomic_read() and
 * add the value to (to)->field.
 */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/*
 * PROF_MAX(to, from, field): read (from)->field with qatomic_read() and
 * store it in (to)->field if it is larger than the current value there.
 */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4078
4079/* Pass in a zero'ed @prof */
4080static inline
4081void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4082{
4083    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4084    unsigned int i;
4085
4086    for (i = 0; i < n_ctxs; i++) {
4087        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4088        const TCGProfile *orig = &s->prof;
4089
4090        if (counters) {
4091            PROF_ADD(prof, orig, cpu_exec_time);
4092            PROF_ADD(prof, orig, tb_count1);
4093            PROF_ADD(prof, orig, tb_count);
4094            PROF_ADD(prof, orig, op_count);
4095            PROF_MAX(prof, orig, op_count_max);
4096            PROF_ADD(prof, orig, temp_count);
4097            PROF_MAX(prof, orig, temp_count_max);
4098            PROF_ADD(prof, orig, del_op_count);
4099            PROF_ADD(prof, orig, code_in_len);
4100            PROF_ADD(prof, orig, code_out_len);
4101            PROF_ADD(prof, orig, search_out_len);
4102            PROF_ADD(prof, orig, interm_time);
4103            PROF_ADD(prof, orig, code_time);
4104            PROF_ADD(prof, orig, la_time);
4105            PROF_ADD(prof, orig, opt_time);
4106            PROF_ADD(prof, orig, restore_count);
4107            PROF_ADD(prof, orig, restore_time);
4108        }
4109        if (table) {
4110            int i;
4111
4112            for (i = 0; i < NB_OPS; i++) {
4113                PROF_ADD(prof, orig, table_op_count[i]);
4114            }
4115        }
4116    }
4117}
4118
4119#undef PROF_ADD
4120#undef PROF_MAX
4121
/*
 * Snapshot only the scalar counters into @prof (the per-opcode table is
 * left untouched).  Calls tcg_profile_snapshot() with counters=true,
 * table=false.
 */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4126
/*
 * Snapshot only the per-opcode table into @prof (the scalar counters are
 * left untouched).  Calls tcg_profile_snapshot() with counters=false,
 * table=true.
 */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4131
4132void tcg_dump_op_count(GString *buf)
4133{
4134    TCGProfile prof = {};
4135    int i;
4136
4137    tcg_profile_snapshot_table(&prof);
4138    for (i = 0; i < NB_OPS; i++) {
4139        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4140                               prof.table_op_count[i]);
4141    }
4142}
4143
4144int64_t tcg_cpu_exec_time(void)
4145{
4146    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4147    unsigned int i;
4148    int64_t ret = 0;
4149
4150    for (i = 0; i < n_ctxs; i++) {
4151        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4152        const TCGProfile *prof = &s->prof;
4153
4154        ret += qatomic_read(&prof->cpu_exec_time);
4155    }
4156    return ret;
4157}
4158#else
/*
 * Variant used when CONFIG_PROFILER is not defined: no counts exist, so
 * only a fixed notice is appended to @buf.
 */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4163
/*
 * Variant used when CONFIG_PROFILER is not defined.  It never returns:
 * it reports an error and terminates the process with EXIT_FAILURE.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4169#endif
4170
4171
/*
 * Generate host code for the ops currently queued in @s->ops.
 *
 * The function optionally updates profiling counters and logs the op
 * stream, runs the optimizer (when USE_TCG_OPTIMIZATIONS is defined), the
 * reachability and liveness passes, and then walks the op list performing
 * register allocation and emitting host code into the buffer at
 * tb->tc.ptr.
 *
 * @s:  the TCG context.
 * @tb: the translation block being generated; tb->tc.ptr gives the output
 *      buffer and tb->pc is used to filter logging.
 *
 * Returns the size of the generated code on success.  Negative values
 * report failure:
 *   -1  the code pointer went past s->code_gen_highwater;
 *   -2  the code size exceeded UINT16_MAX, or tcg_resolve_relocs() failed;
 *   any negative value returned by tcg_out_ldst_finalize() or
 *   tcg_out_pool_finalize() is passed through unchanged.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        /* Count the ops of this TB for the op_count statistics. */
        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /*
     * The profiler times a phase by subtracting the clock at its start and
     * adding the clock at its end.
     */
#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Starts at -1 so that the first insn_start op makes it 0. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record where the previous guest instruction's code ended. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
                /*
                 * When the target word is wider than a host register, each
                 * word is rebuilt from two consecutive op arguments.
                 */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            /* A false return means the generic path below must handle it. */
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* At least one insn_start op must have been seen. */
    tcg_debug_assert(num_insns >= 0);
    /* Close the record of the last guest instruction. */
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4397
4398#ifdef CONFIG_PROFILER
4399void tcg_dump_info(GString *buf)
4400{
4401    TCGProfile prof = {};
4402    const TCGProfile *s;
4403    int64_t tb_count;
4404    int64_t tb_div_count;
4405    int64_t tot;
4406
4407    tcg_profile_snapshot_counters(&prof);
4408    s = &prof;
4409    tb_count = s->tb_count;
4410    tb_div_count = tb_count ? tb_count : 1;
4411    tot = s->interm_time + s->code_time;
4412
4413    g_string_append_printf(buf, "JIT cycles          %" PRId64
4414                           " (%0.3f s at 2.4 GHz)\n",
4415                           tot, tot / 2.4e9);
4416    g_string_append_printf(buf, "translated TBs      %" PRId64
4417                           " (aborted=%" PRId64 " %0.1f%%)\n",
4418                           tb_count, s->tb_count1 - tb_count,
4419                           (double)(s->tb_count1 - s->tb_count)
4420                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4421    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4422                           (double)s->op_count / tb_div_count, s->op_count_max);
4423    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4424                           (double)s->del_op_count / tb_div_count);
4425    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4426                           (double)s->temp_count / tb_div_count,
4427                           s->temp_count_max);
4428    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4429                           (double)s->code_out_len / tb_div_count);
4430    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4431                           (double)s->search_out_len / tb_div_count);
4432    
4433    g_string_append_printf(buf, "cycles/op           %0.1f\n",
4434                           s->op_count ? (double)tot / s->op_count : 0);
4435    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4436                           s->code_in_len ? (double)tot / s->code_in_len : 0);
4437    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4438                           s->code_out_len ? (double)tot / s->code_out_len : 0);
4439    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4440                           s->search_out_len ?
4441                           (double)tot / s->search_out_len : 0);
4442    if (tot == 0) {
4443        tot = 1;
4444    }
4445    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4446                           (double)s->interm_time / tot * 100.0);
4447    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4448                           (double)s->code_time / tot * 100.0);
4449    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4450                           (double)s->opt_time / (s->code_time ?
4451                                                  s->code_time : 1)
4452                           * 100.0);
4453    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4454                           (double)s->la_time / (s->code_time ?
4455                                                 s->code_time : 1) * 100.0);
4456    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4457                           s->restore_count);
4458    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4459                           s->restore_count ?
4460                           (double)s->restore_time / s->restore_count : 0);
4461}
4462#else
/*
 * Variant used when CONFIG_PROFILER is not defined: no statistics exist,
 * so only a fixed notice is appended to @buf.
 */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4467#endif
4468
4469#ifdef ELF_HOST_MACHINE
4470/* In order to use this feature, the backend needs to do three things:
4471
4472   (1) Define ELF_HOST_MACHINE to indicate both what value to
4473       put into the ELF image and to indicate support for the feature.
4474
4475   (2) Define tcg_register_jit.  This should create a buffer containing
4476       the contents of a .debug_frame section that describes the post-
4477       prologue unwind info for the tcg machine.
4478
4479   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4480*/
4481
4482/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Actions that can be stored in jit_descriptor.action_flag.  The numeric
   values belong to the GDB interface, so they are spelled out rather than
   left to implicit enumeration.  */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
4488
/* One registered symbol file, linked to its neighbours through
   next_entry/prev_entry.  The member order and types are part of the
   GDB interface and must not be changed.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;   /* link to the following entry */
    struct jit_code_entry *prev_entry;   /* link to the preceding entry */
    const void *symfile_addr;            /* start of the in-memory image */
    uint64_t symfile_size;               /* size of that image in bytes */
};
4495
/* Descriptor through which registered entries are published.  The member
   order and types are part of the GDB interface and must not be
   changed.  */
struct jit_descriptor {
    uint32_t version;                       /* interface version */
    uint32_t action_flag;                   /* holds a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action refers to */
    struct jit_code_entry *first_entry;     /* first entry of the list */
};
4502
/* Notification hook of the GDB JIT interface: it is called once
   __jit_debug_descriptor has been filled in.  The function must remain a
   real, out-of-line call, hence the noinline attribute and the empty asm
   statement in its body.  */
void __attribute__((noinline)) __jit_debug_register_code(void);

void __jit_debug_register_code(void)
{
    __asm__("");
}
4508
4509/* Must statically initialize the version, because GDB may check
4510   the version before we can set it.  */
4511struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4512
4513/* End GDB interface.  */
4514
/* Return the byte offset of STR inside the string table STRTAB.

   STRTAB is a sequence of NUL-terminated strings laid end to end.  Its
   first byte (the empty name at offset 0) is skipped.  The table must be
   closed by an empty string, i.e. one extra NUL byte after the last name;
   ElfImage.str gets that from the zero padding of its fixed-size array.

   If STR is not present, 0 (the offset of the empty name) is returned
   instead of scanning past the end of the table.  */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; *p != '\0'; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }

    /* Reached the terminating empty string: no such name.  */
    return 0;
}
4526
4527static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4528                                 const void *debug_frame,
4529                                 size_t debug_frame_size)
4530{
4531    struct __attribute__((packed)) DebugInfo {
4532        uint32_t  len;
4533        uint16_t  version;
4534        uint32_t  abbrev;
4535        uint8_t   ptr_size;
4536        uint8_t   cu_die;
4537        uint16_t  cu_lang;
4538        uintptr_t cu_low_pc;
4539        uintptr_t cu_high_pc;
4540        uint8_t   fn_die;
4541        char      fn_name[16];
4542        uintptr_t fn_low_pc;
4543        uintptr_t fn_high_pc;
4544        uint8_t   cu_eoc;
4545    };
4546
4547    struct ElfImage {
4548        ElfW(Ehdr) ehdr;
4549        ElfW(Phdr) phdr;
4550        ElfW(Shdr) shdr[7];
4551        ElfW(Sym)  sym[2];
4552        struct DebugInfo di;
4553        uint8_t    da[24];
4554        char       str[80];
4555    };
4556
4557    struct ElfImage *img;
4558
4559    static const struct ElfImage img_template = {
4560        .ehdr = {
4561            .e_ident[EI_MAG0] = ELFMAG0,
4562            .e_ident[EI_MAG1] = ELFMAG1,
4563            .e_ident[EI_MAG2] = ELFMAG2,
4564            .e_ident[EI_MAG3] = ELFMAG3,
4565            .e_ident[EI_CLASS] = ELF_CLASS,
4566            .e_ident[EI_DATA] = ELF_DATA,
4567            .e_ident[EI_VERSION] = EV_CURRENT,
4568            .e_type = ET_EXEC,
4569            .e_machine = ELF_HOST_MACHINE,
4570            .e_version = EV_CURRENT,
4571            .e_phoff = offsetof(struct ElfImage, phdr),
4572            .e_shoff = offsetof(struct ElfImage, shdr),
4573            .e_ehsize = sizeof(ElfW(Shdr)),
4574            .e_phentsize = sizeof(ElfW(Phdr)),
4575            .e_phnum = 1,
4576            .e_shentsize = sizeof(ElfW(Shdr)),
4577            .e_shnum = ARRAY_SIZE(img->shdr),
4578            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4579#ifdef ELF_HOST_FLAGS
4580            .e_flags = ELF_HOST_FLAGS,
4581#endif
4582#ifdef ELF_OSABI
4583            .e_ident[EI_OSABI] = ELF_OSABI,
4584#endif
4585        },
4586        .phdr = {
4587            .p_type = PT_LOAD,
4588            .p_flags = PF_X,
4589        },
4590        .shdr = {
4591            [0] = { .sh_type = SHT_NULL },
4592            /* Trick: The contents of code_gen_buffer are not present in
4593               this fake ELF file; that got allocated elsewhere.  Therefore
4594               we mark .text as SHT_NOBITS (similar to .bss) so that readers
4595               will not look for contents.  We can record any address.  */
4596            [1] = { /* .text */
4597                .sh_type = SHT_NOBITS,
4598                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4599            },
4600            [2] = { /* .debug_info */
4601                .sh_type = SHT_PROGBITS,
4602                .sh_offset = offsetof(struct ElfImage, di),
4603                .sh_size = sizeof(struct DebugInfo),
4604            },
4605            [3] = { /* .debug_abbrev */
4606                .sh_type = SHT_PROGBITS,
4607                .sh_offset = offsetof(struct ElfImage, da),
4608                .sh_size = sizeof(img->da),
4609            },
4610            [4] = { /* .debug_frame */
4611                .sh_type = SHT_PROGBITS,
4612                .sh_offset = sizeof(struct ElfImage),
4613            },
4614            [5] = { /* .symtab */
4615                .sh_type = SHT_SYMTAB,
4616                .sh_offset = offsetof(struct ElfImage, sym),
4617                .sh_size = sizeof(img->sym),
4618                .sh_info = 1,
4619                .sh_link = ARRAY_SIZE(img->shdr) - 1,
4620                .sh_entsize = sizeof(ElfW(Sym)),
4621            },
4622            [6] = { /* .strtab */
4623                .sh_type = SHT_STRTAB,
4624                .sh_offset = offsetof(struct ElfImage, str),
4625                .sh_size = sizeof(img->str),
4626            }
4627        },
4628        .sym = {
4629            [1] = { /* code_gen_buffer */
4630                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4631                .st_shndx = 1,
4632            }
4633        },
4634        .di = {
4635            .len = sizeof(struct DebugInfo) - 4,
4636            .version = 2,
4637            .ptr_size = sizeof(void *),
4638            .cu_die = 1,
4639            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4640            .fn_die = 2,
4641            .fn_name = "code_gen_buffer"
4642        },
4643        .da = {
4644            1,          /* abbrev number (the cu) */
4645            0x11, 1,    /* DW_TAG_compile_unit, has children */
4646            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4647            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4648            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4649            0, 0,       /* end of abbrev */
4650            2,          /* abbrev number (the fn) */
4651            0x2e, 0,    /* DW_TAG_subprogram, no children */
4652            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4653            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4654            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4655            0, 0,       /* end of abbrev */
4656            0           /* no more abbrev */
4657        },
4658        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4659               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4660    };
4661
4662    /* We only need a single jit entry; statically allocate it.  */
4663    static struct jit_code_entry one_entry;
4664
4665    uintptr_t buf = (uintptr_t)buf_ptr;
4666    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4667    DebugFrameHeader *dfh;
4668
4669    img = g_malloc(img_size);
4670    *img = img_template;
4671
4672    img->phdr.p_vaddr = buf;
4673    img->phdr.p_paddr = buf;
4674    img->phdr.p_memsz = buf_size;
4675
4676    img->shdr[1].sh_name = find_string(img->str, ".text");
4677    img->shdr[1].sh_addr = buf;
4678    img->shdr[1].sh_size = buf_size;
4679
4680    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4681    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4682
4683    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4684    img->shdr[4].sh_size = debug_frame_size;
4685
4686    img->shdr[5].sh_name = find_string(img->str, ".symtab");
4687    img->shdr[6].sh_name = find_string(img->str, ".strtab");
4688
4689    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4690    img->sym[1].st_value = buf;
4691    img->sym[1].st_size = buf_size;
4692
4693    img->di.cu_low_pc = buf;
4694    img->di.cu_high_pc = buf + buf_size;
4695    img->di.fn_low_pc = buf;
4696    img->di.fn_high_pc = buf + buf_size;
4697
4698    dfh = (DebugFrameHeader *)(img + 1);
4699    memcpy(dfh, debug_frame, debug_frame_size);
4700    dfh->fde.func_start = buf;
4701    dfh->fde.func_len = buf_size;
4702
4703#ifdef DEBUG_JIT
4704    /* Enable this block to be able to debug the ELF image file creation.
4705       One can use readelf, objdump, or other inspection utilities.  */
4706    {
4707        FILE *f = fopen("/tmp/qemu.jit", "w+b");
4708        if (f) {
4709            if (fwrite(img, img_size, 1, f) != img_size) {
4710                /* Avoid stupid unused return value warning for fwrite.  */
4711            }
4712            fclose(f);
4713        }
4714    }
4715#endif
4716
4717    one_entry.symfile_addr = img;
4718    one_entry.symfile_size = img_size;
4719
4720    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4721    __jit_debug_descriptor.relevant_entry = &one_entry;
4722    __jit_debug_descriptor.first_entry = &one_entry;
4723    __jit_debug_register_code();
4724}
4725#else
4726/* No support for the feature.  Provide the entry point expected by exec.c,
4727   and implement the internal function we declared earlier.  */
4728
/* Variant used when ELF_HOST_MACHINE is not defined: it accepts the same
   arguments as the real implementation and ignores all of them.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Nothing to register.  */
}
4734
/* Entry point for builds without ELF_HOST_MACHINE: the code buffer
   described by @buf and @buf_size is left unregistered.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Nothing to register.  */
}
4738#endif /* ELF_HOST_MACHINE */
4739
4740#if !TCG_TARGET_MAYBE_vec
4741void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4742{
4743    g_assert_not_reached();
4744}
4745#endif
4746