qemu/tcg/tcg.c
<<
>>
Prefs
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25/* define it to use liveness analysis (better code) */
  26#define USE_TCG_OPTIMIZATIONS
  27
  28#include "qemu/osdep.h"
  29
   30/* Define to dump the ELF file used to communicate with GDB.  */
  31#undef DEBUG_JIT
  32
  33#include "qemu/error-report.h"
  34#include "qemu/cutils.h"
  35#include "qemu/host-utils.h"
  36#include "qemu/qemu-print.h"
  37#include "qemu/timer.h"
  38#include "qemu/cacheflush.h"
  39#include "qemu/cacheinfo.h"
  40
  41/* Note: the long term plan is to reduce the dependencies on the QEMU
  42   CPU definitions. Currently they are used for qemu_ld/st
  43   instructions */
  44#define NO_CPU_IO_DEFS
  45
  46#include "exec/exec-all.h"
  47#include "tcg/tcg-op.h"
  48
  49#if UINTPTR_MAX == UINT32_MAX
  50# define ELF_CLASS  ELFCLASS32
  51#else
  52# define ELF_CLASS  ELFCLASS64
  53#endif
  54#if HOST_BIG_ENDIAN
  55# define ELF_DATA   ELFDATA2MSB
  56#else
  57# define ELF_DATA   ELFDATA2LSB
  58#endif
  59
  60#include "elf.h"
  61#include "exec/log.h"
  62#include "tcg/tcg-ldst.h"
  63#include "tcg-internal.h"
  64
  65#ifdef CONFIG_TCG_INTERPRETER
  66#include <ffi.h>
  67#endif
  68
  69/* Forward declarations for functions declared in tcg-target.c.inc and
  70   used here. */
  71static void tcg_target_init(TCGContext *s);
  72static void tcg_target_qemu_prologue(TCGContext *s);
  73static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
  74                        intptr_t value, intptr_t addend);
  75
  76/* The CIE and FDE header definitions will be common to all hosts.  */
  77typedef struct {
  78    uint32_t len __attribute__((aligned((sizeof(void *)))));
  79    uint32_t id;
  80    uint8_t version;
  81    char augmentation[1];
  82    uint8_t code_align;
  83    uint8_t data_align;
  84    uint8_t return_column;
  85} DebugFrameCIE;
  86
  87typedef struct QEMU_PACKED {
  88    uint32_t len __attribute__((aligned((sizeof(void *)))));
  89    uint32_t cie_offset;
  90    uintptr_t func_start;
  91    uintptr_t func_len;
  92} DebugFrameFDEHeader;
  93
  94typedef struct QEMU_PACKED {
  95    DebugFrameCIE cie;
  96    DebugFrameFDEHeader fde;
  97} DebugFrameHeader;
  98
  99static void tcg_register_jit_int(const void *buf, size_t size,
 100                                 const void *debug_frame,
 101                                 size_t debug_frame_size)
 102    __attribute__((unused));
 103
 104/* Forward declarations for functions declared and used in tcg-target.c.inc. */
 105static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
 106                       intptr_t arg2);
 107static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 108static void tcg_out_movi(TCGContext *s, TCGType type,
 109                         TCGReg ret, tcg_target_long arg);
 110static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 111                       const TCGArg args[TCG_MAX_OP_ARGS],
 112                       const int const_args[TCG_MAX_OP_ARGS]);
 113#if TCG_TARGET_MAYBE_vec
 114static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 115                            TCGReg dst, TCGReg src);
 116static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 117                             TCGReg dst, TCGReg base, intptr_t offset);
 118static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
 119                             TCGReg dst, int64_t arg);
 120static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 121                           unsigned vecl, unsigned vece,
 122                           const TCGArg args[TCG_MAX_OP_ARGS],
 123                           const int const_args[TCG_MAX_OP_ARGS]);
 124#else
 125static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 126                                   TCGReg dst, TCGReg src)
 127{
 128    g_assert_not_reached();
 129}
 130static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 131                                    TCGReg dst, TCGReg base, intptr_t offset)
 132{
 133    g_assert_not_reached();
 134}
 135static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
 136                                    TCGReg dst, int64_t arg)
 137{
 138    g_assert_not_reached();
 139}
 140static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 141                                  unsigned vecl, unsigned vece,
 142                                  const TCGArg args[TCG_MAX_OP_ARGS],
 143                                  const int const_args[TCG_MAX_OP_ARGS])
 144{
 145    g_assert_not_reached();
 146}
 147#endif
 148static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
 149                       intptr_t arg2);
 150static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 151                        TCGReg base, intptr_t ofs);
 152#ifdef CONFIG_TCG_INTERPRETER
 153static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
 154                         ffi_cif *cif);
 155#else
 156static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
 157#endif
 158static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
 159#ifdef TCG_TARGET_NEED_LDST_LABELS
 160static int tcg_out_ldst_finalize(TCGContext *s);
 161#endif
 162
 163TCGContext tcg_init_ctx;
 164__thread TCGContext *tcg_ctx;
 165
 166TCGContext **tcg_ctxs;
 167unsigned int tcg_cur_ctxs;
 168unsigned int tcg_max_ctxs;
 169TCGv_env cpu_env = 0;
 170const void *tcg_code_gen_epilogue;
 171uintptr_t tcg_splitwx_diff;
 172
 173#ifndef CONFIG_TCG_INTERPRETER
 174tcg_prologue_fn *tcg_qemu_tb_exec;
 175#endif
 176
 177static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
 178static TCGRegSet tcg_target_call_clobber_regs;
 179
 180#if TCG_TARGET_INSN_UNIT_SIZE == 1
 181static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
 182{
 183    *s->code_ptr++ = v;
 184}
 185
 186static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
 187                                                      uint8_t v)
 188{
 189    *p = v;
 190}
 191#endif
 192
 193#if TCG_TARGET_INSN_UNIT_SIZE <= 2
 194static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
 195{
 196    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
 197        *s->code_ptr++ = v;
 198    } else {
 199        tcg_insn_unit *p = s->code_ptr;
 200        memcpy(p, &v, sizeof(v));
 201        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
 202    }
 203}
 204
 205static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
 206                                                       uint16_t v)
 207{
 208    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
 209        *p = v;
 210    } else {
 211        memcpy(p, &v, sizeof(v));
 212    }
 213}
 214#endif
 215
 216#if TCG_TARGET_INSN_UNIT_SIZE <= 4
 217static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
 218{
 219    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
 220        *s->code_ptr++ = v;
 221    } else {
 222        tcg_insn_unit *p = s->code_ptr;
 223        memcpy(p, &v, sizeof(v));
 224        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
 225    }
 226}
 227
 228static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
 229                                                       uint32_t v)
 230{
 231    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
 232        *p = v;
 233    } else {
 234        memcpy(p, &v, sizeof(v));
 235    }
 236}
 237#endif
 238
 239#if TCG_TARGET_INSN_UNIT_SIZE <= 8
 240static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
 241{
 242    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
 243        *s->code_ptr++ = v;
 244    } else {
 245        tcg_insn_unit *p = s->code_ptr;
 246        memcpy(p, &v, sizeof(v));
 247        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
 248    }
 249}
 250
 251static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
 252                                                       uint64_t v)
 253{
 254    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
 255        *p = v;
 256    } else {
 257        memcpy(p, &v, sizeof(v));
 258    }
 259}
 260#endif
 261
 262/* label relocation processing */
 263
 264static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
 265                          TCGLabel *l, intptr_t addend)
 266{
 267    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 268
 269    r->type = type;
 270    r->ptr = code_ptr;
 271    r->addend = addend;
 272    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
 273}
 274
 275static void tcg_out_label(TCGContext *s, TCGLabel *l)
 276{
 277    tcg_debug_assert(!l->has_value);
 278    l->has_value = 1;
 279    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
 280}
 281
 282TCGLabel *gen_new_label(void)
 283{
 284    TCGContext *s = tcg_ctx;
 285    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
 286
 287    memset(l, 0, sizeof(TCGLabel));
 288    l->id = s->nb_labels++;
 289    QSIMPLEQ_INIT(&l->relocs);
 290
 291    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
 292
 293    return l;
 294}
 295
 296static bool tcg_resolve_relocs(TCGContext *s)
 297{
 298    TCGLabel *l;
 299
 300    QSIMPLEQ_FOREACH(l, &s->labels, next) {
 301        TCGRelocation *r;
 302        uintptr_t value = l->u.value;
 303
 304        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
 305            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
 306                return false;
 307            }
 308        }
 309    }
 310    return true;
 311}
 312
 313static void set_jmp_reset_offset(TCGContext *s, int which)
 314{
 315    /*
 316     * We will check for overflow at the end of the opcode loop in
 317     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
 318     */
 319    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
 320}
 321
 322/* Signal overflow, starting over with fewer guest insns. */
 323static G_NORETURN
 324void tcg_raise_tb_overflow(TCGContext *s)
 325{
 326    siglongjmp(s->jmp_trans, -2);
 327}
 328
 329#define C_PFX1(P, A)                    P##A
 330#define C_PFX2(P, A, B)                 P##A##_##B
 331#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
 332#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
 333#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
 334#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
 335
 336/* Define an enumeration for the various combinations. */
 337
 338#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
 339#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
 340#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
 341#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
 342
 343#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
 344#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
 345#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
 346#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
 347
 348#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
 349
 350#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
 351#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
 352#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
 353#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
 354
 355typedef enum {
 356#include "tcg-target-con-set.h"
 357} TCGConstraintSetIndex;
 358
 359static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
 360
 361#undef C_O0_I1
 362#undef C_O0_I2
 363#undef C_O0_I3
 364#undef C_O0_I4
 365#undef C_O1_I1
 366#undef C_O1_I2
 367#undef C_O1_I3
 368#undef C_O1_I4
 369#undef C_N1_I2
 370#undef C_O2_I1
 371#undef C_O2_I2
 372#undef C_O2_I3
 373#undef C_O2_I4
 374
 375/* Put all of the constraint sets into an array, indexed by the enum. */
 376
 377#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
 378#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
 379#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
 380#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
 381
 382#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
 383#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
 384#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
 385#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
 386
 387#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
 388
 389#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
 390#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
 391#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
 392#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
 393
 394static const TCGTargetOpDef constraint_sets[] = {
 395#include "tcg-target-con-set.h"
 396};
 397
 398
 399#undef C_O0_I1
 400#undef C_O0_I2
 401#undef C_O0_I3
 402#undef C_O0_I4
 403#undef C_O1_I1
 404#undef C_O1_I2
 405#undef C_O1_I3
 406#undef C_O1_I4
 407#undef C_N1_I2
 408#undef C_O2_I1
 409#undef C_O2_I2
 410#undef C_O2_I3
 411#undef C_O2_I4
 412
 413/* Expand the enumerator to be returned from tcg_target_op_def(). */
 414
 415#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
 416#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
 417#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
 418#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
 419
 420#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
 421#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
 422#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
 423#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
 424
 425#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
 426
 427#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
 428#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
 429#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
 430#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
 431
 432#include "tcg-target.c.inc"
 433
 434static void alloc_tcg_plugin_context(TCGContext *s)
 435{
 436#ifdef CONFIG_PLUGIN
 437    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
 438    s->plugin_tb->insns =
 439        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
 440#endif
 441}
 442
 443/*
 444 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 445 * and registered the target's TCG globals) must register with this function
 446 * before initiating translation.
 447 *
 448 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 449 * of tcg_region_init() for the reasoning behind this.
 450 *
 451 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
  452 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 453 * is not used anymore for translation once this function is called.
 454 *
 455 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
  456 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 457 */
 458#ifdef CONFIG_USER_ONLY
 459void tcg_register_thread(void)
 460{
 461    tcg_ctx = &tcg_init_ctx;
 462}
 463#else
 464void tcg_register_thread(void)
 465{
 466    TCGContext *s = g_malloc(sizeof(*s));
 467    unsigned int i, n;
 468
 469    *s = tcg_init_ctx;
 470
 471    /* Relink mem_base.  */
 472    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
 473        if (tcg_init_ctx.temps[i].mem_base) {
 474            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
 475            tcg_debug_assert(b >= 0 && b < n);
 476            s->temps[i].mem_base = &s->temps[b];
 477        }
 478    }
 479
 480    /* Claim an entry in tcg_ctxs */
 481    n = qatomic_fetch_inc(&tcg_cur_ctxs);
 482    g_assert(n < tcg_max_ctxs);
 483    qatomic_set(&tcg_ctxs[n], s);
 484
 485    if (n > 0) {
 486        alloc_tcg_plugin_context(s);
 487        tcg_region_initial_alloc(s);
 488    }
 489
 490    tcg_ctx = s;
 491}
 492#endif /* !CONFIG_USER_ONLY */
 493
 494/* pool based memory allocation */
 495void *tcg_malloc_internal(TCGContext *s, int size)
 496{
 497    TCGPool *p;
 498    int pool_size;
 499    
 500    if (size > TCG_POOL_CHUNK_SIZE) {
 501        /* big malloc: insert a new pool (XXX: could optimize) */
 502        p = g_malloc(sizeof(TCGPool) + size);
 503        p->size = size;
 504        p->next = s->pool_first_large;
 505        s->pool_first_large = p;
 506        return p->data;
 507    } else {
 508        p = s->pool_current;
 509        if (!p) {
 510            p = s->pool_first;
 511            if (!p)
 512                goto new_pool;
 513        } else {
 514            if (!p->next) {
 515            new_pool:
 516                pool_size = TCG_POOL_CHUNK_SIZE;
 517                p = g_malloc(sizeof(TCGPool) + pool_size);
 518                p->size = pool_size;
 519                p->next = NULL;
 520                if (s->pool_current) 
 521                    s->pool_current->next = p;
 522                else
 523                    s->pool_first = p;
 524            } else {
 525                p = p->next;
 526            }
 527        }
 528    }
 529    s->pool_current = p;
 530    s->pool_cur = p->data + size;
 531    s->pool_end = p->data + p->size;
 532    return p->data;
 533}
 534
 535void tcg_pool_reset(TCGContext *s)
 536{
 537    TCGPool *p, *t;
 538    for (p = s->pool_first_large; p; p = t) {
 539        t = p->next;
 540        g_free(p);
 541    }
 542    s->pool_first_large = NULL;
 543    s->pool_cur = s->pool_end = NULL;
 544    s->pool_current = NULL;
 545}
 546
 547#include "exec/helper-proto.h"
 548
 549static const TCGHelperInfo all_helpers[] = {
 550#include "exec/helper-tcg.h"
 551};
 552static GHashTable *helper_table;
 553
 554#ifdef CONFIG_TCG_INTERPRETER
 555static GHashTable *ffi_table;
 556
 557static ffi_type * const typecode_to_ffi[8] = {
 558    [dh_typecode_void] = &ffi_type_void,
 559    [dh_typecode_i32]  = &ffi_type_uint32,
 560    [dh_typecode_s32]  = &ffi_type_sint32,
 561    [dh_typecode_i64]  = &ffi_type_uint64,
 562    [dh_typecode_s64]  = &ffi_type_sint64,
 563    [dh_typecode_ptr]  = &ffi_type_pointer,
 564};
 565#endif
 566
 567static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
 568static void process_op_defs(TCGContext *s);
 569static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
 570                                            TCGReg reg, const char *name);
 571
 572static void tcg_context_init(unsigned max_cpus)
 573{
 574    TCGContext *s = &tcg_init_ctx;
 575    int op, total_args, n, i;
 576    TCGOpDef *def;
 577    TCGArgConstraint *args_ct;
 578    TCGTemp *ts;
 579
 580    memset(s, 0, sizeof(*s));
 581    s->nb_globals = 0;
 582
 583    /* Count total number of arguments and allocate the corresponding
 584       space */
 585    total_args = 0;
 586    for(op = 0; op < NB_OPS; op++) {
 587        def = &tcg_op_defs[op];
 588        n = def->nb_iargs + def->nb_oargs;
 589        total_args += n;
 590    }
 591
 592    args_ct = g_new0(TCGArgConstraint, total_args);
 593
 594    for(op = 0; op < NB_OPS; op++) {
 595        def = &tcg_op_defs[op];
 596        def->args_ct = args_ct;
 597        n = def->nb_iargs + def->nb_oargs;
 598        args_ct += n;
 599    }
 600
 601    /* Register helpers.  */
 602    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
 603    helper_table = g_hash_table_new(NULL, NULL);
 604
 605    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
 606        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
 607                            (gpointer)&all_helpers[i]);
 608    }
 609
 610#ifdef CONFIG_TCG_INTERPRETER
 611    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
 612    ffi_table = g_hash_table_new(NULL, NULL);
 613    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
 614        struct {
 615            ffi_cif cif;
 616            ffi_type *args[];
 617        } *ca;
 618        uint32_t typemask = all_helpers[i].typemask;
 619        gpointer hash = (gpointer)(uintptr_t)typemask;
 620        ffi_status status;
 621        int nargs;
 622
 623        if (g_hash_table_lookup(ffi_table, hash)) {
 624            continue;
 625        }
 626
 627        /* Ignoring the return type, find the last non-zero field. */
 628        nargs = 32 - clz32(typemask >> 3);
 629        nargs = DIV_ROUND_UP(nargs, 3);
 630
 631        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
 632        ca->cif.rtype = typecode_to_ffi[typemask & 7];
 633        ca->cif.nargs = nargs;
 634
 635        if (nargs != 0) {
 636            ca->cif.arg_types = ca->args;
 637            for (int j = 0; j < nargs; ++j) {
 638                int typecode = extract32(typemask, (j + 1) * 3, 3);
 639                ca->args[j] = typecode_to_ffi[typecode];
 640            }
 641        }
 642
 643        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
 644                              ca->cif.rtype, ca->cif.arg_types);
 645        assert(status == FFI_OK);
 646
 647        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
 648    }
 649#endif
 650
 651    tcg_target_init(s);
 652    process_op_defs(s);
 653
 654    /* Reverse the order of the saved registers, assuming they're all at
 655       the start of tcg_target_reg_alloc_order.  */
 656    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
 657        int r = tcg_target_reg_alloc_order[n];
 658        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
 659            break;
 660        }
 661    }
 662    for (i = 0; i < n; ++i) {
 663        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
 664    }
 665    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
 666        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
 667    }
 668
 669    alloc_tcg_plugin_context(s);
 670
 671    tcg_ctx = s;
 672    /*
 673     * In user-mode we simply share the init context among threads, since we
 674     * use a single region. See the documentation tcg_region_init() for the
 675     * reasoning behind this.
 676     * In softmmu we will have at most max_cpus TCG threads.
 677     */
 678#ifdef CONFIG_USER_ONLY
 679    tcg_ctxs = &tcg_ctx;
 680    tcg_cur_ctxs = 1;
 681    tcg_max_ctxs = 1;
 682#else
 683    tcg_max_ctxs = max_cpus;
 684    tcg_ctxs = g_new0(TCGContext *, max_cpus);
 685#endif
 686
 687    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
 688    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
 689    cpu_env = temp_tcgv_ptr(ts);
 690}
 691
 692void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
 693{
 694    tcg_context_init(max_cpus);
 695    tcg_region_init(tb_size, splitwx, max_cpus);
 696}
 697
 698/*
 699 * Allocate TBs right before their corresponding translated code, making
 700 * sure that TBs and code are on different cache lines.
 701 */
 702TranslationBlock *tcg_tb_alloc(TCGContext *s)
 703{
 704    uintptr_t align = qemu_icache_linesize;
 705    TranslationBlock *tb;
 706    void *next;
 707
 708 retry:
 709    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
 710    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
 711
 712    if (unlikely(next > s->code_gen_highwater)) {
 713        if (tcg_region_alloc(s)) {
 714            return NULL;
 715        }
 716        goto retry;
 717    }
 718    qatomic_set(&s->code_gen_ptr, next);
 719    s->data_gen_ptr = NULL;
 720    return tb;
 721}
 722
 723void tcg_prologue_init(TCGContext *s)
 724{
 725    size_t prologue_size;
 726
 727    s->code_ptr = s->code_gen_ptr;
 728    s->code_buf = s->code_gen_ptr;
 729    s->data_gen_ptr = NULL;
 730
 731#ifndef CONFIG_TCG_INTERPRETER
 732    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
 733#endif
 734
 735#ifdef TCG_TARGET_NEED_POOL_LABELS
 736    s->pool_labels = NULL;
 737#endif
 738
 739    qemu_thread_jit_write();
 740    /* Generate the prologue.  */
 741    tcg_target_qemu_prologue(s);
 742
 743#ifdef TCG_TARGET_NEED_POOL_LABELS
 744    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
 745    {
 746        int result = tcg_out_pool_finalize(s);
 747        tcg_debug_assert(result == 0);
 748    }
 749#endif
 750
 751    prologue_size = tcg_current_code_size(s);
 752
 753#ifndef CONFIG_TCG_INTERPRETER
 754    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
 755                        (uintptr_t)s->code_buf, prologue_size);
 756#endif
 757
 758#ifdef DEBUG_DISAS
 759    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
 760        FILE *logfile = qemu_log_trylock();
 761        if (logfile) {
 762            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
 763            if (s->data_gen_ptr) {
 764                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
 765                size_t data_size = prologue_size - code_size;
 766                size_t i;
 767
 768                disas(logfile, s->code_gen_ptr, code_size);
 769
 770                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
 771                    if (sizeof(tcg_target_ulong) == 8) {
 772                        fprintf(logfile,
 773                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
 774                                (uintptr_t)s->data_gen_ptr + i,
 775                                *(uint64_t *)(s->data_gen_ptr + i));
 776                    } else {
 777                        fprintf(logfile,
 778                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
 779                                (uintptr_t)s->data_gen_ptr + i,
 780                                *(uint32_t *)(s->data_gen_ptr + i));
 781                    }
 782                }
 783            } else {
 784                disas(logfile, s->code_gen_ptr, prologue_size);
 785            }
 786            fprintf(logfile, "\n");
 787            qemu_log_unlock(logfile);
 788        }
 789    }
 790#endif
 791
 792#ifndef CONFIG_TCG_INTERPRETER
 793    /*
 794     * Assert that goto_ptr is implemented completely, setting an epilogue.
 795     * For tci, we use NULL as the signal to return from the interpreter,
 796     * so skip this check.
 797     */
 798    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
 799#endif
 800
 801    tcg_region_prologue_set(s);
 802}
 803
 804void tcg_func_start(TCGContext *s)
 805{
 806    tcg_pool_reset(s);
 807    s->nb_temps = s->nb_globals;
 808
 809    /* No temps have been previously allocated for size or locality.  */
 810    memset(s->free_temps, 0, sizeof(s->free_temps));
 811
 812    /* No constant temps have been previously allocated. */
 813    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
 814        if (s->const_table[i]) {
 815            g_hash_table_remove_all(s->const_table[i]);
 816        }
 817    }
 818
 819    s->nb_ops = 0;
 820    s->nb_labels = 0;
 821    s->current_frame_offset = s->frame_start;
 822
 823#ifdef CONFIG_DEBUG_TCG
 824    s->goto_tb_issue_mask = 0;
 825#endif
 826
 827    QTAILQ_INIT(&s->ops);
 828    QTAILQ_INIT(&s->free_ops);
 829    QSIMPLEQ_INIT(&s->labels);
 830}
 831
 832static TCGTemp *tcg_temp_alloc(TCGContext *s)
 833{
 834    int n = s->nb_temps++;
 835
 836    if (n >= TCG_MAX_TEMPS) {
 837        tcg_raise_tb_overflow(s);
 838    }
 839    return memset(&s->temps[n], 0, sizeof(TCGTemp));
 840}
 841
 842static TCGTemp *tcg_global_alloc(TCGContext *s)
 843{
 844    TCGTemp *ts;
 845
 846    tcg_debug_assert(s->nb_globals == s->nb_temps);
 847    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
 848    s->nb_globals++;
 849    ts = tcg_temp_alloc(s);
 850    ts->kind = TEMP_GLOBAL;
 851
 852    return ts;
 853}
 854
 855static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
 856                                            TCGReg reg, const char *name)
 857{
 858    TCGTemp *ts;
 859
 860    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
 861        tcg_abort();
 862    }
 863
 864    ts = tcg_global_alloc(s);
 865    ts->base_type = type;
 866    ts->type = type;
 867    ts->kind = TEMP_FIXED;
 868    ts->reg = reg;
 869    ts->name = name;
 870    tcg_regset_set_reg(s->reserved_regs, reg);
 871
 872    return ts;
 873}
 874
 875void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
 876{
 877    s->frame_start = start;
 878    s->frame_end = start + size;
 879    s->frame_temp
 880        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
 881}
 882
 883TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
 884                                     intptr_t offset, const char *name)
 885{
 886    TCGContext *s = tcg_ctx;
 887    TCGTemp *base_ts = tcgv_ptr_temp(base);
 888    TCGTemp *ts = tcg_global_alloc(s);
 889    int indirect_reg = 0, bigendian = 0;
 890#if HOST_BIG_ENDIAN
 891    bigendian = 1;
 892#endif
 893
 894    switch (base_ts->kind) {
 895    case TEMP_FIXED:
 896        break;
 897    case TEMP_GLOBAL:
 898        /* We do not support double-indirect registers.  */
 899        tcg_debug_assert(!base_ts->indirect_reg);
 900        base_ts->indirect_base = 1;
 901        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
 902                            ? 2 : 1);
 903        indirect_reg = 1;
 904        break;
 905    default:
 906        g_assert_not_reached();
 907    }
 908
 909    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
 910        TCGTemp *ts2 = tcg_global_alloc(s);
 911        char buf[64];
 912
 913        ts->base_type = TCG_TYPE_I64;
 914        ts->type = TCG_TYPE_I32;
 915        ts->indirect_reg = indirect_reg;
 916        ts->mem_allocated = 1;
 917        ts->mem_base = base_ts;
 918        ts->mem_offset = offset + bigendian * 4;
 919        pstrcpy(buf, sizeof(buf), name);
 920        pstrcat(buf, sizeof(buf), "_0");
 921        ts->name = strdup(buf);
 922
 923        tcg_debug_assert(ts2 == ts + 1);
 924        ts2->base_type = TCG_TYPE_I64;
 925        ts2->type = TCG_TYPE_I32;
 926        ts2->indirect_reg = indirect_reg;
 927        ts2->mem_allocated = 1;
 928        ts2->mem_base = base_ts;
 929        ts2->mem_offset = offset + (1 - bigendian) * 4;
 930        pstrcpy(buf, sizeof(buf), name);
 931        pstrcat(buf, sizeof(buf), "_1");
 932        ts2->name = strdup(buf);
 933    } else {
 934        ts->base_type = type;
 935        ts->type = type;
 936        ts->indirect_reg = indirect_reg;
 937        ts->mem_allocated = 1;
 938        ts->mem_base = base_ts;
 939        ts->mem_offset = offset;
 940        ts->name = name;
 941    }
 942    return ts;
 943}
 944
 945TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
 946{
 947    TCGContext *s = tcg_ctx;
 948    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
 949    TCGTemp *ts;
 950    int idx, k;
 951
 952    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
 953    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
 954    if (idx < TCG_MAX_TEMPS) {
 955        /* There is already an available temp with the right type.  */
 956        clear_bit(idx, s->free_temps[k].l);
 957
 958        ts = &s->temps[idx];
 959        ts->temp_allocated = 1;
 960        tcg_debug_assert(ts->base_type == type);
 961        tcg_debug_assert(ts->kind == kind);
 962    } else {
 963        ts = tcg_temp_alloc(s);
 964        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
 965            TCGTemp *ts2 = tcg_temp_alloc(s);
 966
 967            ts->base_type = type;
 968            ts->type = TCG_TYPE_I32;
 969            ts->temp_allocated = 1;
 970            ts->kind = kind;
 971
 972            tcg_debug_assert(ts2 == ts + 1);
 973            ts2->base_type = TCG_TYPE_I64;
 974            ts2->type = TCG_TYPE_I32;
 975            ts2->temp_allocated = 1;
 976            ts2->kind = kind;
 977        } else {
 978            ts->base_type = type;
 979            ts->type = type;
 980            ts->temp_allocated = 1;
 981            ts->kind = kind;
 982        }
 983    }
 984
 985#if defined(CONFIG_DEBUG_TCG)
 986    s->temps_in_use++;
 987#endif
 988    return ts;
 989}
 990
 991TCGv_vec tcg_temp_new_vec(TCGType type)
 992{
 993    TCGTemp *t;
 994
 995#ifdef CONFIG_DEBUG_TCG
 996    switch (type) {
 997    case TCG_TYPE_V64:
 998        assert(TCG_TARGET_HAS_v64);
 999        break;
1000    case TCG_TYPE_V128:
1001        assert(TCG_TARGET_HAS_v128);
1002        break;
1003    case TCG_TYPE_V256:
1004        assert(TCG_TARGET_HAS_v256);
1005        break;
1006    default:
1007        g_assert_not_reached();
1008    }
1009#endif
1010
1011    t = tcg_temp_new_internal(type, 0);
1012    return temp_tcgv_vec(t);
1013}
1014
1015/* Create a new temp of the same type as an existing temp.  */
1016TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1017{
1018    TCGTemp *t = tcgv_vec_temp(match);
1019
1020    tcg_debug_assert(t->temp_allocated != 0);
1021
1022    t = tcg_temp_new_internal(t->base_type, 0);
1023    return temp_tcgv_vec(t);
1024}
1025
1026void tcg_temp_free_internal(TCGTemp *ts)
1027{
1028    TCGContext *s = tcg_ctx;
1029    int k, idx;
1030
1031    switch (ts->kind) {
1032    case TEMP_CONST:
1033        /*
1034         * In order to simplify users of tcg_constant_*,
1035         * silently ignore free.
1036         */
1037        return;
1038    case TEMP_NORMAL:
1039    case TEMP_LOCAL:
1040        break;
1041    default:
1042        g_assert_not_reached();
1043    }
1044
1045#if defined(CONFIG_DEBUG_TCG)
1046    s->temps_in_use--;
1047    if (s->temps_in_use < 0) {
1048        fprintf(stderr, "More temporaries freed than allocated!\n");
1049    }
1050#endif
1051
1052    tcg_debug_assert(ts->temp_allocated != 0);
1053    ts->temp_allocated = 0;
1054
1055    idx = temp_idx(ts);
1056    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1057    set_bit(idx, s->free_temps[k].l);
1058}
1059
1060TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1061{
1062    TCGContext *s = tcg_ctx;
1063    GHashTable *h = s->const_table[type];
1064    TCGTemp *ts;
1065
1066    if (h == NULL) {
1067        h = g_hash_table_new(g_int64_hash, g_int64_equal);
1068        s->const_table[type] = h;
1069    }
1070
1071    ts = g_hash_table_lookup(h, &val);
1072    if (ts == NULL) {
1073        ts = tcg_temp_alloc(s);
1074
1075        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1076            TCGTemp *ts2 = tcg_temp_alloc(s);
1077
1078            ts->base_type = TCG_TYPE_I64;
1079            ts->type = TCG_TYPE_I32;
1080            ts->kind = TEMP_CONST;
1081            ts->temp_allocated = 1;
1082            /*
1083             * Retain the full value of the 64-bit constant in the low
1084             * part, so that the hash table works.  Actual uses will
1085             * truncate the value to the low part.
1086             */
1087            ts->val = val;
1088
1089            tcg_debug_assert(ts2 == ts + 1);
1090            ts2->base_type = TCG_TYPE_I64;
1091            ts2->type = TCG_TYPE_I32;
1092            ts2->kind = TEMP_CONST;
1093            ts2->temp_allocated = 1;
1094            ts2->val = val >> 32;
1095        } else {
1096            ts->base_type = type;
1097            ts->type = type;
1098            ts->kind = TEMP_CONST;
1099            ts->temp_allocated = 1;
1100            ts->val = val;
1101        }
1102        g_hash_table_insert(h, &ts->val, ts);
1103    }
1104
1105    return ts;
1106}
1107
1108TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1109{
1110    val = dup_const(vece, val);
1111    return temp_tcgv_vec(tcg_constant_internal(type, val));
1112}
1113
1114TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1115{
1116    TCGTemp *t = tcgv_vec_temp(match);
1117
1118    tcg_debug_assert(t->temp_allocated != 0);
1119    return tcg_constant_vec(t->base_type, vece, val);
1120}
1121
1122TCGv_i32 tcg_const_i32(int32_t val)
1123{
1124    TCGv_i32 t0;
1125    t0 = tcg_temp_new_i32();
1126    tcg_gen_movi_i32(t0, val);
1127    return t0;
1128}
1129
1130TCGv_i64 tcg_const_i64(int64_t val)
1131{
1132    TCGv_i64 t0;
1133    t0 = tcg_temp_new_i64();
1134    tcg_gen_movi_i64(t0, val);
1135    return t0;
1136}
1137
1138TCGv_i32 tcg_const_local_i32(int32_t val)
1139{
1140    TCGv_i32 t0;
1141    t0 = tcg_temp_local_new_i32();
1142    tcg_gen_movi_i32(t0, val);
1143    return t0;
1144}
1145
1146TCGv_i64 tcg_const_local_i64(int64_t val)
1147{
1148    TCGv_i64 t0;
1149    t0 = tcg_temp_local_new_i64();
1150    tcg_gen_movi_i64(t0, val);
1151    return t0;
1152}
1153
1154#if defined(CONFIG_DEBUG_TCG)
1155void tcg_clear_temp_count(void)
1156{
1157    TCGContext *s = tcg_ctx;
1158    s->temps_in_use = 0;
1159}
1160
1161int tcg_check_temp_count(void)
1162{
1163    TCGContext *s = tcg_ctx;
1164    if (s->temps_in_use) {
1165        /* Clear the count so that we don't give another
1166         * warning immediately next time around.
1167         */
1168        s->temps_in_use = 0;
1169        return 1;
1170    }
1171    return 0;
1172}
1173#endif
1174
/*
 * Return true if OP may appear in the opcode stream.
 * Test the runtime variable that controls each opcode.
 *
 * Each generic opcode maps to one of: unconditionally true, a
 * TCG_TARGET_HAS_* feature test, or a test of the host register width.
 * Vector opcodes additionally require that at least one vector width is
 * available.  Opcodes not listed here must lie strictly between
 * INDEX_op_last_generic and NB_OPS (checked by a debug assertion) and are
 * reported as supported.
 */
bool tcg_op_supported(TCGOpcode op)
{
    /* True when the host provides any of the vector widths.  */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Opcodes that are always available.  */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* 32-bit integer opcodes that are always available.  */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* 32-bit integer opcodes gated on a per-opcode feature macro.  */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons are reported only for 32-bit hosts.  */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit integer opcodes are reported only for 64-bit hosts.  */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* 64-bit integer opcodes gated on a per-opcode feature macro.  */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector opcodes available whenever any vector width is.  */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Vector opcodes that also need a per-opcode feature macro.  */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Only opcodes past the generic range may reach this point.  */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1469
/*
 * Emit an INDEX_op_call op that calls the helper FUNC.
 *
 * func:  helper entry point; also the key used to look up its
 *        TCGHelperInfo in helper_table.
 * ret:   temp receiving the return value, or NULL if there is none.
 * nargs: number of entries in ARGS.
 * args:  argument temps.  When TCG_TARGET_EXTEND_ARGS is defined on a
 *        64-bit host, entries for 32-bit arguments are overwritten with
 *        temporary 64-bit temps, which are freed before returning.
 *
 * The op's argument array is laid out as: return temps, argument temps
 * (with TCG_CALL_DUMMY_ARG padding where alignment is wanted), then FUNC
 * and the TCGHelperInfo pointer.
 *
 * The helper's typemask holds one 3-bit type code per slot: slot 0 is the
 * return value and slot i + 1 is argument i.  Bit 0 of a code is tested
 * as the signedness flag; the remaining bits are compared against
 * dh_typecode_i32 / dh_typecode_i64.
 *
 * Note: we convert the 64 bit args to 32 bit and do some alignment
 * and endian swap. Maybe it would be better to do the alignment
 * and endian swap in tcg_reg_alloc_call().
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned typemask;
    const TCGHelperInfo *info;
    TCGOp *op;

    /*
     * NOTE(review): info is dereferenced without a NULL check; presumably
     * every helper passed here has been registered in helper_table --
     * confirm against the registration code.
     */
    info = g_hash_table_lookup(helper_table, (gpointer)func);
    typemask = info->typemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Widen every 32-bit argument to 64 bits, sign- or zero-extending.  */
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
        bool is_signed = argtype & 1;

        if (is_32bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i32 orig = temp_tcgv_i32(args[i]);
            if (is_signed) {
                tcg_gen_ext_i32_i64(temp, orig);
            } else {
                tcg_gen_extu_i32_i64(temp, orig);
            }
            /* The caller's array now refers to the widened temp.  */
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the next free index in op->args.  */
    pi = 0;
    if (ret != NULL) {
        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
            /* A 64-bit result on a 32-bit host uses two temps, ret and ret + 1. */
#if HOST_BIG_ENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* real_args counts argument slots emitted, including dummy padding.  */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
        bool want_align = false;

#if defined(CONFIG_TCG_INTERPRETER)
        /*
         * Align all arguments, so that they land in predictable places
         * for passing off to ffi_call.
         */
        want_align = true;
#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
        /* Some targets want aligned 64 bit args */
        want_align = is_64bit;
#endif

        /* Pad with a dummy slot so the argument starts at an even index.  */
        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
            op->args[pi++] = TCG_CALL_DUMMY_ARG;
            real_args++;
        }

        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
            /*
             * If stack grows up, then we will be placing successive
             * arguments at lower addresses, which means we need to
             * reverse the order compared to how we would normally
             * treat either big or little-endian.  For those arguments
             * that will wind up in registers, this still works for
             * HPPA (the only current STACK_GROWSUP target) since the
             * argument registers are *also* allocated in decreasing
             * order.  If another such target is added, this logic may
             * have to get more complicated to differentiate between
             * stack arguments and register arguments.
             */
#if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function pointer and its info follow the argument temps.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Release the widened temps created by the extension loop above.  */
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;

        if (is_32bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1599
1600static void tcg_reg_alloc_start(TCGContext *s)
1601{
1602    int i, n;
1603
1604    for (i = 0, n = s->nb_temps; i < n; i++) {
1605        TCGTemp *ts = &s->temps[i];
1606        TCGTempVal val = TEMP_VAL_MEM;
1607
1608        switch (ts->kind) {
1609        case TEMP_CONST:
1610            val = TEMP_VAL_CONST;
1611            break;
1612        case TEMP_FIXED:
1613            val = TEMP_VAL_REG;
1614            break;
1615        case TEMP_GLOBAL:
1616            break;
1617        case TEMP_NORMAL:
1618        case TEMP_EBB:
1619            val = TEMP_VAL_DEAD;
1620            /* fall through */
1621        case TEMP_LOCAL:
1622            ts->mem_allocated = 0;
1623            break;
1624        default:
1625            g_assert_not_reached();
1626        }
1627        ts->val_type = val;
1628    }
1629
1630    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1631}
1632
1633static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1634                                 TCGTemp *ts)
1635{
1636    int idx = temp_idx(ts);
1637
1638    switch (ts->kind) {
1639    case TEMP_FIXED:
1640    case TEMP_GLOBAL:
1641        pstrcpy(buf, buf_size, ts->name);
1642        break;
1643    case TEMP_LOCAL:
1644        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1645        break;
1646    case TEMP_EBB:
1647        snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1648        break;
1649    case TEMP_NORMAL:
1650        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1651        break;
1652    case TEMP_CONST:
1653        switch (ts->type) {
1654        case TCG_TYPE_I32:
1655            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1656            break;
1657#if TCG_TARGET_REG_BITS > 32
1658        case TCG_TYPE_I64:
1659            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1660            break;
1661#endif
1662        case TCG_TYPE_V64:
1663        case TCG_TYPE_V128:
1664        case TCG_TYPE_V256:
1665            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1666                     64 << (ts->type - TCG_TYPE_V64), ts->val);
1667            break;
1668        default:
1669            g_assert_not_reached();
1670        }
1671        break;
1672    }
1673    return buf;
1674}
1675
1676static char *tcg_get_arg_str(TCGContext *s, char *buf,
1677                             int buf_size, TCGArg arg)
1678{
1679    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1680}
1681
/*
 * Printable names for comparison conditions, indexed by TCG_COND_* value.
 * Indices that are not listed are NULL, so users must check the entry
 * before printing it.
 */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1697
/*
 * Printable names for memory operations, indexed by the size, sign and
 * byte-swap bits of a MemOp (op & (MO_BSWAP | MO_SSIZE)).
 */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
1713
/*
 * Printable prefixes for the alignment field of a MemOp, indexed by
 * (op & MO_AMASK) >> MO_ASHIFT.  The empty string is used for MO_ALIGN
 * when TARGET_ALIGNED_ONLY is defined and for MO_UNALN otherwise; the
 * other of the two is printed explicitly.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1729
/*
 * Printable names for bswap flag combinations, indexed by the TCG_BSWAP_*
 * flag value.  Each row is a fixed 6-byte array: the longest name,
 * "iz,oz", is 5 characters plus the terminating NUL.  Rows that are not
 * listed are zero-filled, i.e. empty strings rather than NULL pointers.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
1737
1738static inline bool tcg_regset_single(TCGRegSet d)
1739{
1740    return (d & (d - 1)) == 0;
1741}
1742
1743static inline TCGReg tcg_regset_first(TCGRegSet d)
1744{
1745    if (TCG_TARGET_NB_REGS <= 32) {
1746        return ctz32(d);
1747    } else {
1748        return ctz64(d);
1749    }
1750}
1751
/*
 * "No error" fprintf: evaluates to the number of characters written, or
 * to 0 when fprintf() reports an error, so the result can be added
 * directly to a running column count.
 */
#define ne_fprintf(...) \
    ({ int n_ = fprintf(__VA_ARGS__); n_ < 0 ? 0 : n_; })
1755
1756static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1757{
1758    char buf[128];
1759    TCGOp *op;
1760
1761    QTAILQ_FOREACH(op, &s->ops, link) {
1762        int i, k, nb_oargs, nb_iargs, nb_cargs;
1763        const TCGOpDef *def;
1764        TCGOpcode c;
1765        int col = 0;
1766
1767        c = op->opc;
1768        def = &tcg_op_defs[c];
1769
1770        if (c == INDEX_op_insn_start) {
1771            nb_oargs = 0;
1772            col += ne_fprintf(f, "\n ----");
1773
1774            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1775                target_ulong a;
1776#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1777                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1778#else
1779                a = op->args[i];
1780#endif
1781                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1782            }
1783        } else if (c == INDEX_op_call) {
1784            const TCGHelperInfo *info = tcg_call_info(op);
1785            void *func = tcg_call_func(op);
1786
1787            /* variable number of arguments */
1788            nb_oargs = TCGOP_CALLO(op);
1789            nb_iargs = TCGOP_CALLI(op);
1790            nb_cargs = def->nb_cargs;
1791
1792            col += ne_fprintf(f, " %s ", def->name);
1793
1794            /*
1795             * Print the function name from TCGHelperInfo, if available.
1796             * Note that plugins have a template function for the info,
1797             * but the actual function pointer comes from the plugin.
1798             */
1799            if (func == info->func) {
1800                col += ne_fprintf(f, "%s", info->name);
1801            } else {
1802                col += ne_fprintf(f, "plugin(%p)", func);
1803            }
1804
1805            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1806            for (i = 0; i < nb_oargs; i++) {
1807                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1808                                                            op->args[i]));
1809            }
1810            for (i = 0; i < nb_iargs; i++) {
1811                TCGArg arg = op->args[nb_oargs + i];
1812                const char *t = "<dummy>";
1813                if (arg != TCG_CALL_DUMMY_ARG) {
1814                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1815                }
1816                col += ne_fprintf(f, ",%s", t);
1817            }
1818        } else {
1819            col += ne_fprintf(f, " %s ", def->name);
1820
1821            nb_oargs = def->nb_oargs;
1822            nb_iargs = def->nb_iargs;
1823            nb_cargs = def->nb_cargs;
1824
1825            if (def->flags & TCG_OPF_VECTOR) {
1826                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1827                                  8 << TCGOP_VECE(op));
1828            }
1829
1830            k = 0;
1831            for (i = 0; i < nb_oargs; i++) {
1832                const char *sep =  k ? "," : "";
1833                col += ne_fprintf(f, "%s%s", sep,
1834                                  tcg_get_arg_str(s, buf, sizeof(buf),
1835                                                  op->args[k++]));
1836            }
1837            for (i = 0; i < nb_iargs; i++) {
1838                const char *sep =  k ? "," : "";
1839                col += ne_fprintf(f, "%s%s", sep,
1840                                  tcg_get_arg_str(s, buf, sizeof(buf),
1841                                                  op->args[k++]));
1842            }
1843            switch (c) {
1844            case INDEX_op_brcond_i32:
1845            case INDEX_op_setcond_i32:
1846            case INDEX_op_movcond_i32:
1847            case INDEX_op_brcond2_i32:
1848            case INDEX_op_setcond2_i32:
1849            case INDEX_op_brcond_i64:
1850            case INDEX_op_setcond_i64:
1851            case INDEX_op_movcond_i64:
1852            case INDEX_op_cmp_vec:
1853            case INDEX_op_cmpsel_vec:
1854                if (op->args[k] < ARRAY_SIZE(cond_name)
1855                    && cond_name[op->args[k]]) {
1856                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
1857                } else {
1858                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
1859                }
1860                i = 1;
1861                break;
1862            case INDEX_op_qemu_ld_i32:
1863            case INDEX_op_qemu_st_i32:
1864            case INDEX_op_qemu_st8_i32:
1865            case INDEX_op_qemu_ld_i64:
1866            case INDEX_op_qemu_st_i64:
1867                {
1868                    MemOpIdx oi = op->args[k++];
1869                    MemOp op = get_memop(oi);
1870                    unsigned ix = get_mmuidx(oi);
1871
1872                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1873                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
1874                    } else {
1875                        const char *s_al, *s_op;
1876                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1877                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1878                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
1879                    }
1880                    i = 1;
1881                }
1882                break;
1883            case INDEX_op_bswap16_i32:
1884            case INDEX_op_bswap16_i64:
1885            case INDEX_op_bswap32_i32:
1886            case INDEX_op_bswap32_i64:
1887            case INDEX_op_bswap64_i64:
1888                {
1889                    TCGArg flags = op->args[k];
1890                    const char *name = NULL;
1891
1892                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
1893                        name = bswap_flag_name[flags];
1894                    }
1895                    if (name) {
1896                        col += ne_fprintf(f, ",%s", name);
1897                    } else {
1898                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
1899                    }
1900                    i = k = 1;
1901                }
1902                break;
1903            default:
1904                i = 0;
1905                break;
1906            }
1907            switch (c) {
1908            case INDEX_op_set_label:
1909            case INDEX_op_br:
1910            case INDEX_op_brcond_i32:
1911            case INDEX_op_brcond_i64:
1912            case INDEX_op_brcond2_i32:
1913                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
1914                                  arg_label(op->args[k])->id);
1915                i++, k++;
1916                break;
1917            default:
1918                break;
1919            }
1920            for (; i < nb_cargs; i++, k++) {
1921                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
1922                                  op->args[k]);
1923            }
1924        }
1925
1926        if (have_prefs || op->life) {
1927            for (; col < 40; ++col) {
1928                putc(' ', f);
1929            }
1930        }
1931
1932        if (op->life) {
1933            unsigned life = op->life;
1934
1935            if (life & (SYNC_ARG * 3)) {
1936                ne_fprintf(f, "  sync:");
1937                for (i = 0; i < 2; ++i) {
1938                    if (life & (SYNC_ARG << i)) {
1939                        ne_fprintf(f, " %d", i);
1940                    }
1941                }
1942            }
1943            life /= DEAD_ARG;
1944            if (life) {
1945                ne_fprintf(f, "  dead:");
1946                for (i = 0; life; ++i, life >>= 1) {
1947                    if (life & 1) {
1948                        ne_fprintf(f, " %d", i);
1949                    }
1950                }
1951            }
1952        }
1953
1954        if (have_prefs) {
1955            for (i = 0; i < nb_oargs; ++i) {
1956                TCGRegSet set = op->output_pref[i];
1957
1958                if (i == 0) {
1959                    ne_fprintf(f, "  pref=");
1960                } else {
1961                    ne_fprintf(f, ",");
1962                }
1963                if (set == 0) {
1964                    ne_fprintf(f, "none");
1965                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
1966                    ne_fprintf(f, "all");
1967#ifdef CONFIG_DEBUG_TCG
1968                } else if (tcg_regset_single(set)) {
1969                    TCGReg reg = tcg_regset_first(set);
1970                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
1971#endif
1972                } else if (TCG_TARGET_NB_REGS <= 32) {
1973                    ne_fprintf(f, "0x%x", (uint32_t)set);
1974                } else {
1975                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
1976                }
1977            }
1978        }
1979
1980        putc('\n', f);
1981    }
1982}
1983
1984/* we give more priority to constraints with less registers */
1985static int get_constraint_priority(const TCGOpDef *def, int k)
1986{
1987    const TCGArgConstraint *arg_ct = &def->args_ct[k];
1988    int n;
1989
1990    if (arg_ct->oalias) {
1991        /* an alias is equivalent to a single register */
1992        n = 1;
1993    } else {
1994        n = ctpop64(arg_ct->regs);
1995    }
1996    return TCG_TARGET_NB_REGS - n + 1;
1997}
1998
1999/* sort from highest priority to lowest */
2000static void sort_constraints(TCGOpDef *def, int start, int n)
2001{
2002    int i, j;
2003    TCGArgConstraint *a = def->args_ct;
2004
2005    for (i = 0; i < n; i++) {
2006        a[start + i].sort_index = start + i;
2007    }
2008    if (n <= 1) {
2009        return;
2010    }
2011    for (i = 0; i < n - 1; i++) {
2012        for (j = i + 1; j < n; j++) {
2013            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2014            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2015            if (p1 < p2) {
2016                int tmp = a[start + i].sort_index;
2017                a[start + i].sort_index = a[start + j].sort_index;
2018                a[start + j].sort_index = tmp;
2019            }
2020        }
2021    }
2022}
2023
/*
 * Decode the target's constraint strings into tcg_op_defs[].args_ct.
 *
 * For every opcode that is not flagged TCG_OPF_NOT_PRESENT and that has
 * at least one input or output argument, look up its constraint set
 * with tcg_target_op_def(), parse each argument's constraint string
 * one character at a time, and then sort the output constraints and
 * the input constraints by priority.  The parameter S is not used.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* Consume the string one constraint character at a time. */
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    /*
                     * A digit ties this argument to output number OARG.
                     * It must be the first character of the string, and
                     * the output's register set must already be non-empty.
                     * The argument starts out as a copy of the output's
                     * constraint.
                     */
                    {
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Sets the newreg flag on this argument. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    /* Adds TCG_CT_CONST to the accepted constant kinds. */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /*
                 * Include all of the target-specific constraints.
                 * Each CONST()/REGS() entry in the included header
                 * expands to one more case label of this switch.
                 */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * sort the constraints (XXX: this is just an heuristic);
         * outputs occupy [0, nb_oargs) and inputs follow them.
         */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2110
2111void tcg_op_remove(TCGContext *s, TCGOp *op)
2112{
2113    TCGLabel *label;
2114
2115    switch (op->opc) {
2116    case INDEX_op_br:
2117        label = arg_label(op->args[0]);
2118        label->refs--;
2119        break;
2120    case INDEX_op_brcond_i32:
2121    case INDEX_op_brcond_i64:
2122        label = arg_label(op->args[3]);
2123        label->refs--;
2124        break;
2125    case INDEX_op_brcond2_i32:
2126        label = arg_label(op->args[5]);
2127        label->refs--;
2128        break;
2129    default:
2130        break;
2131    }
2132
2133    QTAILQ_REMOVE(&s->ops, op, link);
2134    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2135    s->nb_ops--;
2136
2137#ifdef CONFIG_PROFILER
2138    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2139#endif
2140}
2141
2142void tcg_remove_ops_after(TCGOp *op)
2143{
2144    TCGContext *s = tcg_ctx;
2145
2146    while (true) {
2147        TCGOp *last = tcg_last_op();
2148        if (last == op) {
2149            return;
2150        }
2151        tcg_op_remove(s, last);
2152    }
2153}
2154
2155static TCGOp *tcg_op_alloc(TCGOpcode opc)
2156{
2157    TCGContext *s = tcg_ctx;
2158    TCGOp *op;
2159
2160    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2161        op = tcg_malloc(sizeof(TCGOp));
2162    } else {
2163        op = QTAILQ_FIRST(&s->free_ops);
2164        QTAILQ_REMOVE(&s->free_ops, op, link);
2165    }
2166    memset(op, 0, offsetof(TCGOp, link));
2167    op->opc = opc;
2168    s->nb_ops++;
2169
2170    return op;
2171}
2172
2173TCGOp *tcg_emit_op(TCGOpcode opc)
2174{
2175    TCGOp *op = tcg_op_alloc(opc);
2176    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2177    return op;
2178}
2179
2180TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2181{
2182    TCGOp *new_op = tcg_op_alloc(opc);
2183    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2184    return new_op;
2185}
2186
2187TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2188{
2189    TCGOp *new_op = tcg_op_alloc(opc);
2190    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2191    return new_op;
2192}
2193
2194/* Reachable analysis : remove unreachable code.  */
2195static void reachable_code_pass(TCGContext *s)
2196{
2197    TCGOp *op, *op_next;
2198    bool dead = false;
2199
2200    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2201        bool remove = dead;
2202        TCGLabel *label;
2203
2204        switch (op->opc) {
2205        case INDEX_op_set_label:
2206            label = arg_label(op->args[0]);
2207            if (label->refs == 0) {
2208                /*
2209                 * While there is an occasional backward branch, virtually
2210                 * all branches generated by the translators are forward.
2211                 * Which means that generally we will have already removed
2212                 * all references to the label that will be, and there is
2213                 * little to be gained by iterating.
2214                 */
2215                remove = true;
2216            } else {
2217                /* Once we see a label, insns become live again.  */
2218                dead = false;
2219                remove = false;
2220
2221                /*
2222                 * Optimization can fold conditional branches to unconditional.
2223                 * If we find a label with one reference which is preceded by
2224                 * an unconditional branch to it, remove both.  This needed to
2225                 * wait until the dead code in between them was removed.
2226                 */
2227                if (label->refs == 1) {
2228                    TCGOp *op_prev = QTAILQ_PREV(op, link);
2229                    if (op_prev->opc == INDEX_op_br &&
2230                        label == arg_label(op_prev->args[0])) {
2231                        tcg_op_remove(s, op_prev);
2232                        remove = true;
2233                    }
2234                }
2235            }
2236            break;
2237
2238        case INDEX_op_br:
2239        case INDEX_op_exit_tb:
2240        case INDEX_op_goto_ptr:
2241            /* Unconditional branches; everything following is dead.  */
2242            dead = true;
2243            break;
2244
2245        case INDEX_op_call:
2246            /* Notice noreturn helper calls, raising exceptions.  */
2247            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2248                dead = true;
2249            }
2250            break;
2251
2252        case INDEX_op_insn_start:
2253            /* Never remove -- we need to keep these for unwind.  */
2254            remove = false;
2255            break;
2256
2257        default:
2258            break;
2259        }
2260
2261        if (remove) {
2262            tcg_op_remove(s, op);
2263        }
2264    }
2265}
2266
/*
 * Bit flags kept in TCGTemp.state by the liveness code below;
 * the two may be OR'ed together (TS_DEAD | TS_MEM).
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the DEAD_ARG / SYNC_ARG bit for argument N.  Both macros read
 * a variable named arg_life, which must be in scope where they are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2272
2273/* For liveness_pass_1, the register preferences for a given temp.  */
2274static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2275{
2276    return ts->state_ptr;
2277}
2278
2279/* For liveness_pass_1, reset the preferences for a given temp to the
2280 * maximal regset for its type.
2281 */
2282static inline void la_reset_pref(TCGTemp *ts)
2283{
2284    *la_temp_pref(ts)
2285        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2286}
2287
2288/* liveness analysis: end of function: all temps are dead, and globals
2289   should be in memory. */
2290static void la_func_end(TCGContext *s, int ng, int nt)
2291{
2292    int i;
2293
2294    for (i = 0; i < ng; ++i) {
2295        s->temps[i].state = TS_DEAD | TS_MEM;
2296        la_reset_pref(&s->temps[i]);
2297    }
2298    for (i = ng; i < nt; ++i) {
2299        s->temps[i].state = TS_DEAD;
2300        la_reset_pref(&s->temps[i]);
2301    }
2302}
2303
2304/* liveness analysis: end of basic block: all temps are dead, globals
2305   and local temps should be in memory. */
2306static void la_bb_end(TCGContext *s, int ng, int nt)
2307{
2308    int i;
2309
2310    for (i = 0; i < nt; ++i) {
2311        TCGTemp *ts = &s->temps[i];
2312        int state;
2313
2314        switch (ts->kind) {
2315        case TEMP_FIXED:
2316        case TEMP_GLOBAL:
2317        case TEMP_LOCAL:
2318            state = TS_DEAD | TS_MEM;
2319            break;
2320        case TEMP_NORMAL:
2321        case TEMP_EBB:
2322        case TEMP_CONST:
2323            state = TS_DEAD;
2324            break;
2325        default:
2326            g_assert_not_reached();
2327        }
2328        ts->state = state;
2329        la_reset_pref(ts);
2330    }
2331}
2332
2333/* liveness analysis: sync globals back to memory.  */
2334static void la_global_sync(TCGContext *s, int ng)
2335{
2336    int i;
2337
2338    for (i = 0; i < ng; ++i) {
2339        int state = s->temps[i].state;
2340        s->temps[i].state = state | TS_MEM;
2341        if (state == TS_DEAD) {
2342            /* If the global was previously dead, reset prefs.  */
2343            la_reset_pref(&s->temps[i]);
2344        }
2345    }
2346}
2347
2348/*
2349 * liveness analysis: conditional branch: all temps are dead unless
2350 * explicitly live-across-conditional-branch, globals and local temps
2351 * should be synced.
2352 */
2353static void la_bb_sync(TCGContext *s, int ng, int nt)
2354{
2355    la_global_sync(s, ng);
2356
2357    for (int i = ng; i < nt; ++i) {
2358        TCGTemp *ts = &s->temps[i];
2359        int state;
2360
2361        switch (ts->kind) {
2362        case TEMP_LOCAL:
2363            state = ts->state;
2364            ts->state = state | TS_MEM;
2365            if (state != TS_DEAD) {
2366                continue;
2367            }
2368            break;
2369        case TEMP_NORMAL:
2370            s->temps[i].state = TS_DEAD;
2371            break;
2372        case TEMP_EBB:
2373        case TEMP_CONST:
2374            continue;
2375        default:
2376            g_assert_not_reached();
2377        }
2378        la_reset_pref(&s->temps[i]);
2379    }
2380}
2381
2382/* liveness analysis: sync globals back to memory and kill.  */
2383static void la_global_kill(TCGContext *s, int ng)
2384{
2385    int i;
2386
2387    for (i = 0; i < ng; i++) {
2388        s->temps[i].state = TS_DEAD | TS_MEM;
2389        la_reset_pref(&s->temps[i]);
2390    }
2391}
2392
2393/* liveness analysis: note live globals crossing calls.  */
2394static void la_cross_call(TCGContext *s, int nt)
2395{
2396    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2397    int i;
2398
2399    for (i = 0; i < nt; i++) {
2400        TCGTemp *ts = &s->temps[i];
2401        if (!(ts->state & TS_DEAD)) {
2402            TCGRegSet *pset = la_temp_pref(ts);
2403            TCGRegSet set = *pset;
2404
2405            set &= mask;
2406            /* If the combination is not possible, restart.  */
2407            if (set == 0) {
2408                set = tcg_target_available_regs[ts->type] & mask;
2409            }
2410            *pset = set;
2411        }
2412    }
2413}
2414
/*
 * Liveness analysis, pass 1.
 *
 * Walk the ops of S from last to first.  For each op that is kept,
 * record in op->life which arguments are dead (DEAD_ARG bits) and which
 * outputs are flagged TS_MEM (SYNC_ARG bits), and record in
 * op->output_pref[] the register preferences collected from later uses.
 * Ops without side effects whose outputs are all dead are removed, and
 * double-word add/sub/mul ops with a dead half are rewritten in place
 * to the single-word opcode.
 */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference slot per temp, reached through la_temp_pref().  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;
                int nb_call_regs;

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);
                call_flags = tcg_call_flags(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);

                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
                    op->output_pref[i] = 0;
                }

                /*
                 * Neither NO_WRITE nor NO_READ set: kill the globals.
                 * Otherwise, if NO_READ is not set: sync them only.
                 */
                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    /* arg_temp() may yield NULL for a call argument.  */
                    if (ts && ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);

                /* Input arguments are live for preceding opcodes.  */
                for (i = 0; i < nb_iargs; i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts && ts->state & TS_DEAD) {
                        /* For those arguments that die, and will be allocated
                         * in registers, clear the register set for that arg,
                         * to be filled in below.  For args that will be on
                         * the stack, reset to any available reg.
                         */
                        *la_temp_pref(ts)
                            = (i < nb_call_regs ? 0 :
                               tcg_target_available_regs[ts->type]);
                        ts->state &= ~TS_DEAD;
                    }
                }

                /* For each input argument, add its input register to prefs.
                   If a temp is used once, this produces a single set bit.  */
                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts) {
                        tcg_regset_set_reg(*la_temp_pref(ts),
                                           tcg_target_call_iarg_regs[i]);
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            /* The op goes to the free list; op->life is still written below. */
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            /*
             * NOTE(review): when the opcode was narrowed above, opc was
             * updated but def still points at the original double-word
             * opcode's definition; the flag tests and the args_ct[]
             * lookups below therefore use the original definition.
             * Confirm this is intended.
             */
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                op->output_pref[i] = *la_temp_pref(ts);

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    /* Narrow by this input's register constraint ...  */
                    set &= ct->regs;
                    /* ... and, for an aliased input, by the output's prefs. */
                    if (ct->ialias) {
                        set &= op->output_pref[ct->alias_index];
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
2727
2728/* Liveness analysis: Convert indirect regs to direct temporaries.  */
2729static bool liveness_pass_2(TCGContext *s)
2730{
2731    int nb_globals = s->nb_globals;
2732    int nb_temps, i;
2733    bool changes = false;
2734    TCGOp *op, *op_next;
2735
2736    /* Create a temporary for each indirect global.  */
2737    for (i = 0; i < nb_globals; ++i) {
2738        TCGTemp *its = &s->temps[i];
2739        if (its->indirect_reg) {
2740            TCGTemp *dts = tcg_temp_alloc(s);
2741            dts->type = its->type;
2742            dts->base_type = its->base_type;
2743            dts->kind = TEMP_EBB;
2744            its->state_ptr = dts;
2745        } else {
2746            its->state_ptr = NULL;
2747        }
2748        /* All globals begin dead.  */
2749        its->state = TS_DEAD;
2750    }
2751    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2752        TCGTemp *its = &s->temps[i];
2753        its->state_ptr = NULL;
2754        its->state = TS_DEAD;
2755    }
2756
2757    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2758        TCGOpcode opc = op->opc;
2759        const TCGOpDef *def = &tcg_op_defs[opc];
2760        TCGLifeData arg_life = op->life;
2761        int nb_iargs, nb_oargs, call_flags;
2762        TCGTemp *arg_ts, *dir_ts;
2763
2764        if (opc == INDEX_op_call) {
2765            nb_oargs = TCGOP_CALLO(op);
2766            nb_iargs = TCGOP_CALLI(op);
2767            call_flags = tcg_call_flags(op);
2768        } else {
2769            nb_iargs = def->nb_iargs;
2770            nb_oargs = def->nb_oargs;
2771
2772            /* Set flags similar to how calls require.  */
2773            if (def->flags & TCG_OPF_COND_BRANCH) {
2774                /* Like reading globals: sync_globals */
2775                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2776            } else if (def->flags & TCG_OPF_BB_END) {
2777                /* Like writing globals: save_globals */
2778                call_flags = 0;
2779            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2780                /* Like reading globals: sync_globals */
2781                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2782            } else {
2783                /* No effect on globals.  */
2784                call_flags = (TCG_CALL_NO_READ_GLOBALS |
2785                              TCG_CALL_NO_WRITE_GLOBALS);
2786            }
2787        }
2788
2789        /* Make sure that input arguments are available.  */
2790        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2791            arg_ts = arg_temp(op->args[i]);
2792            if (arg_ts) {
2793                dir_ts = arg_ts->state_ptr;
2794                if (dir_ts && arg_ts->state == TS_DEAD) {
2795                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2796                                      ? INDEX_op_ld_i32
2797                                      : INDEX_op_ld_i64);
2798                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2799
2800                    lop->args[0] = temp_arg(dir_ts);
2801                    lop->args[1] = temp_arg(arg_ts->mem_base);
2802                    lop->args[2] = arg_ts->mem_offset;
2803
2804                    /* Loaded, but synced with memory.  */
2805                    arg_ts->state = TS_MEM;
2806                }
2807            }
2808        }
2809
2810        /* Perform input replacement, and mark inputs that became dead.
2811           No action is required except keeping temp_state up to date
2812           so that we reload when needed.  */
2813        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2814            arg_ts = arg_temp(op->args[i]);
2815            if (arg_ts) {
2816                dir_ts = arg_ts->state_ptr;
2817                if (dir_ts) {
2818                    op->args[i] = temp_arg(dir_ts);
2819                    changes = true;
2820                    if (IS_DEAD_ARG(i)) {
2821                        arg_ts->state = TS_DEAD;
2822                    }
2823                }
2824            }
2825        }
2826
2827        /* Liveness analysis should ensure that the following are
2828           all correct, for call sites and basic block end points.  */
2829        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2830            /* Nothing to do */
2831        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2832            for (i = 0; i < nb_globals; ++i) {
2833                /* Liveness should see that globals are synced back,
2834                   that is, either TS_DEAD or TS_MEM.  */
2835                arg_ts = &s->temps[i];
2836                tcg_debug_assert(arg_ts->state_ptr == 0
2837                                 || arg_ts->state != 0);
2838            }
2839        } else {
2840            for (i = 0; i < nb_globals; ++i) {
2841                /* Liveness should see that globals are saved back,
2842                   that is, TS_DEAD, waiting to be reloaded.  */
2843                arg_ts = &s->temps[i];
2844                tcg_debug_assert(arg_ts->state_ptr == 0
2845                                 || arg_ts->state == TS_DEAD);
2846            }
2847        }
2848
2849        /* Outputs become available.  */
2850        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2851            arg_ts = arg_temp(op->args[0]);
2852            dir_ts = arg_ts->state_ptr;
2853            if (dir_ts) {
2854                op->args[0] = temp_arg(dir_ts);
2855                changes = true;
2856
2857                /* The output is now live and modified.  */
2858                arg_ts->state = 0;
2859
2860                if (NEED_SYNC_ARG(0)) {
2861                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2862                                      ? INDEX_op_st_i32
2863                                      : INDEX_op_st_i64);
2864                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2865                    TCGTemp *out_ts = dir_ts;
2866
2867                    if (IS_DEAD_ARG(0)) {
2868                        out_ts = arg_temp(op->args[1]);
2869                        arg_ts->state = TS_DEAD;
2870                        tcg_op_remove(s, op);
2871                    } else {
2872                        arg_ts->state = TS_MEM;
2873                    }
2874
2875                    sop->args[0] = temp_arg(out_ts);
2876                    sop->args[1] = temp_arg(arg_ts->mem_base);
2877                    sop->args[2] = arg_ts->mem_offset;
2878                } else {
2879                    tcg_debug_assert(!IS_DEAD_ARG(0));
2880                }
2881            }
2882        } else {
2883            for (i = 0; i < nb_oargs; i++) {
2884                arg_ts = arg_temp(op->args[i]);
2885                dir_ts = arg_ts->state_ptr;
2886                if (!dir_ts) {
2887                    continue;
2888                }
2889                op->args[i] = temp_arg(dir_ts);
2890                changes = true;
2891
2892                /* The output is now live and modified.  */
2893                arg_ts->state = 0;
2894
2895                /* Sync outputs upon their last write.  */
2896                if (NEED_SYNC_ARG(i)) {
2897                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2898                                      ? INDEX_op_st_i32
2899                                      : INDEX_op_st_i64);
2900                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2901
2902                    sop->args[0] = temp_arg(dir_ts);
2903                    sop->args[1] = temp_arg(arg_ts->mem_base);
2904                    sop->args[2] = arg_ts->mem_offset;
2905
2906                    arg_ts->state = TS_MEM;
2907                }
2908                /* Drop outputs that are dead.  */
2909                if (IS_DEAD_ARG(i)) {
2910                    arg_ts->state = TS_DEAD;
2911                }
2912            }
2913        }
2914    }
2915
2916    return changes;
2917}
2918
2919#ifdef CONFIG_DEBUG_TCG
2920static void dump_regs(TCGContext *s)
2921{
2922    TCGTemp *ts;
2923    int i;
2924    char buf[64];
2925
2926    for(i = 0; i < s->nb_temps; i++) {
2927        ts = &s->temps[i];
2928        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2929        switch(ts->val_type) {
2930        case TEMP_VAL_REG:
2931            printf("%s", tcg_target_reg_names[ts->reg]);
2932            break;
2933        case TEMP_VAL_MEM:
2934            printf("%d(%s)", (int)ts->mem_offset,
2935                   tcg_target_reg_names[ts->mem_base->reg]);
2936            break;
2937        case TEMP_VAL_CONST:
2938            printf("$0x%" PRIx64, ts->val);
2939            break;
2940        case TEMP_VAL_DEAD:
2941            printf("D");
2942            break;
2943        default:
2944            printf("???");
2945            break;
2946        }
2947        printf("\n");
2948    }
2949
2950    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2951        if (s->reg_to_temp[i] != NULL) {
2952            printf("%s: %s\n", 
2953                   tcg_target_reg_names[i], 
2954                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2955        }
2956    }
2957}
2958
2959static void check_regs(TCGContext *s)
2960{
2961    int reg;
2962    int k;
2963    TCGTemp *ts;
2964    char buf[64];
2965
2966    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2967        ts = s->reg_to_temp[reg];
2968        if (ts != NULL) {
2969            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2970                printf("Inconsistency for register %s:\n", 
2971                       tcg_target_reg_names[reg]);
2972                goto fail;
2973            }
2974        }
2975    }
2976    for (k = 0; k < s->nb_temps; k++) {
2977        ts = &s->temps[k];
2978        if (ts->val_type == TEMP_VAL_REG
2979            && ts->kind != TEMP_FIXED
2980            && s->reg_to_temp[ts->reg] != ts) {
2981            printf("Inconsistency for temp %s:\n",
2982                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2983        fail:
2984            printf("reg state:\n");
2985            dump_regs(s);
2986            tcg_abort();
2987        }
2988    }
2989}
2990#endif
2991
2992static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2993{
2994    intptr_t off, size, align;
2995
2996    switch (ts->type) {
2997    case TCG_TYPE_I32:
2998        size = align = 4;
2999        break;
3000    case TCG_TYPE_I64:
3001    case TCG_TYPE_V64:
3002        size = align = 8;
3003        break;
3004    case TCG_TYPE_V128:
3005        size = align = 16;
3006        break;
3007    case TCG_TYPE_V256:
3008        /* Note that we do not require aligned storage for V256. */
3009        size = 32, align = 16;
3010        break;
3011    default:
3012        g_assert_not_reached();
3013    }
3014
3015    /*
3016     * Assume the stack is sufficiently aligned.
3017     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3018     * and do not require 16 byte vector alignment.  This seems slightly
3019     * easier than fully parameterizing the above switch statement.
3020     */
3021    align = MIN(TCG_TARGET_STACK_ALIGN, align);
3022    off = ROUND_UP(s->current_frame_offset, align);
3023
3024    /* If we've exhausted the stack frame, restart with a smaller TB. */
3025    if (off + size > s->frame_end) {
3026        tcg_raise_tb_overflow(s);
3027    }
3028    s->current_frame_offset = off + size;
3029
3030    ts->mem_offset = off;
3031#if defined(__sparc__)
3032    ts->mem_offset += TCG_TARGET_STACK_BIAS;
3033#endif
3034    ts->mem_base = s->frame_temp;
3035    ts->mem_allocated = 1;
3036}
3037
3038static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3039
3040/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3041   mark it free; otherwise mark it dead.  */
3042static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3043{
3044    TCGTempVal new_type;
3045
3046    switch (ts->kind) {
3047    case TEMP_FIXED:
3048        return;
3049    case TEMP_GLOBAL:
3050    case TEMP_LOCAL:
3051        new_type = TEMP_VAL_MEM;
3052        break;
3053    case TEMP_NORMAL:
3054    case TEMP_EBB:
3055        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3056        break;
3057    case TEMP_CONST:
3058        new_type = TEMP_VAL_CONST;
3059        break;
3060    default:
3061        g_assert_not_reached();
3062    }
3063    if (ts->val_type == TEMP_VAL_REG) {
3064        s->reg_to_temp[ts->reg] = NULL;
3065    }
3066    ts->val_type = new_type;
3067}
3068
3069/* Mark a temporary as dead.  */
3070static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3071{
3072    temp_free_or_dead(s, ts, 1);
3073}
3074
3075/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3076   registers needs to be allocated to store a constant.  If 'free_or_dead'
3077   is non-zero, subsequently release the temporary; if it is positive, the
3078   temp is dead; if it is negative, the temp is free.  */
3079static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3080                      TCGRegSet preferred_regs, int free_or_dead)
3081{
3082    if (!temp_readonly(ts) && !ts->mem_coherent) {
3083        if (!ts->mem_allocated) {
3084            temp_allocate_frame(s, ts);
3085        }
3086        switch (ts->val_type) {
3087        case TEMP_VAL_CONST:
3088            /* If we're going to free the temp immediately, then we won't
3089               require it later in a register, so attempt to store the
3090               constant to memory directly.  */
3091            if (free_or_dead
3092                && tcg_out_sti(s, ts->type, ts->val,
3093                               ts->mem_base->reg, ts->mem_offset)) {
3094                break;
3095            }
3096            temp_load(s, ts, tcg_target_available_regs[ts->type],
3097                      allocated_regs, preferred_regs);
3098            /* fallthrough */
3099
3100        case TEMP_VAL_REG:
3101            tcg_out_st(s, ts->type, ts->reg,
3102                       ts->mem_base->reg, ts->mem_offset);
3103            break;
3104
3105        case TEMP_VAL_MEM:
3106            break;
3107
3108        case TEMP_VAL_DEAD:
3109        default:
3110            tcg_abort();
3111        }
3112        ts->mem_coherent = 1;
3113    }
3114    if (free_or_dead) {
3115        temp_free_or_dead(s, ts, free_or_dead);
3116    }
3117}
3118
3119/* free register 'reg' by spilling the corresponding temporary if necessary */
3120static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3121{
3122    TCGTemp *ts = s->reg_to_temp[reg];
3123    if (ts != NULL) {
3124        temp_sync(s, ts, allocated_regs, 0, -1);
3125    }
3126}
3127
3128/**
3129 * tcg_reg_alloc:
3130 * @required_regs: Set of registers in which we must allocate.
3131 * @allocated_regs: Set of registers which must be avoided.
3132 * @preferred_regs: Set of registers we should prefer.
3133 * @rev: True if we search the registers in "indirect" order.
3134 *
3135 * The allocated register must be in @required_regs & ~@allocated_regs,
3136 * but if we can put it in @preferred_regs we may save a move later.
3137 */
3138static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3139                            TCGRegSet allocated_regs,
3140                            TCGRegSet preferred_regs, bool rev)
3141{
3142    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3143    TCGRegSet reg_ct[2];
3144    const int *order;
3145
3146    reg_ct[1] = required_regs & ~allocated_regs;
3147    tcg_debug_assert(reg_ct[1] != 0);
3148    reg_ct[0] = reg_ct[1] & preferred_regs;
3149
3150    /* Skip the preferred_regs option if it cannot be satisfied,
3151       or if the preference made no difference.  */
3152    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3153
3154    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3155
3156    /* Try free registers, preferences first.  */
3157    for (j = f; j < 2; j++) {
3158        TCGRegSet set = reg_ct[j];
3159
3160        if (tcg_regset_single(set)) {
3161            /* One register in the set.  */
3162            TCGReg reg = tcg_regset_first(set);
3163            if (s->reg_to_temp[reg] == NULL) {
3164                return reg;
3165            }
3166        } else {
3167            for (i = 0; i < n; i++) {
3168                TCGReg reg = order[i];
3169                if (s->reg_to_temp[reg] == NULL &&
3170                    tcg_regset_test_reg(set, reg)) {
3171                    return reg;
3172                }
3173            }
3174        }
3175    }
3176
3177    /* We must spill something.  */
3178    for (j = f; j < 2; j++) {
3179        TCGRegSet set = reg_ct[j];
3180
3181        if (tcg_regset_single(set)) {
3182            /* One register in the set.  */
3183            TCGReg reg = tcg_regset_first(set);
3184            tcg_reg_free(s, reg, allocated_regs);
3185            return reg;
3186        } else {
3187            for (i = 0; i < n; i++) {
3188                TCGReg reg = order[i];
3189                if (tcg_regset_test_reg(set, reg)) {
3190                    tcg_reg_free(s, reg, allocated_regs);
3191                    return reg;
3192                }
3193            }
3194        }
3195    }
3196
3197    tcg_abort();
3198}
3199
3200/* Make sure the temporary is in a register.  If needed, allocate the register
3201   from DESIRED while avoiding ALLOCATED.  */
3202static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3203                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3204{
3205    TCGReg reg;
3206
3207    switch (ts->val_type) {
3208    case TEMP_VAL_REG:
3209        return;
3210    case TEMP_VAL_CONST:
3211        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3212                            preferred_regs, ts->indirect_base);
3213        if (ts->type <= TCG_TYPE_I64) {
3214            tcg_out_movi(s, ts->type, reg, ts->val);
3215        } else {
3216            uint64_t val = ts->val;
3217            MemOp vece = MO_64;
3218
3219            /*
3220             * Find the minimal vector element that matches the constant.
3221             * The targets will, in general, have to do this search anyway,
3222             * do this generically.
3223             */
3224            if (val == dup_const(MO_8, val)) {
3225                vece = MO_8;
3226            } else if (val == dup_const(MO_16, val)) {
3227                vece = MO_16;
3228            } else if (val == dup_const(MO_32, val)) {
3229                vece = MO_32;
3230            }
3231
3232            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3233        }
3234        ts->mem_coherent = 0;
3235        break;
3236    case TEMP_VAL_MEM:
3237        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3238                            preferred_regs, ts->indirect_base);
3239        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3240        ts->mem_coherent = 1;
3241        break;
3242    case TEMP_VAL_DEAD:
3243    default:
3244        tcg_abort();
3245    }
3246    ts->reg = reg;
3247    ts->val_type = TEMP_VAL_REG;
3248    s->reg_to_temp[reg] = ts;
3249}
3250
3251/* Save a temporary to memory. 'allocated_regs' is used in case a
3252   temporary registers needs to be allocated to store a constant.  */
3253static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3254{
3255    /* The liveness analysis already ensures that globals are back
3256       in memory. Keep an tcg_debug_assert for safety. */
3257    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3258}
3259
3260/* save globals to their canonical location and assume they can be
3261   modified be the following code. 'allocated_regs' is used in case a
3262   temporary registers needs to be allocated to store a constant. */
3263static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3264{
3265    int i, n;
3266
3267    for (i = 0, n = s->nb_globals; i < n; i++) {
3268        temp_save(s, &s->temps[i], allocated_regs);
3269    }
3270}
3271
3272/* sync globals to their canonical location and assume they can be
3273   read by the following code. 'allocated_regs' is used in case a
3274   temporary registers needs to be allocated to store a constant. */
3275static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3276{
3277    int i, n;
3278
3279    for (i = 0, n = s->nb_globals; i < n; i++) {
3280        TCGTemp *ts = &s->temps[i];
3281        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3282                         || ts->kind == TEMP_FIXED
3283                         || ts->mem_coherent);
3284    }
3285}
3286
3287/* at the end of a basic block, we assume all temporaries are dead and
3288   all globals are stored at their canonical location. */
3289static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3290{
3291    int i;
3292
3293    for (i = s->nb_globals; i < s->nb_temps; i++) {
3294        TCGTemp *ts = &s->temps[i];
3295
3296        switch (ts->kind) {
3297        case TEMP_LOCAL:
3298            temp_save(s, ts, allocated_regs);
3299            break;
3300        case TEMP_NORMAL:
3301        case TEMP_EBB:
3302            /* The liveness analysis already ensures that temps are dead.
3303               Keep an tcg_debug_assert for safety. */
3304            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3305            break;
3306        case TEMP_CONST:
3307            /* Similarly, we should have freed any allocated register. */
3308            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3309            break;
3310        default:
3311            g_assert_not_reached();
3312        }
3313    }
3314
3315    save_globals(s, allocated_regs);
3316}
3317
3318/*
3319 * At a conditional branch, we assume all temporaries are dead unless
3320 * explicitly live-across-conditional-branch; all globals and local
3321 * temps are synced to their location.
3322 */
3323static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3324{
3325    sync_globals(s, allocated_regs);
3326
3327    for (int i = s->nb_globals; i < s->nb_temps; i++) {
3328        TCGTemp *ts = &s->temps[i];
3329        /*
3330         * The liveness analysis already ensures that temps are dead.
3331         * Keep tcg_debug_asserts for safety.
3332         */
3333        switch (ts->kind) {
3334        case TEMP_LOCAL:
3335            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3336            break;
3337        case TEMP_NORMAL:
3338            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3339            break;
3340        case TEMP_EBB:
3341        case TEMP_CONST:
3342            break;
3343        default:
3344            g_assert_not_reached();
3345        }
3346    }
3347}
3348
3349/*
3350 * Specialized code generation for INDEX_op_mov_* with a constant.
3351 */
3352static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3353                                  tcg_target_ulong val, TCGLifeData arg_life,
3354                                  TCGRegSet preferred_regs)
3355{
3356    /* ENV should not be modified.  */
3357    tcg_debug_assert(!temp_readonly(ots));
3358
3359    /* The movi is not explicitly generated here.  */
3360    if (ots->val_type == TEMP_VAL_REG) {
3361        s->reg_to_temp[ots->reg] = NULL;
3362    }
3363    ots->val_type = TEMP_VAL_CONST;
3364    ots->val = val;
3365    ots->mem_coherent = 0;
3366    if (NEED_SYNC_ARG(0)) {
3367        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3368    } else if (IS_DEAD_ARG(0)) {
3369        temp_dead(s, ots);
3370    }
3371}
3372
3373/*
3374 * Specialized code generation for INDEX_op_mov_*.
3375 */
3376static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3377{
3378    const TCGLifeData arg_life = op->life;
3379    TCGRegSet allocated_regs, preferred_regs;
3380    TCGTemp *ts, *ots;
3381    TCGType otype, itype;
3382
3383    allocated_regs = s->reserved_regs;
3384    preferred_regs = op->output_pref[0];
3385    ots = arg_temp(op->args[0]);
3386    ts = arg_temp(op->args[1]);
3387
3388    /* ENV should not be modified.  */
3389    tcg_debug_assert(!temp_readonly(ots));
3390
3391    /* Note that otype != itype for no-op truncation.  */
3392    otype = ots->type;
3393    itype = ts->type;
3394
3395    if (ts->val_type == TEMP_VAL_CONST) {
3396        /* propagate constant or generate sti */
3397        tcg_target_ulong val = ts->val;
3398        if (IS_DEAD_ARG(1)) {
3399            temp_dead(s, ts);
3400        }
3401        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3402        return;
3403    }
3404
3405    /* If the source value is in memory we're going to be forced
3406       to have it in a register in order to perform the copy.  Copy
3407       the SOURCE value into its own register first, that way we
3408       don't have to reload SOURCE the next time it is used. */
3409    if (ts->val_type == TEMP_VAL_MEM) {
3410        temp_load(s, ts, tcg_target_available_regs[itype],
3411                  allocated_regs, preferred_regs);
3412    }
3413
3414    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3415    if (IS_DEAD_ARG(0)) {
3416        /* mov to a non-saved dead register makes no sense (even with
3417           liveness analysis disabled). */
3418        tcg_debug_assert(NEED_SYNC_ARG(0));
3419        if (!ots->mem_allocated) {
3420            temp_allocate_frame(s, ots);
3421        }
3422        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3423        if (IS_DEAD_ARG(1)) {
3424            temp_dead(s, ts);
3425        }
3426        temp_dead(s, ots);
3427    } else {
3428        if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3429            /* the mov can be suppressed */
3430            if (ots->val_type == TEMP_VAL_REG) {
3431                s->reg_to_temp[ots->reg] = NULL;
3432            }
3433            ots->reg = ts->reg;
3434            temp_dead(s, ts);
3435        } else {
3436            if (ots->val_type != TEMP_VAL_REG) {
3437                /* When allocating a new register, make sure to not spill the
3438                   input one. */
3439                tcg_regset_set_reg(allocated_regs, ts->reg);
3440                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3441                                         allocated_regs, preferred_regs,
3442                                         ots->indirect_base);
3443            }
3444            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3445                /*
3446                 * Cross register class move not supported.
3447                 * Store the source register into the destination slot
3448                 * and leave the destination temp as TEMP_VAL_MEM.
3449                 */
3450                assert(!temp_readonly(ots));
3451                if (!ts->mem_allocated) {
3452                    temp_allocate_frame(s, ots);
3453                }
3454                tcg_out_st(s, ts->type, ts->reg,
3455                           ots->mem_base->reg, ots->mem_offset);
3456                ots->mem_coherent = 1;
3457                temp_free_or_dead(s, ots, -1);
3458                return;
3459            }
3460        }
3461        ots->val_type = TEMP_VAL_REG;
3462        ots->mem_coherent = 0;
3463        s->reg_to_temp[ots->reg] = ots;
3464        if (NEED_SYNC_ARG(0)) {
3465            temp_sync(s, ots, allocated_regs, 0, 0);
3466        }
3467    }
3468}
3469
3470/*
3471 * Specialized code generation for INDEX_op_dup_vec.
3472 */
3473static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3474{
3475    const TCGLifeData arg_life = op->life;
3476    TCGRegSet dup_out_regs, dup_in_regs;
3477    TCGTemp *its, *ots;
3478    TCGType itype, vtype;
3479    intptr_t endian_fixup;
3480    unsigned vece;
3481    bool ok;
3482
3483    ots = arg_temp(op->args[0]);
3484    its = arg_temp(op->args[1]);
3485
3486    /* ENV should not be modified.  */
3487    tcg_debug_assert(!temp_readonly(ots));
3488
3489    itype = its->type;
3490    vece = TCGOP_VECE(op);
3491    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3492
3493    if (its->val_type == TEMP_VAL_CONST) {
3494        /* Propagate constant via movi -> dupi.  */
3495        tcg_target_ulong val = its->val;
3496        if (IS_DEAD_ARG(1)) {
3497            temp_dead(s, its);
3498        }
3499        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3500        return;
3501    }
3502
3503    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3504    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3505
3506    /* Allocate the output register now.  */
3507    if (ots->val_type != TEMP_VAL_REG) {
3508        TCGRegSet allocated_regs = s->reserved_regs;
3509
3510        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3511            /* Make sure to not spill the input register. */
3512            tcg_regset_set_reg(allocated_regs, its->reg);
3513        }
3514        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3515                                 op->output_pref[0], ots->indirect_base);
3516        ots->val_type = TEMP_VAL_REG;
3517        ots->mem_coherent = 0;
3518        s->reg_to_temp[ots->reg] = ots;
3519    }
3520
3521    switch (its->val_type) {
3522    case TEMP_VAL_REG:
3523        /*
3524         * The dup constriaints must be broad, covering all possible VECE.
3525         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3526         * to fail, indicating that extra moves are required for that case.
3527         */
3528        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3529            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3530                goto done;
3531            }
3532            /* Try again from memory or a vector input register.  */
3533        }
3534        if (!its->mem_coherent) {
3535            /*
3536             * The input register is not synced, and so an extra store
3537             * would be required to use memory.  Attempt an integer-vector
3538             * register move first.  We do not have a TCGRegSet for this.
3539             */
3540            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3541                break;
3542            }
3543            /* Sync the temp back to its slot and load from there.  */
3544            temp_sync(s, its, s->reserved_regs, 0, 0);
3545        }
3546        /* fall through */
3547
3548    case TEMP_VAL_MEM:
3549#if HOST_BIG_ENDIAN
3550        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3551        endian_fixup -= 1 << vece;
3552#else
3553        endian_fixup = 0;
3554#endif
3555        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3556                             its->mem_offset + endian_fixup)) {
3557            goto done;
3558        }
3559        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3560        break;
3561
3562    default:
3563        g_assert_not_reached();
3564    }
3565
3566    /* We now have a vector input register, so dup must succeed. */
3567    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3568    tcg_debug_assert(ok);
3569
3570 done:
3571    if (IS_DEAD_ARG(1)) {
3572        temp_dead(s, its);
3573    }
3574    if (NEED_SYNC_ARG(0)) {
3575        temp_sync(s, ots, s->reserved_regs, 0, 0);
3576    }
3577    if (IS_DEAD_ARG(0)) {
3578        temp_dead(s, ots);
3579    }
3580}
3581
3582static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3583{
3584    const TCGLifeData arg_life = op->life;
3585    const TCGOpDef * const def = &tcg_op_defs[op->opc];
3586    TCGRegSet i_allocated_regs;
3587    TCGRegSet o_allocated_regs;
3588    int i, k, nb_iargs, nb_oargs;
3589    TCGReg reg;
3590    TCGArg arg;
3591    const TCGArgConstraint *arg_ct;
3592    TCGTemp *ts;
3593    TCGArg new_args[TCG_MAX_OP_ARGS];
3594    int const_args[TCG_MAX_OP_ARGS];
3595
3596    nb_oargs = def->nb_oargs;
3597    nb_iargs = def->nb_iargs;
3598
3599    /* copy constants */
3600    memcpy(new_args + nb_oargs + nb_iargs, 
3601           op->args + nb_oargs + nb_iargs,
3602           sizeof(TCGArg) * def->nb_cargs);
3603
3604    i_allocated_regs = s->reserved_regs;
3605    o_allocated_regs = s->reserved_regs;
3606
3607    /* satisfy input constraints */ 
3608    for (k = 0; k < nb_iargs; k++) {
3609        TCGRegSet i_preferred_regs, o_preferred_regs;
3610
3611        i = def->args_ct[nb_oargs + k].sort_index;
3612        arg = op->args[i];
3613        arg_ct = &def->args_ct[i];
3614        ts = arg_temp(arg);
3615
3616        if (ts->val_type == TEMP_VAL_CONST
3617            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3618            /* constant is OK for instruction */
3619            const_args[i] = 1;
3620            new_args[i] = ts->val;
3621            continue;
3622        }
3623
3624        i_preferred_regs = o_preferred_regs = 0;
3625        if (arg_ct->ialias) {
3626            o_preferred_regs = op->output_pref[arg_ct->alias_index];
3627
3628            /*
3629             * If the input is readonly, then it cannot also be an
3630             * output and aliased to itself.  If the input is not
3631             * dead after the instruction, we must allocate a new
3632             * register and move it.
3633             */
3634            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3635                goto allocate_in_reg;
3636            }
3637
3638            /*
3639             * Check if the current register has already been allocated
3640             * for another input aliased to an output.
3641             */
3642            if (ts->val_type == TEMP_VAL_REG) {
3643                reg = ts->reg;
3644                for (int k2 = 0; k2 < k; k2++) {
3645                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
3646                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3647                        goto allocate_in_reg;
3648                    }
3649                }
3650            }
3651            i_preferred_regs = o_preferred_regs;
3652        }
3653
3654        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3655        reg = ts->reg;
3656
3657        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3658 allocate_in_reg:
3659            /*
3660             * Allocate a new register matching the constraint
3661             * and move the temporary register into it.
3662             */
3663            temp_load(s, ts, tcg_target_available_regs[ts->type],
3664                      i_allocated_regs, 0);
3665            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3666                                o_preferred_regs, ts->indirect_base);
3667            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3668                /*
3669                 * Cross register class move not supported.  Sync the
3670                 * temp back to its slot and load from there.
3671                 */
3672                temp_sync(s, ts, i_allocated_regs, 0, 0);
3673                tcg_out_ld(s, ts->type, reg,
3674                           ts->mem_base->reg, ts->mem_offset);
3675            }
3676        }
3677        new_args[i] = reg;
3678        const_args[i] = 0;
3679        tcg_regset_set_reg(i_allocated_regs, reg);
3680    }
3681    
3682    /* mark dead temporaries and free the associated registers */
3683    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3684        if (IS_DEAD_ARG(i)) {
3685            temp_dead(s, arg_temp(op->args[i]));
3686        }
3687    }
3688
3689    if (def->flags & TCG_OPF_COND_BRANCH) {
3690        tcg_reg_alloc_cbranch(s, i_allocated_regs);
3691    } else if (def->flags & TCG_OPF_BB_END) {
3692        tcg_reg_alloc_bb_end(s, i_allocated_regs);
3693    } else {
3694        if (def->flags & TCG_OPF_CALL_CLOBBER) {
3695            /* XXX: permit generic clobber register list ? */ 
3696            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3697                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3698                    tcg_reg_free(s, i, i_allocated_regs);
3699                }
3700            }
3701        }
3702        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3703            /* sync globals if the op has side effects and might trigger
3704               an exception. */
3705            sync_globals(s, i_allocated_regs);
3706        }
3707        
3708        /* satisfy the output constraints */
3709        for(k = 0; k < nb_oargs; k++) {
3710            i = def->args_ct[k].sort_index;
3711            arg = op->args[i];
3712            arg_ct = &def->args_ct[i];
3713            ts = arg_temp(arg);
3714
3715            /* ENV should not be modified.  */
3716            tcg_debug_assert(!temp_readonly(ts));
3717
3718            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3719                reg = new_args[arg_ct->alias_index];
3720            } else if (arg_ct->newreg) {
3721                reg = tcg_reg_alloc(s, arg_ct->regs,
3722                                    i_allocated_regs | o_allocated_regs,
3723                                    op->output_pref[k], ts->indirect_base);
3724            } else {
3725                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3726                                    op->output_pref[k], ts->indirect_base);
3727            }
3728            tcg_regset_set_reg(o_allocated_regs, reg);
3729            if (ts->val_type == TEMP_VAL_REG) {
3730                s->reg_to_temp[ts->reg] = NULL;
3731            }
3732            ts->val_type = TEMP_VAL_REG;
3733            ts->reg = reg;
3734            /*
3735             * Temp value is modified, so the value kept in memory is
3736             * potentially not the same.
3737             */
3738            ts->mem_coherent = 0;
3739            s->reg_to_temp[reg] = ts;
3740            new_args[i] = reg;
3741        }
3742    }
3743
3744    /* emit instruction */
3745    if (def->flags & TCG_OPF_VECTOR) {
3746        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3747                       new_args, const_args);
3748    } else {
3749        tcg_out_op(s, op->opc, new_args, const_args);
3750    }
3751
3752    /* move the outputs in the correct register if needed */
3753    for(i = 0; i < nb_oargs; i++) {
3754        ts = arg_temp(op->args[i]);
3755
3756        /* ENV should not be modified.  */
3757        tcg_debug_assert(!temp_readonly(ts));
3758
3759        if (NEED_SYNC_ARG(i)) {
3760            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3761        } else if (IS_DEAD_ARG(i)) {
3762            temp_dead(s, ts);
3763        }
3764    }
3765}
3766
3767static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3768{
3769    const TCGLifeData arg_life = op->life;
3770    TCGTemp *ots, *itsl, *itsh;
3771    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3772
3773    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3774    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3775    tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3776
3777    ots = arg_temp(op->args[0]);
3778    itsl = arg_temp(op->args[1]);
3779    itsh = arg_temp(op->args[2]);
3780
3781    /* ENV should not be modified.  */
3782    tcg_debug_assert(!temp_readonly(ots));
3783
3784    /* Allocate the output register now.  */
3785    if (ots->val_type != TEMP_VAL_REG) {
3786        TCGRegSet allocated_regs = s->reserved_regs;
3787        TCGRegSet dup_out_regs =
3788            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3789
3790        /* Make sure to not spill the input registers. */
3791        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3792            tcg_regset_set_reg(allocated_regs, itsl->reg);
3793        }
3794        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3795            tcg_regset_set_reg(allocated_regs, itsh->reg);
3796        }
3797
3798        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3799                                 op->output_pref[0], ots->indirect_base);
3800        ots->val_type = TEMP_VAL_REG;
3801        ots->mem_coherent = 0;
3802        s->reg_to_temp[ots->reg] = ots;
3803    }
3804
3805    /* Promote dup2 of immediates to dupi_vec. */
3806    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3807        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3808        MemOp vece = MO_64;
3809
3810        if (val == dup_const(MO_8, val)) {
3811            vece = MO_8;
3812        } else if (val == dup_const(MO_16, val)) {
3813            vece = MO_16;
3814        } else if (val == dup_const(MO_32, val)) {
3815            vece = MO_32;
3816        }
3817
3818        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3819        goto done;
3820    }
3821
3822    /* If the two inputs form one 64-bit value, try dupm_vec. */
3823    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3824        if (!itsl->mem_coherent) {
3825            temp_sync(s, itsl, s->reserved_regs, 0, 0);
3826        }
3827        if (!itsh->mem_coherent) {
3828            temp_sync(s, itsh, s->reserved_regs, 0, 0);
3829        }
3830#if HOST_BIG_ENDIAN
3831        TCGTemp *its = itsh;
3832#else
3833        TCGTemp *its = itsl;
3834#endif
3835        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3836                             its->mem_base->reg, its->mem_offset)) {
3837            goto done;
3838        }
3839    }
3840
3841    /* Fall back to generic expansion. */
3842    return false;
3843
3844 done:
3845    if (IS_DEAD_ARG(1)) {
3846        temp_dead(s, itsl);
3847    }
3848    if (IS_DEAD_ARG(2)) {
3849        temp_dead(s, itsh);
3850    }
3851    if (NEED_SYNC_ARG(0)) {
3852        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3853    } else if (IS_DEAD_ARG(0)) {
3854        temp_dead(s, ots);
3855    }
3856    return true;
3857}
3858
/*
 * STACK_DIR(x): x unchanged by default, negated when the target
 * defines TCG_TARGET_STACK_GROWSUP.
 */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3864
3865static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3866{
3867    const int nb_oargs = TCGOP_CALLO(op);
3868    const int nb_iargs = TCGOP_CALLI(op);
3869    const TCGLifeData arg_life = op->life;
3870    const TCGHelperInfo *info;
3871    int flags, nb_regs, i;
3872    TCGReg reg;
3873    TCGArg arg;
3874    TCGTemp *ts;
3875    intptr_t stack_offset;
3876    size_t call_stack_size;
3877    tcg_insn_unit *func_addr;
3878    int allocate_args;
3879    TCGRegSet allocated_regs;
3880
3881    func_addr = tcg_call_func(op);
3882    info = tcg_call_info(op);
3883    flags = info->flags;
3884
3885    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3886    if (nb_regs > nb_iargs) {
3887        nb_regs = nb_iargs;
3888    }
3889
3890    /* assign stack slots first */
3891    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3892    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
3893        ~(TCG_TARGET_STACK_ALIGN - 1);
3894    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3895    if (allocate_args) {
3896        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3897           preallocate call stack */
3898        tcg_abort();
3899    }
3900
3901    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3902    for (i = nb_regs; i < nb_iargs; i++) {
3903        arg = op->args[nb_oargs + i];
3904#ifdef TCG_TARGET_STACK_GROWSUP
3905        stack_offset -= sizeof(tcg_target_long);
3906#endif
3907        if (arg != TCG_CALL_DUMMY_ARG) {
3908            ts = arg_temp(arg);
3909            temp_load(s, ts, tcg_target_available_regs[ts->type],
3910                      s->reserved_regs, 0);
3911            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3912        }
3913#ifndef TCG_TARGET_STACK_GROWSUP
3914        stack_offset += sizeof(tcg_target_long);
3915#endif
3916    }
3917    
3918    /* assign input registers */
3919    allocated_regs = s->reserved_regs;
3920    for (i = 0; i < nb_regs; i++) {
3921        arg = op->args[nb_oargs + i];
3922        if (arg != TCG_CALL_DUMMY_ARG) {
3923            ts = arg_temp(arg);
3924            reg = tcg_target_call_iarg_regs[i];
3925
3926            if (ts->val_type == TEMP_VAL_REG) {
3927                if (ts->reg != reg) {
3928                    tcg_reg_free(s, reg, allocated_regs);
3929                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3930                        /*
3931                         * Cross register class move not supported.  Sync the
3932                         * temp back to its slot and load from there.
3933                         */
3934                        temp_sync(s, ts, allocated_regs, 0, 0);
3935                        tcg_out_ld(s, ts->type, reg,
3936                                   ts->mem_base->reg, ts->mem_offset);
3937                    }
3938                }
3939            } else {
3940                TCGRegSet arg_set = 0;
3941
3942                tcg_reg_free(s, reg, allocated_regs);
3943                tcg_regset_set_reg(arg_set, reg);
3944                temp_load(s, ts, arg_set, allocated_regs, 0);
3945            }
3946
3947            tcg_regset_set_reg(allocated_regs, reg);
3948        }
3949    }
3950    
3951    /* mark dead temporaries and free the associated registers */
3952    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3953        if (IS_DEAD_ARG(i)) {
3954            temp_dead(s, arg_temp(op->args[i]));
3955        }
3956    }
3957    
3958    /* clobber call registers */
3959    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3960        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3961            tcg_reg_free(s, i, allocated_regs);
3962        }
3963    }
3964
3965    /* Save globals if they might be written by the helper, sync them if
3966       they might be read. */
3967    if (flags & TCG_CALL_NO_READ_GLOBALS) {
3968        /* Nothing to do */
3969    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3970        sync_globals(s, allocated_regs);
3971    } else {
3972        save_globals(s, allocated_regs);
3973    }
3974
3975#ifdef CONFIG_TCG_INTERPRETER
3976    {
3977        gpointer hash = (gpointer)(uintptr_t)info->typemask;
3978        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
3979        assert(cif != NULL);
3980        tcg_out_call(s, func_addr, cif);
3981    }
3982#else
3983    tcg_out_call(s, func_addr);
3984#endif
3985
3986    /* assign output registers and emit moves if needed */
3987    for(i = 0; i < nb_oargs; i++) {
3988        arg = op->args[i];
3989        ts = arg_temp(arg);
3990
3991        /* ENV should not be modified.  */
3992        tcg_debug_assert(!temp_readonly(ts));
3993
3994        reg = tcg_target_call_oarg_regs[i];
3995        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3996        if (ts->val_type == TEMP_VAL_REG) {
3997            s->reg_to_temp[ts->reg] = NULL;
3998        }
3999        ts->val_type = TEMP_VAL_REG;
4000        ts->reg = reg;
4001        ts->mem_coherent = 0;
4002        s->reg_to_temp[reg] = ts;
4003        if (NEED_SYNC_ARG(i)) {
4004            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4005        } else if (IS_DEAD_ARG(i)) {
4006            temp_dead(s, ts);
4007        }
4008    }
4009}
4010
4011#ifdef CONFIG_PROFILER
4012
/*
 * Add an atomic read of (from)->field onto (to)->field.  The field is
 * named only once, which avoids copy/paste errors.
 */
#define PROF_ADD(to, from, field)                               \
    do {                                                        \
        (to)->field += qatomic_read(&((from)->field));          \
    } while (0)
4018
/*
 * Raise (to)->field to an atomic read of (from)->field when the
 * latter is larger; leave it untouched otherwise.
 */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) seen__ = qatomic_read(&((from)->field));  \
        if ((to)->field < seen__) {                                     \
            (to)->field = seen__;                                       \
        }                                                               \
    } while (0)
4026
4027/* Pass in a zero'ed @prof */
4028static inline
4029void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4030{
4031    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4032    unsigned int i;
4033
4034    for (i = 0; i < n_ctxs; i++) {
4035        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4036        const TCGProfile *orig = &s->prof;
4037
4038        if (counters) {
4039            PROF_ADD(prof, orig, cpu_exec_time);
4040            PROF_ADD(prof, orig, tb_count1);
4041            PROF_ADD(prof, orig, tb_count);
4042            PROF_ADD(prof, orig, op_count);
4043            PROF_MAX(prof, orig, op_count_max);
4044            PROF_ADD(prof, orig, temp_count);
4045            PROF_MAX(prof, orig, temp_count_max);
4046            PROF_ADD(prof, orig, del_op_count);
4047            PROF_ADD(prof, orig, code_in_len);
4048            PROF_ADD(prof, orig, code_out_len);
4049            PROF_ADD(prof, orig, search_out_len);
4050            PROF_ADD(prof, orig, interm_time);
4051            PROF_ADD(prof, orig, code_time);
4052            PROF_ADD(prof, orig, la_time);
4053            PROF_ADD(prof, orig, opt_time);
4054            PROF_ADD(prof, orig, restore_count);
4055            PROF_ADD(prof, orig, restore_time);
4056        }
4057        if (table) {
4058            int i;
4059
4060            for (i = 0; i < NB_OPS; i++) {
4061                PROF_ADD(prof, orig, table_op_count[i]);
4062            }
4063        }
4064    }
4065}
4066
4067#undef PROF_ADD
4068#undef PROF_MAX
4069
4070static void tcg_profile_snapshot_counters(TCGProfile *prof)
4071{
4072    tcg_profile_snapshot(prof, true, false);
4073}
4074
4075static void tcg_profile_snapshot_table(TCGProfile *prof)
4076{
4077    tcg_profile_snapshot(prof, false, true);
4078}
4079
4080void tcg_dump_op_count(GString *buf)
4081{
4082    TCGProfile prof = {};
4083    int i;
4084
4085    tcg_profile_snapshot_table(&prof);
4086    for (i = 0; i < NB_OPS; i++) {
4087        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4088                               prof.table_op_count[i]);
4089    }
4090}
4091
4092int64_t tcg_cpu_exec_time(void)
4093{
4094    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4095    unsigned int i;
4096    int64_t ret = 0;
4097
4098    for (i = 0; i < n_ctxs; i++) {
4099        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4100        const TCGProfile *prof = &s->prof;
4101
4102        ret += qatomic_read(&prof->cpu_exec_time);
4103    }
4104    return ret;
4105}
4106#else
4107void tcg_dump_op_count(GString *buf)
4108{
4109    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4110}
4111
/*
 * Built without CONFIG_PROFILER: no execution time is tracked, so
 * report the error and terminate the process.  Never returns.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4117#endif
4118
4119
4120int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
4121{
4122#ifdef CONFIG_PROFILER
4123    TCGProfile *prof = &s->prof;
4124#endif
4125    int i, num_insns;
4126    TCGOp *op;
4127
4128#ifdef CONFIG_PROFILER
4129    {
4130        int n = 0;
4131
4132        QTAILQ_FOREACH(op, &s->ops, link) {
4133            n++;
4134        }
4135        qatomic_set(&prof->op_count, prof->op_count + n);
4136        if (n > prof->op_count_max) {
4137            qatomic_set(&prof->op_count_max, n);
4138        }
4139
4140        n = s->nb_temps;
4141        qatomic_set(&prof->temp_count, prof->temp_count + n);
4142        if (n > prof->temp_count_max) {
4143            qatomic_set(&prof->temp_count_max, n);
4144        }
4145    }
4146#endif
4147
4148#ifdef DEBUG_DISAS
4149    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4150                 && qemu_log_in_addr_range(pc_start))) {
4151        FILE *logfile = qemu_log_trylock();
4152        if (logfile) {
4153            fprintf(logfile, "OP:\n");
4154            tcg_dump_ops(s, logfile, false);
4155            fprintf(logfile, "\n");
4156            qemu_log_unlock(logfile);
4157        }
4158    }
4159#endif
4160
4161#ifdef CONFIG_DEBUG_TCG
4162    /* Ensure all labels referenced have been emitted.  */
4163    {
4164        TCGLabel *l;
4165        bool error = false;
4166
4167        QSIMPLEQ_FOREACH(l, &s->labels, next) {
4168            if (unlikely(!l->present) && l->refs) {
4169                qemu_log_mask(CPU_LOG_TB_OP,
4170                              "$L%d referenced but not present.\n", l->id);
4171                error = true;
4172            }
4173        }
4174        assert(!error);
4175    }
4176#endif
4177
4178#ifdef CONFIG_PROFILER
4179    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4180#endif
4181
4182#ifdef USE_TCG_OPTIMIZATIONS
4183    tcg_optimize(s);
4184#endif
4185
4186#ifdef CONFIG_PROFILER
4187    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4188    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4189#endif
4190
4191    reachable_code_pass(s);
4192    liveness_pass_1(s);
4193
4194    if (s->nb_indirects > 0) {
4195#ifdef DEBUG_DISAS
4196        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4197                     && qemu_log_in_addr_range(pc_start))) {
4198            FILE *logfile = qemu_log_trylock();
4199            if (logfile) {
4200                fprintf(logfile, "OP before indirect lowering:\n");
4201                tcg_dump_ops(s, logfile, false);
4202                fprintf(logfile, "\n");
4203                qemu_log_unlock(logfile);
4204            }
4205        }
4206#endif
4207        /* Replace indirect temps with direct temps.  */
4208        if (liveness_pass_2(s)) {
4209            /* If changes were made, re-run liveness.  */
4210            liveness_pass_1(s);
4211        }
4212    }
4213
4214#ifdef CONFIG_PROFILER
4215    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4216#endif
4217
4218#ifdef DEBUG_DISAS
4219    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4220                 && qemu_log_in_addr_range(pc_start))) {
4221        FILE *logfile = qemu_log_trylock();
4222        if (logfile) {
4223            fprintf(logfile, "OP after optimization and liveness analysis:\n");
4224            tcg_dump_ops(s, logfile, true);
4225            fprintf(logfile, "\n");
4226            qemu_log_unlock(logfile);
4227        }
4228    }
4229#endif
4230
4231    /* Initialize goto_tb jump offsets. */
4232    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
4233    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
4234    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
4235    if (TCG_TARGET_HAS_direct_jump) {
4236        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
4237        tcg_ctx->tb_jmp_target_addr = NULL;
4238    } else {
4239        tcg_ctx->tb_jmp_insn_offset = NULL;
4240        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
4241    }
4242
4243    tcg_reg_alloc_start(s);
4244
4245    /*
4246     * Reset the buffer pointers when restarting after overflow.
4247     * TODO: Move this into translate-all.c with the rest of the
4248     * buffer management.  Having only this done here is confusing.
4249     */
4250    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4251    s->code_ptr = s->code_buf;
4252
4253#ifdef TCG_TARGET_NEED_LDST_LABELS
4254    QSIMPLEQ_INIT(&s->ldst_labels);
4255#endif
4256#ifdef TCG_TARGET_NEED_POOL_LABELS
4257    s->pool_labels = NULL;
4258#endif
4259
4260    num_insns = -1;
4261    QTAILQ_FOREACH(op, &s->ops, link) {
4262        TCGOpcode opc = op->opc;
4263
4264#ifdef CONFIG_PROFILER
4265        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4266#endif
4267
4268        switch (opc) {
4269        case INDEX_op_mov_i32:
4270        case INDEX_op_mov_i64:
4271        case INDEX_op_mov_vec:
4272            tcg_reg_alloc_mov(s, op);
4273            break;
4274        case INDEX_op_dup_vec:
4275            tcg_reg_alloc_dup(s, op);
4276            break;
4277        case INDEX_op_insn_start:
4278            if (num_insns >= 0) {
4279                size_t off = tcg_current_code_size(s);
4280                s->gen_insn_end_off[num_insns] = off;
4281                /* Assert that we do not overflow our stored offset.  */
4282                assert(s->gen_insn_end_off[num_insns] == off);
4283            }
4284            num_insns++;
4285            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4286                target_ulong a;
4287#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4288                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4289#else
4290                a = op->args[i];
4291#endif
4292                s->gen_insn_data[num_insns][i] = a;
4293            }
4294            break;
4295        case INDEX_op_discard:
4296            temp_dead(s, arg_temp(op->args[0]));
4297            break;
4298        case INDEX_op_set_label:
4299            tcg_reg_alloc_bb_end(s, s->reserved_regs);
4300            tcg_out_label(s, arg_label(op->args[0]));
4301            break;
4302        case INDEX_op_call:
4303            tcg_reg_alloc_call(s, op);
4304            break;
4305        case INDEX_op_dup2_vec:
4306            if (tcg_reg_alloc_dup2(s, op)) {
4307                break;
4308            }
4309            /* fall through */
4310        default:
4311            /* Sanity check that we've not introduced any unhandled opcodes. */
4312            tcg_debug_assert(tcg_op_supported(opc));
4313            /* Note: in order to speed up the code, it would be much
4314               faster to have specialized register allocator functions for
4315               some common argument patterns */
4316            tcg_reg_alloc_op(s, op);
4317            break;
4318        }
4319#ifdef CONFIG_DEBUG_TCG
4320        check_regs(s);
4321#endif
4322        /* Test for (pending) buffer overflow.  The assumption is that any
4323           one operation beginning below the high water mark cannot overrun
4324           the buffer completely.  Thus we can test for overflow after
4325           generating code without having to check during generation.  */
4326        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4327            return -1;
4328        }
4329        /* Test for TB overflow, as seen by gen_insn_end_off.  */
4330        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4331            return -2;
4332        }
4333    }
4334    tcg_debug_assert(num_insns >= 0);
4335    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4336
4337    /* Generate TB finalization at the end of block */
4338#ifdef TCG_TARGET_NEED_LDST_LABELS
4339    i = tcg_out_ldst_finalize(s);
4340    if (i < 0) {
4341        return i;
4342    }
4343#endif
4344#ifdef TCG_TARGET_NEED_POOL_LABELS
4345    i = tcg_out_pool_finalize(s);
4346    if (i < 0) {
4347        return i;
4348    }
4349#endif
4350    if (!tcg_resolve_relocs(s)) {
4351        return -2;
4352    }
4353
4354#ifndef CONFIG_TCG_INTERPRETER
4355    /* flush instruction cache */
4356    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4357                        (uintptr_t)s->code_buf,
4358                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4359#endif
4360
4361    return tcg_current_code_size(s);
4362}
4363
4364#ifdef CONFIG_PROFILER
4365void tcg_dump_info(GString *buf)
4366{
4367    TCGProfile prof = {};
4368    const TCGProfile *s;
4369    int64_t tb_count;
4370    int64_t tb_div_count;
4371    int64_t tot;
4372
4373    tcg_profile_snapshot_counters(&prof);
4374    s = &prof;
4375    tb_count = s->tb_count;
4376    tb_div_count = tb_count ? tb_count : 1;
4377    tot = s->interm_time + s->code_time;
4378
4379    g_string_append_printf(buf, "JIT cycles          %" PRId64
4380                           " (%0.3f s at 2.4 GHz)\n",
4381                           tot, tot / 2.4e9);
4382    g_string_append_printf(buf, "translated TBs      %" PRId64
4383                           " (aborted=%" PRId64 " %0.1f%%)\n",
4384                           tb_count, s->tb_count1 - tb_count,
4385                           (double)(s->tb_count1 - s->tb_count)
4386                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4387    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4388                           (double)s->op_count / tb_div_count, s->op_count_max);
4389    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4390                           (double)s->del_op_count / tb_div_count);
4391    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4392                           (double)s->temp_count / tb_div_count,
4393                           s->temp_count_max);
4394    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4395                           (double)s->code_out_len / tb_div_count);
4396    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4397                           (double)s->search_out_len / tb_div_count);
4398    
4399    g_string_append_printf(buf, "cycles/op           %0.1f\n",
4400                           s->op_count ? (double)tot / s->op_count : 0);
4401    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4402                           s->code_in_len ? (double)tot / s->code_in_len : 0);
4403    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4404                           s->code_out_len ? (double)tot / s->code_out_len : 0);
4405    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4406                           s->search_out_len ?
4407                           (double)tot / s->search_out_len : 0);
4408    if (tot == 0) {
4409        tot = 1;
4410    }
4411    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4412                           (double)s->interm_time / tot * 100.0);
4413    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4414                           (double)s->code_time / tot * 100.0);
4415    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4416                           (double)s->opt_time / (s->code_time ?
4417                                                  s->code_time : 1)
4418                           * 100.0);
4419    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4420                           (double)s->la_time / (s->code_time ?
4421                                                 s->code_time : 1) * 100.0);
4422    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4423                           s->restore_count);
4424    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4425                           s->restore_count ?
4426                           (double)s->restore_time / s->restore_count : 0);
4427}
4428#else
4429void tcg_dump_info(GString *buf)
4430{
4431    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4432}
4433#endif
4434
4435#ifdef ELF_HOST_MACHINE
4436/* In order to use this feature, the backend needs to do three things:
4437
4438   (1) Define ELF_HOST_MACHINE to indicate both what value to
4439       put into the ELF image and to indicate support for the feature.
4440
4441   (2) Define tcg_register_jit.  This should create a buffer containing
4442       the contents of a .debug_frame section that describes the post-
4443       prologue unwind info for the tcg machine.
4444
4445   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4446*/
4447
4448/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values for jit_descriptor.action_flag; must match the GDB docs. */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
4454
/*
 * One node of the doubly linked list of symbol files; the layout must
 * match the GDB docs.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry;  /* following list node */
    struct jit_code_entry *prev_entry;  /* preceding list node */
    const void *symfile_addr;           /* address of the symbol file */
    uint64_t symfile_size;              /* size of the symbol file */
};
4461
/*
 * Root object of the GDB JIT interface; the layout must match the GDB
 * docs.
 */
struct jit_descriptor {
    uint32_t version;                       /* interface version */
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action refers to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
4468
/*
 * Part of the GDB interface above.  The function is kept out of line
 * (noinline) and its body is an empty asm statement, so it does no
 * work of its own.
 */
__attribute__((noinline)) void __jit_debug_register_code(void);

void __jit_debug_register_code(void)
{
    asm("");
}
4474
4475/* Must statically initialize the version, because GDB may check
4476   the version before we can set it.  */
4477struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4478
4479/* End GDB interface.  */
4480
/*
 * Return the byte offset of @str inside @strtab, a sequence of
 * NUL-terminated strings; the search starts at offset 1, skipping the
 * first byte.
 *
 * @str must be present in the table: there is no length to bound the
 * scan, so a missing string makes the loop read past the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *entry;

    for (entry = strtab + 1;
         strcmp(entry, str) != 0;
         entry += strlen(entry) + 1) {
        /* not this entry; step over it and its terminator */
    }
    return entry - strtab;
}
4492
4493static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4494                                 const void *debug_frame,
4495                                 size_t debug_frame_size)
4496{
4497    struct __attribute__((packed)) DebugInfo {
4498        uint32_t  len;
4499        uint16_t  version;
4500        uint32_t  abbrev;
4501        uint8_t   ptr_size;
4502        uint8_t   cu_die;
4503        uint16_t  cu_lang;
4504        uintptr_t cu_low_pc;
4505        uintptr_t cu_high_pc;
4506        uint8_t   fn_die;
4507        char      fn_name[16];
4508        uintptr_t fn_low_pc;
4509        uintptr_t fn_high_pc;
4510        uint8_t   cu_eoc;
4511    };
4512
4513    struct ElfImage {
4514        ElfW(Ehdr) ehdr;
4515        ElfW(Phdr) phdr;
4516        ElfW(Shdr) shdr[7];
4517        ElfW(Sym)  sym[2];
4518        struct DebugInfo di;
4519        uint8_t    da[24];
4520        char       str[80];
4521    };
4522
4523    struct ElfImage *img;
4524
4525    static const struct ElfImage img_template = {
4526        .ehdr = {
4527            .e_ident[EI_MAG0] = ELFMAG0,
4528            .e_ident[EI_MAG1] = ELFMAG1,
4529            .e_ident[EI_MAG2] = ELFMAG2,
4530            .e_ident[EI_MAG3] = ELFMAG3,
4531            .e_ident[EI_CLASS] = ELF_CLASS,
4532            .e_ident[EI_DATA] = ELF_DATA,
4533            .e_ident[EI_VERSION] = EV_CURRENT,
4534            .e_type = ET_EXEC,
4535            .e_machine = ELF_HOST_MACHINE,
4536            .e_version = EV_CURRENT,
4537            .e_phoff = offsetof(struct ElfImage, phdr),
4538            .e_shoff = offsetof(struct ElfImage, shdr),
4539            .e_ehsize = sizeof(ElfW(Shdr)),
4540            .e_phentsize = sizeof(ElfW(Phdr)),
4541            .e_phnum = 1,
4542            .e_shentsize = sizeof(ElfW(Shdr)),
4543            .e_shnum = ARRAY_SIZE(img->shdr),
4544            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4545#ifdef ELF_HOST_FLAGS
4546            .e_flags = ELF_HOST_FLAGS,
4547#endif
4548#ifdef ELF_OSABI
4549            .e_ident[EI_OSABI] = ELF_OSABI,
4550#endif
4551        },
4552        .phdr = {
4553            .p_type = PT_LOAD,
4554            .p_flags = PF_X,
4555        },
4556        .shdr = {
4557            [0] = { .sh_type = SHT_NULL },
4558            /* Trick: The contents of code_gen_buffer are not present in
4559               this fake ELF file; that got allocated elsewhere.  Therefore
4560               we mark .text as SHT_NOBITS (similar to .bss) so that readers
4561               will not look for contents.  We can record any address.  */
4562            [1] = { /* .text */
4563                .sh_type = SHT_NOBITS,
4564                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4565            },
4566            [2] = { /* .debug_info */
4567                .sh_type = SHT_PROGBITS,
4568                .sh_offset = offsetof(struct ElfImage, di),
4569                .sh_size = sizeof(struct DebugInfo),
4570            },
4571            [3] = { /* .debug_abbrev */
4572                .sh_type = SHT_PROGBITS,
4573                .sh_offset = offsetof(struct ElfImage, da),
4574                .sh_size = sizeof(img->da),
4575            },
4576            [4] = { /* .debug_frame */
4577                .sh_type = SHT_PROGBITS,
4578                .sh_offset = sizeof(struct ElfImage),
4579            },
4580            [5] = { /* .symtab */
4581                .sh_type = SHT_SYMTAB,
4582                .sh_offset = offsetof(struct ElfImage, sym),
4583                .sh_size = sizeof(img->sym),
4584                .sh_info = 1,
4585                .sh_link = ARRAY_SIZE(img->shdr) - 1,
4586                .sh_entsize = sizeof(ElfW(Sym)),
4587            },
4588            [6] = { /* .strtab */
4589                .sh_type = SHT_STRTAB,
4590                .sh_offset = offsetof(struct ElfImage, str),
4591                .sh_size = sizeof(img->str),
4592            }
4593        },
4594        .sym = {
4595            [1] = { /* code_gen_buffer */
4596                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4597                .st_shndx = 1,
4598            }
4599        },
4600        .di = {
4601            .len = sizeof(struct DebugInfo) - 4,
4602            .version = 2,
4603            .ptr_size = sizeof(void *),
4604            .cu_die = 1,
4605            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4606            .fn_die = 2,
4607            .fn_name = "code_gen_buffer"
4608        },
4609        .da = {
4610            1,          /* abbrev number (the cu) */
4611            0x11, 1,    /* DW_TAG_compile_unit, has children */
4612            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4613            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4614            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4615            0, 0,       /* end of abbrev */
4616            2,          /* abbrev number (the fn) */
4617            0x2e, 0,    /* DW_TAG_subprogram, no children */
4618            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4619            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4620            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4621            0, 0,       /* end of abbrev */
4622            0           /* no more abbrev */
4623        },
4624        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4625               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4626    };
4627
4628    /* We only need a single jit entry; statically allocate it.  */
4629    static struct jit_code_entry one_entry;
4630
4631    uintptr_t buf = (uintptr_t)buf_ptr;
4632    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4633    DebugFrameHeader *dfh;
4634
4635    img = g_malloc(img_size);
4636    *img = img_template;
4637
4638    img->phdr.p_vaddr = buf;
4639    img->phdr.p_paddr = buf;
4640    img->phdr.p_memsz = buf_size;
4641
4642    img->shdr[1].sh_name = find_string(img->str, ".text");
4643    img->shdr[1].sh_addr = buf;
4644    img->shdr[1].sh_size = buf_size;
4645
4646    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4647    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4648
4649    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4650    img->shdr[4].sh_size = debug_frame_size;
4651
4652    img->shdr[5].sh_name = find_string(img->str, ".symtab");
4653    img->shdr[6].sh_name = find_string(img->str, ".strtab");
4654
4655    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4656    img->sym[1].st_value = buf;
4657    img->sym[1].st_size = buf_size;
4658
4659    img->di.cu_low_pc = buf;
4660    img->di.cu_high_pc = buf + buf_size;
4661    img->di.fn_low_pc = buf;
4662    img->di.fn_high_pc = buf + buf_size;
4663
4664    dfh = (DebugFrameHeader *)(img + 1);
4665    memcpy(dfh, debug_frame, debug_frame_size);
4666    dfh->fde.func_start = buf;
4667    dfh->fde.func_len = buf_size;
4668
4669#ifdef DEBUG_JIT
4670    /* Enable this block to be able to debug the ELF image file creation.
4671       One can use readelf, objdump, or other inspection utilities.  */
4672    {
4673        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
4674        FILE *f = fopen(jit, "w+b");
4675        if (f) {
4676            if (fwrite(img, img_size, 1, f) != img_size) {
4677                /* Avoid stupid unused return value warning for fwrite.  */
4678            }
4679            fclose(f);
4680        }
4681    }
4682#endif
4683
4684    one_entry.symfile_addr = img;
4685    one_entry.symfile_size = img_size;
4686
4687    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4688    __jit_debug_descriptor.relevant_entry = &one_entry;
4689    __jit_debug_descriptor.first_entry = &one_entry;
4690    __jit_debug_register_code();
4691}
4692#else
4693/* No support for the feature.  Provide the entry point expected by exec.c,
4694   and implement the internal function we declared earlier.  */
4695
/*
 * Stub for hosts without ELF_HOST_MACHINE: there is no debugger
 * registration support, so all arguments are ignored.
 */
static void tcg_register_jit_int(const void *buf,
                                 size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty.  */
}
4701
/*
 * Public entry point for hosts without ELF_HOST_MACHINE.  It exists
 * only so that callers can link; the buffer is left untouched.
 */
void tcg_register_jit(const void *buf,
                      size_t buf_size)
{
    /* Intentionally empty.  */
}
4705#endif /* ELF_HOST_MACHINE */
4706
4707#if !TCG_TARGET_MAYBE_vec
4708void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4709{
4710    g_assert_not_reached();
4711}
4712#endif
4713