/* qemu/tcg/tcg.c */
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25/* define it to use liveness analysis (better code) */
  26#define USE_TCG_OPTIMIZATIONS
  27
  28#include "qemu/osdep.h"
  29
/* Define to dump the ELF file used to communicate with GDB.  */
  31#undef DEBUG_JIT
  32
  33#include "qemu/error-report.h"
  34#include "qemu/cutils.h"
  35#include "qemu/host-utils.h"
  36#include "qemu/qemu-print.h"
  37#include "qemu/cacheflush.h"
  38#include "qemu/cacheinfo.h"
  39#include "qemu/timer.h"
  40
  41/* Note: the long term plan is to reduce the dependencies on the QEMU
  42   CPU definitions. Currently they are used for qemu_ld/st
  43   instructions */
  44#define NO_CPU_IO_DEFS
  45
  46#include "exec/exec-all.h"
  47#include "tcg/tcg-op.h"
  48
  49#if UINTPTR_MAX == UINT32_MAX
  50# define ELF_CLASS  ELFCLASS32
  51#else
  52# define ELF_CLASS  ELFCLASS64
  53#endif
  54#if HOST_BIG_ENDIAN
  55# define ELF_DATA   ELFDATA2MSB
  56#else
  57# define ELF_DATA   ELFDATA2LSB
  58#endif
  59
  60#include "elf.h"
  61#include "exec/log.h"
  62#include "tcg/tcg-ldst.h"
  63#include "tcg/tcg-temp-internal.h"
  64#include "tcg-internal.h"
  65#include "accel/tcg/perf.h"
  66
  67/* Forward declarations for functions declared in tcg-target.c.inc and
  68   used here. */
  69static void tcg_target_init(TCGContext *s);
  70static void tcg_target_qemu_prologue(TCGContext *s);
  71static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
  72                        intptr_t value, intptr_t addend);
  73
  74/* The CIE and FDE header definitions will be common to all hosts.  */
/* The CIE and FDE header definitions will be common to all hosts.  */

/*
 * DWARF-style Common Information Entry header.  The 'len' field is
 * aligned to the host pointer size so the structure can be emitted
 * as-is; per the debug-frame format, 'len' counts the bytes following it.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];   /* NUL-terminated augmentation string */
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/*
 * Frame Description Entry header.  Packed so that the uintptr_t
 * fields follow the 32-bit ones without host-dependent padding.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;    /* offset back to the governing CIE */
    uintptr_t func_start;   /* start of generated code described */
    uintptr_t func_len;     /* length of that code region */
} DebugFrameFDEHeader;

/* A CIE immediately followed by one FDE, emitted as a unit. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
  96
  97static void tcg_register_jit_int(const void *buf, size_t size,
  98                                 const void *debug_frame,
  99                                 size_t debug_frame_size)
 100    __attribute__((unused));
 101
 102/* Forward declarations for functions declared and used in tcg-target.c.inc. */
 103static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
 104                       intptr_t arg2);
 105static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 106static void tcg_out_movi(TCGContext *s, TCGType type,
 107                         TCGReg ret, tcg_target_long arg);
 108static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 109static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 110static void tcg_out_goto_tb(TCGContext *s, int which);
 111static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 112                       const TCGArg args[TCG_MAX_OP_ARGS],
 113                       const int const_args[TCG_MAX_OP_ARGS]);
 114#if TCG_TARGET_MAYBE_vec
 115static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 116                            TCGReg dst, TCGReg src);
 117static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 118                             TCGReg dst, TCGReg base, intptr_t offset);
 119static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
 120                             TCGReg dst, int64_t arg);
 121static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 122                           unsigned vecl, unsigned vece,
 123                           const TCGArg args[TCG_MAX_OP_ARGS],
 124                           const int const_args[TCG_MAX_OP_ARGS]);
 125#else
 126static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 127                                   TCGReg dst, TCGReg src)
 128{
 129    g_assert_not_reached();
 130}
 131static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 132                                    TCGReg dst, TCGReg base, intptr_t offset)
 133{
 134    g_assert_not_reached();
 135}
 136static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
 137                                    TCGReg dst, int64_t arg)
 138{
 139    g_assert_not_reached();
 140}
 141static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 142                                  unsigned vecl, unsigned vece,
 143                                  const TCGArg args[TCG_MAX_OP_ARGS],
 144                                  const int const_args[TCG_MAX_OP_ARGS])
 145{
 146    g_assert_not_reached();
 147}
 148#endif
 149static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
 150                       intptr_t arg2);
 151static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 152                        TCGReg base, intptr_t ofs);
 153static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
 154                         const TCGHelperInfo *info);
 155static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
 156static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
 157#ifdef TCG_TARGET_NEED_LDST_LABELS
 158static int tcg_out_ldst_finalize(TCGContext *s);
 159#endif
 160
 161TCGContext tcg_init_ctx;
 162__thread TCGContext *tcg_ctx;
 163
 164TCGContext **tcg_ctxs;
 165unsigned int tcg_cur_ctxs;
 166unsigned int tcg_max_ctxs;
 167TCGv_env cpu_env = 0;
 168const void *tcg_code_gen_epilogue;
 169uintptr_t tcg_splitwx_diff;
 170
 171#ifndef CONFIG_TCG_INTERPRETER
 172tcg_prologue_fn *tcg_qemu_tb_exec;
 173#endif
 174
 175static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
 176static TCGRegSet tcg_target_call_clobber_regs;
 177
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the generated-code stream, advancing code_ptr. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one previously emitted byte at *p (relocation patching). */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
 190
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/*
 * Emit a 16-bit value into the code stream.  When the insn unit is
 * exactly 2 bytes this is a single store; otherwise the value is
 * copied byte-wise across multiple (smaller) insn units.  The
 * branch is resolved at compile time.
 */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 16-bit value at *p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
 213
 214#if TCG_TARGET_INSN_UNIT_SIZE <= 4
 215static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
 216{
 217    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
 218        *s->code_ptr++ = v;
 219    } else {
 220        tcg_insn_unit *p = s->code_ptr;
 221        memcpy(p, &v, sizeof(v));
 222        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
 223    }
 224}
 225
 226static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
 227                                                       uint32_t v)
 228{
 229    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
 230        *p = v;
 231    } else {
 232        memcpy(p, &v, sizeof(v));
 233    }
 234}
 235#endif
 236
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/*
 * Emit a 64-bit value into the code stream.  When the insn unit is
 * 8 bytes this is a single store; otherwise the value is copied
 * byte-wise across multiple smaller insn units.  The branch is
 * resolved at compile time.
 */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 64-bit value at *p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
 259
 260/* label relocation processing */
 261
 262static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
 263                          TCGLabel *l, intptr_t addend)
 264{
 265    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 266
 267    r->type = type;
 268    r->ptr = code_ptr;
 269    r->addend = addend;
 270    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
 271}
 272
 273static void tcg_out_label(TCGContext *s, TCGLabel *l)
 274{
 275    tcg_debug_assert(!l->has_value);
 276    l->has_value = 1;
 277    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
 278}
 279
 280TCGLabel *gen_new_label(void)
 281{
 282    TCGContext *s = tcg_ctx;
 283    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
 284
 285    memset(l, 0, sizeof(TCGLabel));
 286    l->id = s->nb_labels++;
 287    QSIMPLEQ_INIT(&l->branches);
 288    QSIMPLEQ_INIT(&l->relocs);
 289
 290    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
 291
 292    return l;
 293}
 294
 295static bool tcg_resolve_relocs(TCGContext *s)
 296{
 297    TCGLabel *l;
 298
 299    QSIMPLEQ_FOREACH(l, &s->labels, next) {
 300        TCGRelocation *r;
 301        uintptr_t value = l->u.value;
 302
 303        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
 304            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
 305                return false;
 306            }
 307        }
 308    }
 309    return true;
 310}
 311
/* Record the current code offset as the TB's jump-reset point. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
 320
/* Record the current code offset as the TB's goto_tb jump instruction. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
 329
/* Address of the TB's indirect jump target slot, as seen at execution. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
 338
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwinds to the sigsetjmp in the translation loop; -2 marks overflow. */
    siglongjmp(s->jmp_trans, -2);
}
 345
 346#define C_PFX1(P, A)                    P##A
 347#define C_PFX2(P, A, B)                 P##A##_##B
 348#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
 349#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
 350#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
 351#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
 352
 353/* Define an enumeration for the various combinations. */
 354
 355#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
 356#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
 357#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
 358#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
 359
 360#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
 361#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
 362#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
 363#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
 364
 365#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
 366
 367#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
 368#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
 369#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
 370#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
 371
 372typedef enum {
 373#include "tcg-target-con-set.h"
 374} TCGConstraintSetIndex;
 375
 376static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
 377
 378#undef C_O0_I1
 379#undef C_O0_I2
 380#undef C_O0_I3
 381#undef C_O0_I4
 382#undef C_O1_I1
 383#undef C_O1_I2
 384#undef C_O1_I3
 385#undef C_O1_I4
 386#undef C_N1_I2
 387#undef C_O2_I1
 388#undef C_O2_I2
 389#undef C_O2_I3
 390#undef C_O2_I4
 391
 392/* Put all of the constraint sets into an array, indexed by the enum. */
 393
 394#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
 395#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
 396#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
 397#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
 398
 399#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
 400#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
 401#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
 402#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
 403
 404#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
 405
 406#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
 407#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
 408#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
 409#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
 410
 411static const TCGTargetOpDef constraint_sets[] = {
 412#include "tcg-target-con-set.h"
 413};
 414
 415
 416#undef C_O0_I1
 417#undef C_O0_I2
 418#undef C_O0_I3
 419#undef C_O0_I4
 420#undef C_O1_I1
 421#undef C_O1_I2
 422#undef C_O1_I3
 423#undef C_O1_I4
 424#undef C_N1_I2
 425#undef C_O2_I1
 426#undef C_O2_I2
 427#undef C_O2_I3
 428#undef C_O2_I4
 429
 430/* Expand the enumerator to be returned from tcg_target_op_def(). */
 431
 432#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
 433#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
 434#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
 435#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
 436
 437#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
 438#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
 439#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
 440#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
 441
 442#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
 443
 444#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
 445#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
 446#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
 447#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
 448
 449#include "tcg-target.c.inc"
 450
/*
 * Allocate the per-context plugin TB descriptor and its instruction
 * array.  No-op unless built with CONFIG_PLUGIN.
 */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    /* Each element is freed via qemu_plugin_insn_cleanup_fn on removal. */
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
 459
 460/*
 461 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 462 * and registered the target's TCG globals) must register with this function
 463 * before initiating translation.
 464 *
 465 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 466 * of tcg_region_init() for the reasoning behind this.
 467 *
 468 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 469 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
 470 * is not used anymore for translation once this function is called.
 471 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 474 */
#ifdef CONFIG_USER_ONLY
/* User-mode: every thread shares the single initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
/*
 * Softmmu: clone tcg_init_ctx into a private per-thread context and
 * publish it in tcg_ctxs[].
 */
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* mem_base pointed into tcg_init_ctx.temps[]; recompute the
               index and point it at the copy's own temps array. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* NOTE(review): slot 0 appears to reuse the plugin context and
           region allocation already set up for the initial context —
           confirm against tcg_region_init/tcg_context_init callers. */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
 510
/* pool based memory allocation */

/*
 * Slow path of tcg_malloc(): called when the current pool chunk cannot
 * satisfy 'size'.  Oversized requests get a dedicated chunk on the
 * pool_first_large list (freed by tcg_pool_reset); normal requests
 * advance to — or allocate — the next fixed-size chunk and carve the
 * allocation from its start.  Memory is only reclaimed in bulk, never
 * individually freed.
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;   /* no chunks exist yet: create the first */
        } else {
            if (!p->next) {
            new_pool:
                /* Append a fresh fixed-size chunk to the chain.  Note the
                   goto above jumps into this block; both paths share the
                   allocation and linking code below. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                /* Reuse the next already-allocated chunk in the chain. */
                p = p->next;
            }
        }
    }
    /* The chunk is (re)used from its start: hand out the first 'size'
       bytes and record the remaining bounds for the fast path. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
 552
 553void tcg_pool_reset(TCGContext *s)
 554{
 555    TCGPool *p, *t;
 556    for (p = s->pool_first_large; p; p = t) {
 557        t = p->next;
 558        g_free(p);
 559    }
 560    s->pool_first_large = NULL;
 561    s->pool_cur = s->pool_end = NULL;
 562    s->pool_current = NULL;
 563}
 564
 565#include "exec/helper-proto.h"
 566
 567static TCGHelperInfo all_helpers[] = {
 568#include "exec/helper-tcg.h"
 569};
 570static GHashTable *helper_table;
 571
 572#ifdef CONFIG_TCG_INTERPRETER
/*
 * Map a dh_typecode_* value (one 3-bit field of a helper typemask)
 * to the corresponding libffi type descriptor.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL                    /* element list is NULL-terminated */
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
 609
/*
 * Build an ffi_cif call descriptor for every registered helper,
 * deduplicating by typemask: helpers with identical signatures share
 * one cif.  The temporary hash table is destroyed afterwards; the cif
 * allocations themselves persist for the life of the process.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        /* cif plus its argument-type array in a single allocation. */
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            /* Same signature already prepared: share the descriptor. */
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);   /* 3 bits of typemask per arg */
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                /* Field 0 is the return type; args start at field 1. */
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    /* Drop the lookup table only; the cifs remain referenced by info->cif. */
    g_hash_table_destroy(ffi_table);
}
 661#endif /* CONFIG_TCG_INTERPRETER */
 662
/* Running totals used while laying out one helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
 669
 670static void layout_arg_even(TCGCumulativeArgs *cum)
 671{
 672    cum->arg_slot += cum->arg_slot & 1;
 673}
 674
 675static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
 676                         TCGCallArgumentKind kind)
 677{
 678    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
 679
 680    *loc = (TCGCallArgumentLoc){
 681        .kind = kind,
 682        .arg_idx = cum->arg_idx,
 683        .arg_slot = cum->arg_slot,
 684    };
 685    cum->info_in_idx++;
 686    cum->arg_slot++;
 687}
 688
 689static void layout_arg_normal_n(TCGCumulativeArgs *cum,
 690                                TCGHelperInfo *info, int n)
 691{
 692    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
 693
 694    for (int i = 0; i < n; ++i) {
 695        /* Layout all using the same arg_idx, adjusting the subindex. */
 696        loc[i] = (TCGCallArgumentLoc){
 697            .kind = TCG_CALL_ARG_NORMAL,
 698            .arg_idx = cum->arg_idx,
 699            .tmp_subindex = i,
 700            .arg_slot = cum->arg_slot + i,
 701        };
 702    }
 703    cum->info_in_idx += n;
 704    cum->arg_slot += n;
 705}
 706
/*
 * Lay out a 128-bit argument passed by reference: one normal slot
 * carries the pointer, and n "ref" stack slots hold a private copy
 * of the value itself.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
 738
/*
 * Compute the complete argument/return layout for one helper from its
 * packed typemask (low 3 bits: return type; each further 3-bit field:
 * one argument type), applying the host ABI conventions selected by
 * the TCG_TARGET_CALL_* macros.  Fills info->nr_out, out_kind, nr_in
 * and in[].
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* Two registers on a 32-bit host, one on a 64-bit host. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* The low typecode bit distinguishes signed (s32) from
                   unsigned (i32), selecting _EXTEND_S vs _EXTEND_U. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Some arguments spilled to the stack: place the by-ref
               copies after them, rounded up to Int128 alignment. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            /* Shift every recorded ref_slot by the final base offset. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
 918
 919static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
 920static void process_op_defs(TCGContext *s);
 921static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
 922                                            TCGReg reg, const char *name);
 923
/*
 * One-time initialization of the primary TCGContext: sizes the operand
 * constraint arrays, registers all helpers, initializes the backend,
 * and creates the fixed "env" global.  max_cpus bounds the number of
 * per-vCPU contexts in softmmu mode.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-opcode constraint slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* First, find how many leading registers are call-saved: stop at the
       first call-clobbered one.  n holds that count afterwards. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* The remaining (call-clobbered) registers keep their order. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* Create the fixed "env" global backed by TCG_AREG0; the register
       must not already be reserved by the backend. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1007
/*
 * Top-level TCG startup: initialize the shared context, then carve the
 * translation buffer (tb_size bytes, split-wx per splitwx) into regions
 * for up to max_cpus translators.  Call once before any translation.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1013
1014/*
1015 * Allocate TBs right before their corresponding translated code, making
1016 * sure that TBs and code are on different cache lines.
1017 */
1018TranslationBlock *tcg_tb_alloc(TCGContext *s)
1019{
1020    uintptr_t align = qemu_icache_linesize;
1021    TranslationBlock *tb;
1022    void *next;
1023
1024 retry:
1025    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1026    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1027
1028    if (unlikely(next > s->code_gen_highwater)) {
1029        if (tcg_region_alloc(s)) {
1030            return NULL;
1031        }
1032        goto retry;
1033    }
1034    qatomic_set(&s->code_gen_ptr, next);
1035    s->data_gen_ptr = NULL;
1036    return tb;
1037}
1038
/*
 * Generate the host prologue/epilogue at the start of the region and
 * publish it as tcg_qemu_tb_exec.  Must run once per context before
 * any TB is generated.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Callers enter through the read-execute alias of the buffer. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written code visible on the execute side. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble the code part, then hex-dump the trailing
                   constant pool word by word. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1120
1121void tcg_func_start(TCGContext *s)
1122{
1123    tcg_pool_reset(s);
1124    s->nb_temps = s->nb_globals;
1125
1126    /* No temps have been previously allocated for size or locality.  */
1127    memset(s->free_temps, 0, sizeof(s->free_temps));
1128
1129    /* No constant temps have been previously allocated. */
1130    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1131        if (s->const_table[i]) {
1132            g_hash_table_remove_all(s->const_table[i]);
1133        }
1134    }
1135
1136    s->nb_ops = 0;
1137    s->nb_labels = 0;
1138    s->current_frame_offset = s->frame_start;
1139
1140#ifdef CONFIG_DEBUG_TCG
1141    s->goto_tb_issue_mask = 0;
1142#endif
1143
1144    QTAILQ_INIT(&s->ops);
1145    QTAILQ_INIT(&s->free_ops);
1146    QSIMPLEQ_INIT(&s->labels);
1147}
1148
1149static TCGTemp *tcg_temp_alloc(TCGContext *s)
1150{
1151    int n = s->nb_temps++;
1152
1153    if (n >= TCG_MAX_TEMPS) {
1154        tcg_raise_tb_overflow(s);
1155    }
1156    return memset(&s->temps[n], 0, sizeof(TCGTemp));
1157}
1158
1159static TCGTemp *tcg_global_alloc(TCGContext *s)
1160{
1161    TCGTemp *ts;
1162
1163    tcg_debug_assert(s->nb_globals == s->nb_temps);
1164    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1165    s->nb_globals++;
1166    ts = tcg_temp_alloc(s);
1167    ts->kind = TEMP_GLOBAL;
1168
1169    return ts;
1170}
1171
1172static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1173                                            TCGReg reg, const char *name)
1174{
1175    TCGTemp *ts;
1176
1177    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1178        tcg_abort();
1179    }
1180
1181    ts = tcg_global_alloc(s);
1182    ts->base_type = type;
1183    ts->type = type;
1184    ts->kind = TEMP_FIXED;
1185    ts->reg = reg;
1186    ts->name = name;
1187    tcg_regset_set_reg(s->reserved_regs, reg);
1188
1189    return ts;
1190}
1191
/*
 * Record the spill-frame window [start, start + size) addressed off REG,
 * and create the fixed "_frame" global used as the base for spill slots.
 */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1199
1200TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1201                                     intptr_t offset, const char *name)
1202{
1203    TCGContext *s = tcg_ctx;
1204    TCGTemp *base_ts = tcgv_ptr_temp(base);
1205    TCGTemp *ts = tcg_global_alloc(s);
1206    int indirect_reg = 0;
1207
1208    switch (base_ts->kind) {
1209    case TEMP_FIXED:
1210        break;
1211    case TEMP_GLOBAL:
1212        /* We do not support double-indirect registers.  */
1213        tcg_debug_assert(!base_ts->indirect_reg);
1214        base_ts->indirect_base = 1;
1215        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1216                            ? 2 : 1);
1217        indirect_reg = 1;
1218        break;
1219    default:
1220        g_assert_not_reached();
1221    }
1222
1223    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1224        TCGTemp *ts2 = tcg_global_alloc(s);
1225        char buf[64];
1226
1227        ts->base_type = TCG_TYPE_I64;
1228        ts->type = TCG_TYPE_I32;
1229        ts->indirect_reg = indirect_reg;
1230        ts->mem_allocated = 1;
1231        ts->mem_base = base_ts;
1232        ts->mem_offset = offset;
1233        pstrcpy(buf, sizeof(buf), name);
1234        pstrcat(buf, sizeof(buf), "_0");
1235        ts->name = strdup(buf);
1236
1237        tcg_debug_assert(ts2 == ts + 1);
1238        ts2->base_type = TCG_TYPE_I64;
1239        ts2->type = TCG_TYPE_I32;
1240        ts2->indirect_reg = indirect_reg;
1241        ts2->mem_allocated = 1;
1242        ts2->mem_base = base_ts;
1243        ts2->mem_offset = offset + 4;
1244        ts2->temp_subindex = 1;
1245        pstrcpy(buf, sizeof(buf), name);
1246        pstrcat(buf, sizeof(buf), "_1");
1247        ts2->name = strdup(buf);
1248    } else {
1249        ts->base_type = type;
1250        ts->type = type;
1251        ts->indirect_reg = indirect_reg;
1252        ts->mem_allocated = 1;
1253        ts->mem_base = base_ts;
1254        ts->mem_offset = offset;
1255        ts->name = name;
1256    }
1257    return ts;
1258}
1259
1260TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1261{
1262    TCGContext *s = tcg_ctx;
1263    TCGTemp *ts;
1264    int n;
1265
1266    if (kind == TEMP_EBB) {
1267        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1268
1269        if (idx < TCG_MAX_TEMPS) {
1270            /* There is already an available temp with the right type.  */
1271            clear_bit(idx, s->free_temps[type].l);
1272
1273            ts = &s->temps[idx];
1274            ts->temp_allocated = 1;
1275            tcg_debug_assert(ts->base_type == type);
1276            tcg_debug_assert(ts->kind == kind);
1277            return ts;
1278        }
1279    } else {
1280        tcg_debug_assert(kind == TEMP_TB);
1281    }
1282
1283    switch (type) {
1284    case TCG_TYPE_I32:
1285    case TCG_TYPE_V64:
1286    case TCG_TYPE_V128:
1287    case TCG_TYPE_V256:
1288        n = 1;
1289        break;
1290    case TCG_TYPE_I64:
1291        n = 64 / TCG_TARGET_REG_BITS;
1292        break;
1293    case TCG_TYPE_I128:
1294        n = 128 / TCG_TARGET_REG_BITS;
1295        break;
1296    default:
1297        g_assert_not_reached();
1298    }
1299
1300    ts = tcg_temp_alloc(s);
1301    ts->base_type = type;
1302    ts->temp_allocated = 1;
1303    ts->kind = kind;
1304
1305    if (n == 1) {
1306        ts->type = type;
1307    } else {
1308        ts->type = TCG_TYPE_REG;
1309
1310        for (int i = 1; i < n; ++i) {
1311            TCGTemp *ts2 = tcg_temp_alloc(s);
1312
1313            tcg_debug_assert(ts2 == ts + i);
1314            ts2->base_type = type;
1315            ts2->type = TCG_TYPE_REG;
1316            ts2->temp_allocated = 1;
1317            ts2->temp_subindex = i;
1318            ts2->kind = kind;
1319        }
1320    }
1321    return ts;
1322}
1323
1324TCGv_vec tcg_temp_new_vec(TCGType type)
1325{
1326    TCGTemp *t;
1327
1328#ifdef CONFIG_DEBUG_TCG
1329    switch (type) {
1330    case TCG_TYPE_V64:
1331        assert(TCG_TARGET_HAS_v64);
1332        break;
1333    case TCG_TYPE_V128:
1334        assert(TCG_TARGET_HAS_v128);
1335        break;
1336    case TCG_TYPE_V256:
1337        assert(TCG_TARGET_HAS_v256);
1338        break;
1339    default:
1340        g_assert_not_reached();
1341    }
1342#endif
1343
1344    t = tcg_temp_new_internal(type, TEMP_EBB);
1345    return temp_tcgv_vec(t);
1346}
1347
1348/* Create a new temp of the same type as an existing temp.  */
1349TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1350{
1351    TCGTemp *t = tcgv_vec_temp(match);
1352
1353    tcg_debug_assert(t->temp_allocated != 0);
1354
1355    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1356    return temp_tcgv_vec(t);
1357}
1358
1359void tcg_temp_free_internal(TCGTemp *ts)
1360{
1361    TCGContext *s = tcg_ctx;
1362
1363    switch (ts->kind) {
1364    case TEMP_CONST:
1365    case TEMP_TB:
1366        /* Silently ignore free. */
1367        break;
1368    case TEMP_EBB:
1369        tcg_debug_assert(ts->temp_allocated != 0);
1370        ts->temp_allocated = 0;
1371        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1372        break;
1373    default:
1374        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1375        g_assert_not_reached();
1376    }
1377}
1378
/*
 * Return the interned TEMP_CONST temp for (TYPE, VAL), creating it on
 * first use.  Repeated calls with the same pair return the same temp.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type intern table, keyed by int64 value. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        /* The hash key must outlive the table, so it points into the
           temp's own val storage rather than at the stack copy. */
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 32-bit host: a 64-bit constant spans two 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1433
1434TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1435{
1436    val = dup_const(vece, val);
1437    return temp_tcgv_vec(tcg_constant_internal(type, val));
1438}
1439
1440TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1441{
1442    TCGTemp *t = tcgv_vec_temp(match);
1443
1444    tcg_debug_assert(t->temp_allocated != 0);
1445    return tcg_constant_vec(t->base_type, vece, val);
1446}
1447
/*
 * Return true if OP may appear in the opcode stream for the current
 * backend.  Mandatory ops return true unconditionally; optional ops
 * test the corresponding TCG_TARGET_HAS_* feature flag.
 */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Generic control-flow and memory ops every backend must implement. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer ops, gated on backend feature flags. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare ops only exist on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit integer ops, mandatory on 64-bit hosts only. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer ops. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops require some vector width plus the specific feature. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific ops past last_generic are always supported. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1742
1743static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1744
/*
 * Emit an INDEX_op_call invoking helper FUNC with arguments ARGS,
 * placing the result (if any) in RET.  FUNC must have been registered
 * in helper_table; its TCGHelperInfo supplies the pre-computed call
 * layout (how each argument and the return value are passed).
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];  /* widening temps to free */
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* Outputs + inputs + the trailing func and info words. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Record outputs: none, one temp, or 2/4 consecutive pieces. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Record inputs according to the per-argument layout in info->in. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI requires the 32-bit argument widened to 64 bits;
                   emit the extension before the call op. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* The widening temps are dead once the call op has been emitted. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
1831
1832static void tcg_reg_alloc_start(TCGContext *s)
1833{
1834    int i, n;
1835
1836    for (i = 0, n = s->nb_temps; i < n; i++) {
1837        TCGTemp *ts = &s->temps[i];
1838        TCGTempVal val = TEMP_VAL_MEM;
1839
1840        switch (ts->kind) {
1841        case TEMP_CONST:
1842            val = TEMP_VAL_CONST;
1843            break;
1844        case TEMP_FIXED:
1845            val = TEMP_VAL_REG;
1846            break;
1847        case TEMP_GLOBAL:
1848            break;
1849        case TEMP_EBB:
1850            val = TEMP_VAL_DEAD;
1851            /* fall through */
1852        case TEMP_TB:
1853            ts->mem_allocated = 0;
1854            break;
1855        default:
1856            g_assert_not_reached();
1857        }
1858        ts->val_type = val;
1859    }
1860
1861    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1862}
1863
/*
 * Format a human-readable name for TS into BUF (used for op dumps):
 * fixed/global temps print their name, TB temps "locN", EBB temps
 * "tmpN", constants their value.  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        /* Numbered relative to the first non-global temp. */
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants are prefixed with their width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
1903
/* As tcg_get_arg_str_ptr, but starting from an encoded TCGArg. */
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}
1909
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1925
/*
 * Printable names for qemu_ld/st memory operations, indexed by the
 * MO_BSWAP | MO_SSIZE bits of a MemOp.  Note the 64-bit entries print
 * as "leq"/"beq" (short historical spelling), not "leuq"/"beuq".
 */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
1941
/*
 * Printable prefixes for MemOp alignment bits, indexed by
 * (MO_AMASK bits) >> MO_ASHIFT.  The empty string marks whichever
 * mode is the target's default, so it is not printed.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1957
/*
 * Printable names for bswap flag combinations, indexed by the
 * TCG_BSWAP_* bit mask.  Combinations not listed here are
 * zero-initialized (empty strings).
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
1965
1966static inline bool tcg_regset_single(TCGRegSet d)
1967{
1968    return (d & (d - 1)) == 0;
1969}
1970
1971static inline TCGReg tcg_regset_first(TCGRegSet d)
1972{
1973    if (TCG_TARGET_NB_REGS <= 32) {
1974        return ctz32(d);
1975    } else {
1976        return ctz64(d);
1977    }
1978}
1979
/*
 * Return only the number of characters output -- no error return.
 * (GCC/Clang statement expression: clamps fprintf's negative error
 * result to 0, so callers can accumulate column widths unconditionally.)
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
1983
/*
 * Pretty-print the current op list of @s to @f, one op per line.
 * Liveness annotations (sync/dead) are printed whenever op->life is
 * non-zero; when @have_prefs is set, the register-allocation
 * preferences for each output argument are printed as well.
 */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Only nb_oargs is consulted after this branch (prefs loop). */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each start word is split across two 32-bit args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            /*
             * k indexes op->args[]; i counts entries printed within the
             * current category (outputs, inputs, then constants).
             */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Decode the first constant argument symbolically, per opcode. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    MemOpIdx oi = op->args[k++];
                    /* NOTE: shadows the outer TCGOp *op within this scope. */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        /* Unknown bits set: fall back to raw hex. */
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Labels and barriers are also printed symbolically. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Ordered access combination (r = load, w = store). */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant arguments are printed as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column before liveness / preference info. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Arguments that must be synced back to their home location. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Arguments that are dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
2287
2288/* we give more priority to constraints with less registers */
2289static int get_constraint_priority(const TCGOpDef *def, int k)
2290{
2291    const TCGArgConstraint *arg_ct = &def->args_ct[k];
2292    int n = ctpop64(arg_ct->regs);
2293
2294    /*
2295     * Sort constraints of a single register first, which includes output
2296     * aliases (which must exactly match the input already allocated).
2297     */
2298    if (n == 1 || arg_ct->oalias) {
2299        return INT_MAX;
2300    }
2301
2302    /*
2303     * Sort register pairs next, first then second immediately after.
2304     * Arbitrarily sort multiple pairs by the index of the first reg;
2305     * there shouldn't be many pairs.
2306     */
2307    switch (arg_ct->pair) {
2308    case 1:
2309    case 3:
2310        return (k + 1) * 2;
2311    case 2:
2312        return (arg_ct->pair_index + 1) * 2 - 1;
2313    }
2314
2315    /* Finally, sort by decreasing register count. */
2316    assert(n > 1);
2317    return -n;
2318}
2319
2320/* sort from highest priority to lowest */
2321static void sort_constraints(TCGOpDef *def, int start, int n)
2322{
2323    int i, j;
2324    TCGArgConstraint *a = def->args_ct;
2325
2326    for (i = 0; i < n; i++) {
2327        a[start + i].sort_index = start + i;
2328    }
2329    if (n <= 1) {
2330        return;
2331    }
2332    for (i = 0; i < n - 1; i++) {
2333        for (j = i + 1; j < n; j++) {
2334            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2335            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2336            if (p1 < p2) {
2337                int tmp = a[start + i].sort_index;
2338                a[start + i].sort_index = a[start + j].sort_index;
2339                a[start + j].sort_index = tmp;
2340            }
2341        }
2342    }
2343}
2344
/*
 * Translate each opcode's target constraint strings (selected by
 * tcg_target_op_def from constraint_sets[]) into the binary fields of
 * def->args_ct[], resolve output/input aliases and register pairs,
 * then sort each opcode's constraints by allocation priority.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* First, prefixes with whole-argument meaning. */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output register 'o'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining characters: constant and register-set letters. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2549
2550static void remove_label_use(TCGOp *op, int idx)
2551{
2552    TCGLabel *label = arg_label(op->args[idx]);
2553    TCGLabelUse *use;
2554
2555    QSIMPLEQ_FOREACH(use, &label->branches, next) {
2556        if (use->op == op) {
2557            QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2558            return;
2559        }
2560    }
2561    g_assert_not_reached();
2562}
2563
2564void tcg_op_remove(TCGContext *s, TCGOp *op)
2565{
2566    switch (op->opc) {
2567    case INDEX_op_br:
2568        remove_label_use(op, 0);
2569        break;
2570    case INDEX_op_brcond_i32:
2571    case INDEX_op_brcond_i64:
2572        remove_label_use(op, 3);
2573        break;
2574    case INDEX_op_brcond2_i32:
2575        remove_label_use(op, 5);
2576        break;
2577    default:
2578        break;
2579    }
2580
2581    QTAILQ_REMOVE(&s->ops, op, link);
2582    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2583    s->nb_ops--;
2584
2585#ifdef CONFIG_PROFILER
2586    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2587#endif
2588}
2589
2590void tcg_remove_ops_after(TCGOp *op)
2591{
2592    TCGContext *s = tcg_ctx;
2593
2594    while (true) {
2595        TCGOp *last = tcg_last_op();
2596        if (last == op) {
2597            return;
2598        }
2599        tcg_op_remove(s, last);
2600    }
2601}
2602
2603static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2604{
2605    TCGContext *s = tcg_ctx;
2606    TCGOp *op = NULL;
2607
2608    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2609        QTAILQ_FOREACH(op, &s->free_ops, link) {
2610            if (nargs <= op->nargs) {
2611                QTAILQ_REMOVE(&s->free_ops, op, link);
2612                nargs = op->nargs;
2613                goto found;
2614            }
2615        }
2616    }
2617
2618    /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2619    nargs = MAX(4, nargs);
2620    op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2621
2622 found:
2623    memset(op, 0, offsetof(TCGOp, link));
2624    op->opc = opc;
2625    op->nargs = nargs;
2626
2627    /* Check for bitfield overflow. */
2628    tcg_debug_assert(op->nargs == nargs);
2629
2630    s->nb_ops++;
2631    return op;
2632}
2633
/* Allocate a new op and append it to the tail of the current op list. */
TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
{
    TCGOp *op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    return op;
}
2640
/*
 * Allocate a new op and insert it immediately before @old_op.
 * (@s is unused here but kept for symmetry with tcg_op_insert_after.)
 */
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
                            TCGOpcode opc, unsigned nargs)
{
    TCGOp *new_op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
    return new_op;
}
2648
/* Allocate a new op and insert it immediately after @old_op in s->ops. */
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
                           TCGOpcode opc, unsigned nargs)
{
    TCGOp *new_op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
    return new_op;
}
2656
2657static void move_label_uses(TCGLabel *to, TCGLabel *from)
2658{
2659    TCGLabelUse *u;
2660
2661    QSIMPLEQ_FOREACH(u, &from->branches, next) {
2662        TCGOp *op = u->op;
2663        switch (op->opc) {
2664        case INDEX_op_br:
2665            op->args[0] = label_arg(to);
2666            break;
2667        case INDEX_op_brcond_i32:
2668        case INDEX_op_brcond_i64:
2669            op->args[3] = label_arg(to);
2670            break;
2671        case INDEX_op_brcond2_i32:
2672            op->args[5] = label_arg(to);
2673            break;
2674        default:
2675            g_assert_not_reached();
2676        }
2677    }
2678
2679    QSIMPLEQ_CONCAT(&to->branches, &from->branches);
2680}
2681
/*
 * Reachable analysis : remove unreachable code.
 * Forward scan: once an unconditional control transfer is seen, all
 * following ops are dead until the next referenced label.  Along the
 * way, merge adjacent labels and delete branches-to-next.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2773
/*
 * Per-temp state bits for the liveness passes.
 * TS_DEAD: the temp's value is not needed by any later op.
 * TS_MEM:  the temp's value is synced to its memory slot.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test per-argument life bits.  Both macros read a local variable
 * named arg_life in the enclosing scope (see liveness_pass_1/2).
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2779
2780/* For liveness_pass_1, the register preferences for a given temp.  */
2781static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2782{
2783    return ts->state_ptr;
2784}
2785
2786/* For liveness_pass_1, reset the preferences for a given temp to the
2787 * maximal regset for its type.
2788 */
2789static inline void la_reset_pref(TCGTemp *ts)
2790{
2791    *la_temp_pref(ts)
2792        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2793}
2794
2795/* liveness analysis: end of function: all temps are dead, and globals
2796   should be in memory. */
2797static void la_func_end(TCGContext *s, int ng, int nt)
2798{
2799    int i;
2800
2801    for (i = 0; i < ng; ++i) {
2802        s->temps[i].state = TS_DEAD | TS_MEM;
2803        la_reset_pref(&s->temps[i]);
2804    }
2805    for (i = ng; i < nt; ++i) {
2806        s->temps[i].state = TS_DEAD;
2807        la_reset_pref(&s->temps[i]);
2808    }
2809}
2810
2811/* liveness analysis: end of basic block: all temps are dead, globals
2812   and local temps should be in memory. */
2813static void la_bb_end(TCGContext *s, int ng, int nt)
2814{
2815    int i;
2816
2817    for (i = 0; i < nt; ++i) {
2818        TCGTemp *ts = &s->temps[i];
2819        int state;
2820
2821        switch (ts->kind) {
2822        case TEMP_FIXED:
2823        case TEMP_GLOBAL:
2824        case TEMP_TB:
2825            state = TS_DEAD | TS_MEM;
2826            break;
2827        case TEMP_EBB:
2828        case TEMP_CONST:
2829            state = TS_DEAD;
2830            break;
2831        default:
2832            g_assert_not_reached();
2833        }
2834        ts->state = state;
2835        la_reset_pref(ts);
2836    }
2837}
2838
2839/* liveness analysis: sync globals back to memory.  */
2840static void la_global_sync(TCGContext *s, int ng)
2841{
2842    int i;
2843
2844    for (i = 0; i < ng; ++i) {
2845        int state = s->temps[i].state;
2846        s->temps[i].state = state | TS_MEM;
2847        if (state == TS_DEAD) {
2848            /* If the global was previously dead, reset prefs.  */
2849            la_reset_pref(&s->temps[i]);
2850        }
2851    }
2852}
2853
2854/*
2855 * liveness analysis: conditional branch: all temps are dead unless
2856 * explicitly live-across-conditional-branch, globals and local temps
2857 * should be synced.
2858 */
2859static void la_bb_sync(TCGContext *s, int ng, int nt)
2860{
2861    la_global_sync(s, ng);
2862
2863    for (int i = ng; i < nt; ++i) {
2864        TCGTemp *ts = &s->temps[i];
2865        int state;
2866
2867        switch (ts->kind) {
2868        case TEMP_TB:
2869            state = ts->state;
2870            ts->state = state | TS_MEM;
2871            if (state != TS_DEAD) {
2872                continue;
2873            }
2874            break;
2875        case TEMP_EBB:
2876        case TEMP_CONST:
2877            continue;
2878        default:
2879            g_assert_not_reached();
2880        }
2881        la_reset_pref(&s->temps[i]);
2882    }
2883}
2884
2885/* liveness analysis: sync globals back to memory and kill.  */
2886static void la_global_kill(TCGContext *s, int ng)
2887{
2888    int i;
2889
2890    for (i = 0; i < ng; i++) {
2891        s->temps[i].state = TS_DEAD | TS_MEM;
2892        la_reset_pref(&s->temps[i]);
2893    }
2894}
2895
2896/* liveness analysis: note live globals crossing calls.  */
2897static void la_cross_call(TCGContext *s, int nt)
2898{
2899    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2900    int i;
2901
2902    for (i = 0; i < nt; i++) {
2903        TCGTemp *ts = &s->temps[i];
2904        if (!(ts->state & TS_DEAD)) {
2905            TCGRegSet *pset = la_temp_pref(ts);
2906            TCGRegSet set = *pset;
2907
2908            set &= mask;
2909            /* If the combination is not possible, restart.  */
2910            if (set == 0) {
2911                set = tcg_target_available_regs[ts->type] & mask;
2912            }
2913            *pset = set;
2914        }
2915    }
2916}
2917
2918/*
2919 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
2920 * to TEMP_EBB, if possible.
2921 */
2922static void __attribute__((noinline))
2923liveness_pass_0(TCGContext *s)
2924{
2925    void * const multiple_ebb = (void *)(uintptr_t)-1;
2926    int nb_temps = s->nb_temps;
2927    TCGOp *op, *ebb;
2928
2929    for (int i = s->nb_globals; i < nb_temps; ++i) {
2930        s->temps[i].state_ptr = NULL;
2931    }
2932
2933    /*
2934     * Represent each EBB by the op at which it begins.  In the case of
2935     * the first EBB, this is the first op, otherwise it is a label.
2936     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
2937     * within a single EBB, else MULTIPLE_EBB.
2938     */
2939    ebb = QTAILQ_FIRST(&s->ops);
2940    QTAILQ_FOREACH(op, &s->ops, link) {
2941        const TCGOpDef *def;
2942        int nb_oargs, nb_iargs;
2943
2944        switch (op->opc) {
2945        case INDEX_op_set_label:
2946            ebb = op;
2947            continue;
2948        case INDEX_op_discard:
2949            continue;
2950        case INDEX_op_call:
2951            nb_oargs = TCGOP_CALLO(op);
2952            nb_iargs = TCGOP_CALLI(op);
2953            break;
2954        default:
2955            def = &tcg_op_defs[op->opc];
2956            nb_oargs = def->nb_oargs;
2957            nb_iargs = def->nb_iargs;
2958            break;
2959        }
2960
2961        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
2962            TCGTemp *ts = arg_temp(op->args[i]);
2963
2964            if (ts->kind != TEMP_TB) {
2965                continue;
2966            }
2967            if (ts->state_ptr == NULL) {
2968                ts->state_ptr = ebb;
2969            } else if (ts->state_ptr != ebb) {
2970                ts->state_ptr = multiple_ebb;
2971            }
2972        }
2973    }
2974
2975    /*
2976     * For TEMP_TB that turned out not to be used beyond one EBB,
2977     * reduce the liveness to TEMP_EBB.
2978     */
2979    for (int i = s->nb_globals; i < nb_temps; ++i) {
2980        TCGTemp *ts = &s->temps[i];
2981        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
2982            ts->kind = TEMP_EBB;
2983        }
2984    }
2985}
2986
/* Liveness analysis : update the opc_arg_life array to tell if a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, reached via state_ptr.  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk backward, so "dead" means "not used by any later op".  */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (REG_P(loc)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (REG_P(loc)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        /*
         * Double-word add/sub: if only the low half of the result is
         * live, the op can be narrowed to a single-word add/sub.
         */
        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        /*
         * Double-word multiply: if only one half of the result is live,
         * replace with the single-word mul or mul-high opcode.
         */
        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            /* Note outputs, then mark them dead for preceding opcodes.  */
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
3329
/* Liveness analysis: Convert indirect regs to direct temporaries.  */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            /* state_ptr links the indirect global to its direct temp.  */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Insert a load of the indirect global before this use.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    /* Insert a store of the value back to the global.  */
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after sync: store the input directly and
                           drop the now-useless mov.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3518
3519static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3520{
3521    intptr_t off;
3522    int size, align;
3523
3524    /* When allocating an object, look at the full type. */
3525    size = tcg_type_size(ts->base_type);
3526    switch (ts->base_type) {
3527    case TCG_TYPE_I32:
3528        align = 4;
3529        break;
3530    case TCG_TYPE_I64:
3531    case TCG_TYPE_V64:
3532        align = 8;
3533        break;
3534    case TCG_TYPE_I128:
3535    case TCG_TYPE_V128:
3536    case TCG_TYPE_V256:
3537        /*
3538         * Note that we do not require aligned storage for V256,
3539         * and that we provide alignment for I128 to match V128,
3540         * even if that's above what the host ABI requires.
3541         */
3542        align = 16;
3543        break;
3544    default:
3545        g_assert_not_reached();
3546    }
3547
3548    /*
3549     * Assume the stack is sufficiently aligned.
3550     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3551     * and do not require 16 byte vector alignment.  This seems slightly
3552     * easier than fully parameterizing the above switch statement.
3553     */
3554    align = MIN(TCG_TARGET_STACK_ALIGN, align);
3555    off = ROUND_UP(s->current_frame_offset, align);
3556
3557    /* If we've exhausted the stack frame, restart with a smaller TB. */
3558    if (off + size > s->frame_end) {
3559        tcg_raise_tb_overflow(s);
3560    }
3561    s->current_frame_offset = off + size;
3562#if defined(__sparc__)
3563    off += TCG_TARGET_STACK_BIAS;
3564#endif
3565
3566    /* If the object was subdivided, assign memory to all the parts. */
3567    if (ts->base_type != ts->type) {
3568        int part_size = tcg_type_size(ts->type);
3569        int part_count = size / part_size;
3570
3571        /*
3572         * Each part is allocated sequentially in tcg_temp_new_internal.
3573         * Jump back to the first part by subtracting the current index.
3574         */
3575        ts -= ts->temp_subindex;
3576        for (int i = 0; i < part_count; ++i) {
3577            ts[i].mem_offset = off + i * part_size;
3578            ts[i].mem_base = s->frame_temp;
3579            ts[i].mem_allocated = 1;
3580        }
3581    } else {
3582        ts->mem_offset = off;
3583        ts->mem_base = s->frame_temp;
3584        ts->mem_allocated = 1;
3585    }
3586}
3587
3588/* Assign @reg to @ts, and update reg_to_temp[]. */
3589static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3590{
3591    if (ts->val_type == TEMP_VAL_REG) {
3592        TCGReg old = ts->reg;
3593        tcg_debug_assert(s->reg_to_temp[old] == ts);
3594        if (old == reg) {
3595            return;
3596        }
3597        s->reg_to_temp[old] = NULL;
3598    }
3599    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3600    s->reg_to_temp[reg] = ts;
3601    ts->val_type = TEMP_VAL_REG;
3602    ts->reg = reg;
3603}
3604
3605/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3606static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3607{
3608    tcg_debug_assert(type != TEMP_VAL_REG);
3609    if (ts->val_type == TEMP_VAL_REG) {
3610        TCGReg reg = ts->reg;
3611        tcg_debug_assert(s->reg_to_temp[reg] == ts);
3612        s->reg_to_temp[reg] = NULL;
3613    }
3614    ts->val_type = type;
3615}
3616
3617static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3618
3619/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3620   mark it free; otherwise mark it dead.  */
3621static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3622{
3623    TCGTempVal new_type;
3624
3625    switch (ts->kind) {
3626    case TEMP_FIXED:
3627        return;
3628    case TEMP_GLOBAL:
3629    case TEMP_TB:
3630        new_type = TEMP_VAL_MEM;
3631        break;
3632    case TEMP_EBB:
3633        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3634        break;
3635    case TEMP_CONST:
3636        new_type = TEMP_VAL_CONST;
3637        break;
3638    default:
3639        g_assert_not_reached();
3640    }
3641    set_temp_val_nonreg(s, ts, new_type);
3642}
3643
/* Mark a temporary as dead (positive free_or_dead); releases its register. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
3649
3650/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3651   registers needs to be allocated to store a constant.  If 'free_or_dead'
3652   is non-zero, subsequently release the temporary; if it is positive, the
3653   temp is dead; if it is negative, the temp is free.  */
3654static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3655                      TCGRegSet preferred_regs, int free_or_dead)
3656{
3657    if (!temp_readonly(ts) && !ts->mem_coherent) {
3658        if (!ts->mem_allocated) {
3659            temp_allocate_frame(s, ts);
3660        }
3661        switch (ts->val_type) {
3662        case TEMP_VAL_CONST:
3663            /* If we're going to free the temp immediately, then we won't
3664               require it later in a register, so attempt to store the
3665               constant to memory directly.  */
3666            if (free_or_dead
3667                && tcg_out_sti(s, ts->type, ts->val,
3668                               ts->mem_base->reg, ts->mem_offset)) {
3669                break;
3670            }
3671            temp_load(s, ts, tcg_target_available_regs[ts->type],
3672                      allocated_regs, preferred_regs);
3673            /* fallthrough */
3674
3675        case TEMP_VAL_REG:
3676            tcg_out_st(s, ts->type, ts->reg,
3677                       ts->mem_base->reg, ts->mem_offset);
3678            break;
3679
3680        case TEMP_VAL_MEM:
3681            break;
3682
3683        case TEMP_VAL_DEAD:
3684        default:
3685            tcg_abort();
3686        }
3687        ts->mem_coherent = 1;
3688    }
3689    if (free_or_dead) {
3690        temp_free_or_dead(s, ts, free_or_dead);
3691    }
3692}
3693
3694/* free register 'reg' by spilling the corresponding temporary if necessary */
3695static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3696{
3697    TCGTemp *ts = s->reg_to_temp[reg];
3698    if (ts != NULL) {
3699        temp_sync(s, ts, allocated_regs, 0, -1);
3700    }
3701}
3702
3703/**
3704 * tcg_reg_alloc:
3705 * @required_regs: Set of registers in which we must allocate.
3706 * @allocated_regs: Set of registers which must be avoided.
3707 * @preferred_regs: Set of registers we should prefer.
3708 * @rev: True if we search the registers in "indirect" order.
3709 *
3710 * The allocated register must be in @required_regs & ~@allocated_regs,
3711 * but if we can put it in @preferred_regs we may save a move later.
3712 */
3713static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3714                            TCGRegSet allocated_regs,
3715                            TCGRegSet preferred_regs, bool rev)
3716{
3717    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3718    TCGRegSet reg_ct[2];
3719    const int *order;
3720
3721    reg_ct[1] = required_regs & ~allocated_regs;
3722    tcg_debug_assert(reg_ct[1] != 0);
3723    reg_ct[0] = reg_ct[1] & preferred_regs;
3724
3725    /* Skip the preferred_regs option if it cannot be satisfied,
3726       or if the preference made no difference.  */
3727    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3728
3729    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3730
3731    /* Try free registers, preferences first.  */
3732    for (j = f; j < 2; j++) {
3733        TCGRegSet set = reg_ct[j];
3734
3735        if (tcg_regset_single(set)) {
3736            /* One register in the set.  */
3737            TCGReg reg = tcg_regset_first(set);
3738            if (s->reg_to_temp[reg] == NULL) {
3739                return reg;
3740            }
3741        } else {
3742            for (i = 0; i < n; i++) {
3743                TCGReg reg = order[i];
3744                if (s->reg_to_temp[reg] == NULL &&
3745                    tcg_regset_test_reg(set, reg)) {
3746                    return reg;
3747                }
3748            }
3749        }
3750    }
3751
3752    /* We must spill something.  */
3753    for (j = f; j < 2; j++) {
3754        TCGRegSet set = reg_ct[j];
3755
3756        if (tcg_regset_single(set)) {
3757            /* One register in the set.  */
3758            TCGReg reg = tcg_regset_first(set);
3759            tcg_reg_free(s, reg, allocated_regs);
3760            return reg;
3761        } else {
3762            for (i = 0; i < n; i++) {
3763                TCGReg reg = order[i];
3764                if (tcg_regset_test_reg(set, reg)) {
3765                    tcg_reg_free(s, reg, allocated_regs);
3766                    return reg;
3767                }
3768            }
3769        }
3770    }
3771
3772    tcg_abort();
3773}
3774
3775static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
3776                                 TCGRegSet allocated_regs,
3777                                 TCGRegSet preferred_regs, bool rev)
3778{
3779    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3780    TCGRegSet reg_ct[2];
3781    const int *order;
3782
3783    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
3784    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
3785    tcg_debug_assert(reg_ct[1] != 0);
3786    reg_ct[0] = reg_ct[1] & preferred_regs;
3787
3788    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3789
3790    /*
3791     * Skip the preferred_regs option if it cannot be satisfied,
3792     * or if the preference made no difference.
3793     */
3794    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3795
3796    /*
3797     * Minimize the number of flushes by looking for 2 free registers first,
3798     * then a single flush, then two flushes.
3799     */
3800    for (fmin = 2; fmin >= 0; fmin--) {
3801        for (j = k; j < 2; j++) {
3802            TCGRegSet set = reg_ct[j];
3803
3804            for (i = 0; i < n; i++) {
3805                TCGReg reg = order[i];
3806
3807                if (tcg_regset_test_reg(set, reg)) {
3808                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
3809                    if (f >= fmin) {
3810                        tcg_reg_free(s, reg, allocated_regs);
3811                        tcg_reg_free(s, reg + 1, allocated_regs);
3812                        return reg;
3813                    }
3814                }
3815            }
3816        }
3817    }
3818    tcg_abort();
3819}
3820
3821/* Make sure the temporary is in a register.  If needed, allocate the register
3822   from DESIRED while avoiding ALLOCATED.  */
3823static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3824                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3825{
3826    TCGReg reg;
3827
3828    switch (ts->val_type) {
3829    case TEMP_VAL_REG:
3830        return;
3831    case TEMP_VAL_CONST:
3832        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3833                            preferred_regs, ts->indirect_base);
3834        if (ts->type <= TCG_TYPE_I64) {
3835            tcg_out_movi(s, ts->type, reg, ts->val);
3836        } else {
3837            uint64_t val = ts->val;
3838            MemOp vece = MO_64;
3839
3840            /*
3841             * Find the minimal vector element that matches the constant.
3842             * The targets will, in general, have to do this search anyway,
3843             * do this generically.
3844             */
3845            if (val == dup_const(MO_8, val)) {
3846                vece = MO_8;
3847            } else if (val == dup_const(MO_16, val)) {
3848                vece = MO_16;
3849            } else if (val == dup_const(MO_32, val)) {
3850                vece = MO_32;
3851            }
3852
3853            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3854        }
3855        ts->mem_coherent = 0;
3856        break;
3857    case TEMP_VAL_MEM:
3858        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3859                            preferred_regs, ts->indirect_base);
3860        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3861        ts->mem_coherent = 1;
3862        break;
3863    case TEMP_VAL_DEAD:
3864    default:
3865        tcg_abort();
3866    }
3867    set_temp_val_reg(s, ts, reg);
3868}
3869
/*
 * Save a temporary to memory.  'allocated_regs' would be used in case a
 * temporary register needed to be allocated to store a constant; with
 * liveness having already forced globals to memory, this reduces to an
 * assertion.
 */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /*
     * The liveness analysis already ensures that globals are back
     * in memory.  Keep a tcg_debug_assert for safety.
     */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
3878
3879/* save globals to their canonical location and assume they can be
3880   modified be the following code. 'allocated_regs' is used in case a
3881   temporary registers needs to be allocated to store a constant. */
3882static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3883{
3884    int i, n;
3885
3886    for (i = 0, n = s->nb_globals; i < n; i++) {
3887        temp_save(s, &s->temps[i], allocated_regs);
3888    }
3889}
3890
3891/* sync globals to their canonical location and assume they can be
3892   read by the following code. 'allocated_regs' is used in case a
3893   temporary registers needs to be allocated to store a constant. */
3894static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3895{
3896    int i, n;
3897
3898    for (i = 0, n = s->nb_globals; i < n; i++) {
3899        TCGTemp *ts = &s->temps[i];
3900        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3901                         || ts->kind == TEMP_FIXED
3902                         || ts->mem_coherent);
3903    }
3904}
3905
3906/* at the end of a basic block, we assume all temporaries are dead and
3907   all globals are stored at their canonical location. */
3908static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3909{
3910    int i;
3911
3912    for (i = s->nb_globals; i < s->nb_temps; i++) {
3913        TCGTemp *ts = &s->temps[i];
3914
3915        switch (ts->kind) {
3916        case TEMP_TB:
3917            temp_save(s, ts, allocated_regs);
3918            break;
3919        case TEMP_EBB:
3920            /* The liveness analysis already ensures that temps are dead.
3921               Keep an tcg_debug_assert for safety. */
3922            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3923            break;
3924        case TEMP_CONST:
3925            /* Similarly, we should have freed any allocated register. */
3926            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3927            break;
3928        default:
3929            g_assert_not_reached();
3930        }
3931    }
3932
3933    save_globals(s, allocated_regs);
3934}
3935
3936/*
3937 * At a conditional branch, we assume all temporaries are dead unless
3938 * explicitly live-across-conditional-branch; all globals and local
3939 * temps are synced to their location.
3940 */
3941static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3942{
3943    sync_globals(s, allocated_regs);
3944
3945    for (int i = s->nb_globals; i < s->nb_temps; i++) {
3946        TCGTemp *ts = &s->temps[i];
3947        /*
3948         * The liveness analysis already ensures that temps are dead.
3949         * Keep tcg_debug_asserts for safety.
3950         */
3951        switch (ts->kind) {
3952        case TEMP_TB:
3953            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3954            break;
3955        case TEMP_EBB:
3956        case TEMP_CONST:
3957            break;
3958        default:
3959            g_assert_not_reached();
3960        }
3961    }
3962}
3963
3964/*
3965 * Specialized code generation for INDEX_op_mov_* with a constant.
3966 */
3967static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3968                                  tcg_target_ulong val, TCGLifeData arg_life,
3969                                  TCGRegSet preferred_regs)
3970{
3971    /* ENV should not be modified.  */
3972    tcg_debug_assert(!temp_readonly(ots));
3973
3974    /* The movi is not explicitly generated here.  */
3975    set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3976    ots->val = val;
3977    ots->mem_coherent = 0;
3978    if (NEED_SYNC_ARG(0)) {
3979        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3980    } else if (IS_DEAD_ARG(0)) {
3981        temp_dead(s, ots);
3982    }
3983}
3984
3985/*
3986 * Specialized code generation for INDEX_op_mov_*.
3987 */
3988static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3989{
3990    const TCGLifeData arg_life = op->life;
3991    TCGRegSet allocated_regs, preferred_regs;
3992    TCGTemp *ts, *ots;
3993    TCGType otype, itype;
3994    TCGReg oreg, ireg;
3995
3996    allocated_regs = s->reserved_regs;
3997    preferred_regs = output_pref(op, 0);
3998    ots = arg_temp(op->args[0]);
3999    ts = arg_temp(op->args[1]);
4000
4001    /* ENV should not be modified.  */
4002    tcg_debug_assert(!temp_readonly(ots));
4003
4004    /* Note that otype != itype for no-op truncation.  */
4005    otype = ots->type;
4006    itype = ts->type;
4007
4008    if (ts->val_type == TEMP_VAL_CONST) {
4009        /* propagate constant or generate sti */
4010        tcg_target_ulong val = ts->val;
4011        if (IS_DEAD_ARG(1)) {
4012            temp_dead(s, ts);
4013        }
4014        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4015        return;
4016    }
4017
4018    /* If the source value is in memory we're going to be forced
4019       to have it in a register in order to perform the copy.  Copy
4020       the SOURCE value into its own register first, that way we
4021       don't have to reload SOURCE the next time it is used. */
4022    if (ts->val_type == TEMP_VAL_MEM) {
4023        temp_load(s, ts, tcg_target_available_regs[itype],
4024                  allocated_regs, preferred_regs);
4025    }
4026    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4027    ireg = ts->reg;
4028
4029    if (IS_DEAD_ARG(0)) {
4030        /* mov to a non-saved dead register makes no sense (even with
4031           liveness analysis disabled). */
4032        tcg_debug_assert(NEED_SYNC_ARG(0));
4033        if (!ots->mem_allocated) {
4034            temp_allocate_frame(s, ots);
4035        }
4036        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4037        if (IS_DEAD_ARG(1)) {
4038            temp_dead(s, ts);
4039        }
4040        temp_dead(s, ots);
4041        return;
4042    }
4043
4044    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4045        /*
4046         * The mov can be suppressed.  Kill input first, so that it
4047         * is unlinked from reg_to_temp, then set the output to the
4048         * reg that we saved from the input.
4049         */
4050        temp_dead(s, ts);
4051        oreg = ireg;
4052    } else {
4053        if (ots->val_type == TEMP_VAL_REG) {
4054            oreg = ots->reg;
4055        } else {
4056            /* Make sure to not spill the input register during allocation. */
4057            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4058                                 allocated_regs | ((TCGRegSet)1 << ireg),
4059                                 preferred_regs, ots->indirect_base);
4060        }
4061        if (!tcg_out_mov(s, otype, oreg, ireg)) {
4062            /*
4063             * Cross register class move not supported.
4064             * Store the source register into the destination slot
4065             * and leave the destination temp as TEMP_VAL_MEM.
4066             */
4067            assert(!temp_readonly(ots));
4068            if (!ts->mem_allocated) {
4069                temp_allocate_frame(s, ots);
4070            }
4071            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4072            set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4073            ots->mem_coherent = 1;
4074            return;
4075        }
4076    }
4077    set_temp_val_reg(s, ots, oreg);
4078    ots->mem_coherent = 0;
4079
4080    if (NEED_SYNC_ARG(0)) {
4081        temp_sync(s, ots, allocated_regs, 0, 0);
4082    }
4083}
4084
4085/*
4086 * Specialized code generation for INDEX_op_dup_vec.
4087 */
4088static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4089{
4090    const TCGLifeData arg_life = op->life;
4091    TCGRegSet dup_out_regs, dup_in_regs;
4092    TCGTemp *its, *ots;
4093    TCGType itype, vtype;
4094    unsigned vece;
4095    int lowpart_ofs;
4096    bool ok;
4097
4098    ots = arg_temp(op->args[0]);
4099    its = arg_temp(op->args[1]);
4100
4101    /* ENV should not be modified.  */
4102    tcg_debug_assert(!temp_readonly(ots));
4103
4104    itype = its->type;
4105    vece = TCGOP_VECE(op);
4106    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4107
4108    if (its->val_type == TEMP_VAL_CONST) {
4109        /* Propagate constant via movi -> dupi.  */
4110        tcg_target_ulong val = its->val;
4111        if (IS_DEAD_ARG(1)) {
4112            temp_dead(s, its);
4113        }
4114        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4115        return;
4116    }
4117
4118    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4119    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4120
4121    /* Allocate the output register now.  */
4122    if (ots->val_type != TEMP_VAL_REG) {
4123        TCGRegSet allocated_regs = s->reserved_regs;
4124        TCGReg oreg;
4125
4126        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4127            /* Make sure to not spill the input register. */
4128            tcg_regset_set_reg(allocated_regs, its->reg);
4129        }
4130        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4131                             output_pref(op, 0), ots->indirect_base);
4132        set_temp_val_reg(s, ots, oreg);
4133    }
4134
4135    switch (its->val_type) {
4136    case TEMP_VAL_REG:
4137        /*
4138         * The dup constriaints must be broad, covering all possible VECE.
4139         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4140         * to fail, indicating that extra moves are required for that case.
4141         */
4142        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4143            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4144                goto done;
4145            }
4146            /* Try again from memory or a vector input register.  */
4147        }
4148        if (!its->mem_coherent) {
4149            /*
4150             * The input register is not synced, and so an extra store
4151             * would be required to use memory.  Attempt an integer-vector
4152             * register move first.  We do not have a TCGRegSet for this.
4153             */
4154            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4155                break;
4156            }
4157            /* Sync the temp back to its slot and load from there.  */
4158            temp_sync(s, its, s->reserved_regs, 0, 0);
4159        }
4160        /* fall through */
4161
4162    case TEMP_VAL_MEM:
4163        lowpart_ofs = 0;
4164        if (HOST_BIG_ENDIAN) {
4165            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4166        }
4167        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4168                             its->mem_offset + lowpart_ofs)) {
4169            goto done;
4170        }
4171        /* Load the input into the destination vector register. */
4172        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4173        break;
4174
4175    default:
4176        g_assert_not_reached();
4177    }
4178
4179    /* We now have a vector input register, so dup must succeed. */
4180    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4181    tcg_debug_assert(ok);
4182
4183 done:
4184    ots->mem_coherent = 0;
4185    if (IS_DEAD_ARG(1)) {
4186        temp_dead(s, its);
4187    }
4188    if (NEED_SYNC_ARG(0)) {
4189        temp_sync(s, ots, s->reserved_regs, 0, 0);
4190    }
4191    if (IS_DEAD_ARG(0)) {
4192        temp_dead(s, ots);
4193    }
4194}
4195
4196static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4197{
4198    const TCGLifeData arg_life = op->life;
4199    const TCGOpDef * const def = &tcg_op_defs[op->opc];
4200    TCGRegSet i_allocated_regs;
4201    TCGRegSet o_allocated_regs;
4202    int i, k, nb_iargs, nb_oargs;
4203    TCGReg reg;
4204    TCGArg arg;
4205    const TCGArgConstraint *arg_ct;
4206    TCGTemp *ts;
4207    TCGArg new_args[TCG_MAX_OP_ARGS];
4208    int const_args[TCG_MAX_OP_ARGS];
4209
4210    nb_oargs = def->nb_oargs;
4211    nb_iargs = def->nb_iargs;
4212
4213    /* copy constants */
4214    memcpy(new_args + nb_oargs + nb_iargs,
4215           op->args + nb_oargs + nb_iargs,
4216           sizeof(TCGArg) * def->nb_cargs);
4217
4218    i_allocated_regs = s->reserved_regs;
4219    o_allocated_regs = s->reserved_regs;
4220
4221    /* satisfy input constraints */
4222    for (k = 0; k < nb_iargs; k++) {
4223        TCGRegSet i_preferred_regs, i_required_regs;
4224        bool allocate_new_reg, copyto_new_reg;
4225        TCGTemp *ts2;
4226        int i1, i2;
4227
4228        i = def->args_ct[nb_oargs + k].sort_index;
4229        arg = op->args[i];
4230        arg_ct = &def->args_ct[i];
4231        ts = arg_temp(arg);
4232
4233        if (ts->val_type == TEMP_VAL_CONST
4234            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4235            /* constant is OK for instruction */
4236            const_args[i] = 1;
4237            new_args[i] = ts->val;
4238            continue;
4239        }
4240
4241        reg = ts->reg;
4242        i_preferred_regs = 0;
4243        i_required_regs = arg_ct->regs;
4244        allocate_new_reg = false;
4245        copyto_new_reg = false;
4246
4247        switch (arg_ct->pair) {
4248        case 0: /* not paired */
4249            if (arg_ct->ialias) {
4250                i_preferred_regs = output_pref(op, arg_ct->alias_index);
4251
4252                /*
4253                 * If the input is readonly, then it cannot also be an
4254                 * output and aliased to itself.  If the input is not
4255                 * dead after the instruction, we must allocate a new
4256                 * register and move it.
4257                 */
4258                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4259                    allocate_new_reg = true;
4260                } else if (ts->val_type == TEMP_VAL_REG) {
4261                    /*
4262                     * Check if the current register has already been
4263                     * allocated for another input.
4264                     */
4265                    allocate_new_reg =
4266                        tcg_regset_test_reg(i_allocated_regs, reg);
4267                }
4268            }
4269            if (!allocate_new_reg) {
4270                temp_load(s, ts, i_required_regs, i_allocated_regs,
4271                          i_preferred_regs);
4272                reg = ts->reg;
4273                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4274            }
4275            if (allocate_new_reg) {
4276                /*
4277                 * Allocate a new register matching the constraint
4278                 * and move the temporary register into it.
4279                 */
4280                temp_load(s, ts, tcg_target_available_regs[ts->type],
4281                          i_allocated_regs, 0);
4282                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4283                                    i_preferred_regs, ts->indirect_base);
4284                copyto_new_reg = true;
4285            }
4286            break;
4287
4288        case 1:
4289            /* First of an input pair; if i1 == i2, the second is an output. */
4290            i1 = i;
4291            i2 = arg_ct->pair_index;
4292            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4293
4294            /*
4295             * It is easier to default to allocating a new pair
4296             * and to identify a few cases where it's not required.
4297             */
4298            if (arg_ct->ialias) {
4299                i_preferred_regs = output_pref(op, arg_ct->alias_index);
4300                if (IS_DEAD_ARG(i1) &&
4301                    IS_DEAD_ARG(i2) &&
4302                    !temp_readonly(ts) &&
4303                    ts->val_type == TEMP_VAL_REG &&
4304                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
4305                    tcg_regset_test_reg(i_required_regs, reg) &&
4306                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
4307                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4308                    (ts2
4309                     ? ts2->val_type == TEMP_VAL_REG &&
4310                       ts2->reg == reg + 1 &&
4311                       !temp_readonly(ts2)
4312                     : s->reg_to_temp[reg + 1] == NULL)) {
4313                    break;
4314                }
4315            } else {
4316                /* Without aliasing, the pair must also be an input. */
4317                tcg_debug_assert(ts2);
4318                if (ts->val_type == TEMP_VAL_REG &&
4319                    ts2->val_type == TEMP_VAL_REG &&
4320                    ts2->reg == reg + 1 &&
4321                    tcg_regset_test_reg(i_required_regs, reg)) {
4322                    break;
4323                }
4324            }
4325            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4326                                     0, ts->indirect_base);
4327            goto do_pair;
4328
4329        case 2: /* pair second */
4330            reg = new_args[arg_ct->pair_index] + 1;
4331            goto do_pair;
4332
4333        case 3: /* ialias with second output, no first input */
4334            tcg_debug_assert(arg_ct->ialias);
4335            i_preferred_regs = output_pref(op, arg_ct->alias_index);
4336
4337            if (IS_DEAD_ARG(i) &&
4338                !temp_readonly(ts) &&
4339                ts->val_type == TEMP_VAL_REG &&
4340                reg > 0 &&
4341                s->reg_to_temp[reg - 1] == NULL &&
4342                tcg_regset_test_reg(i_required_regs, reg) &&
4343                !tcg_regset_test_reg(i_allocated_regs, reg) &&
4344                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4345                tcg_regset_set_reg(i_allocated_regs, reg - 1);
4346                break;
4347            }
4348            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4349                                     i_allocated_regs, 0,
4350                                     ts->indirect_base);
4351            tcg_regset_set_reg(i_allocated_regs, reg);
4352            reg += 1;
4353            goto do_pair;
4354
4355        do_pair:
4356            /*
4357             * If an aliased input is not dead after the instruction,
4358             * we must allocate a new register and move it.
4359             */
4360            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4361                TCGRegSet t_allocated_regs = i_allocated_regs;
4362
4363                /*
4364                 * Because of the alias, and the continued life, make sure
4365                 * that the temp is somewhere *other* than the reg pair,
4366                 * and we get a copy in reg.
4367                 */
4368                tcg_regset_set_reg(t_allocated_regs, reg);
4369                tcg_regset_set_reg(t_allocated_regs, reg + 1);
4370                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4371                    /* If ts was already in reg, copy it somewhere else. */
4372                    TCGReg nr;
4373                    bool ok;
4374
4375                    tcg_debug_assert(ts->kind != TEMP_FIXED);
4376                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4377                                       t_allocated_regs, 0, ts->indirect_base);
4378                    ok = tcg_out_mov(s, ts->type, nr, reg);
4379                    tcg_debug_assert(ok);
4380
4381                    set_temp_val_reg(s, ts, nr);
4382                } else {
4383                    temp_load(s, ts, tcg_target_available_regs[ts->type],
4384                              t_allocated_regs, 0);
4385                    copyto_new_reg = true;
4386                }
4387            } else {
4388                /* Preferably allocate to reg, otherwise copy. */
4389                i_required_regs = (TCGRegSet)1 << reg;
4390                temp_load(s, ts, i_required_regs, i_allocated_regs,
4391                          i_preferred_regs);
4392                copyto_new_reg = ts->reg != reg;
4393            }
4394            break;
4395
4396        default:
4397            g_assert_not_reached();
4398        }
4399
4400        if (copyto_new_reg) {
4401            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4402                /*
4403                 * Cross register class move not supported.  Sync the
4404                 * temp back to its slot and load from there.
4405                 */
4406                temp_sync(s, ts, i_allocated_regs, 0, 0);
4407                tcg_out_ld(s, ts->type, reg,
4408                           ts->mem_base->reg, ts->mem_offset);
4409            }
4410        }
4411        new_args[i] = reg;
4412        const_args[i] = 0;
4413        tcg_regset_set_reg(i_allocated_regs, reg);
4414    }
4415
4416    /* mark dead temporaries and free the associated registers */
4417    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4418        if (IS_DEAD_ARG(i)) {
4419            temp_dead(s, arg_temp(op->args[i]));
4420        }
4421    }
4422
4423    if (def->flags & TCG_OPF_COND_BRANCH) {
4424        tcg_reg_alloc_cbranch(s, i_allocated_regs);
4425    } else if (def->flags & TCG_OPF_BB_END) {
4426        tcg_reg_alloc_bb_end(s, i_allocated_regs);
4427    } else {
4428        if (def->flags & TCG_OPF_CALL_CLOBBER) {
4429            /* XXX: permit generic clobber register list ? */
4430            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4431                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4432                    tcg_reg_free(s, i, i_allocated_regs);
4433                }
4434            }
4435        }
4436        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4437            /* sync globals if the op has side effects and might trigger
4438               an exception. */
4439            sync_globals(s, i_allocated_regs);
4440        }
4441
4442        /* satisfy the output constraints */
4443        for(k = 0; k < nb_oargs; k++) {
4444            i = def->args_ct[k].sort_index;
4445            arg = op->args[i];
4446            arg_ct = &def->args_ct[i];
4447            ts = arg_temp(arg);
4448
4449            /* ENV should not be modified.  */
4450            tcg_debug_assert(!temp_readonly(ts));
4451
4452            switch (arg_ct->pair) {
4453            case 0: /* not paired */
4454                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4455                    reg = new_args[arg_ct->alias_index];
4456                } else if (arg_ct->newreg) {
4457                    reg = tcg_reg_alloc(s, arg_ct->regs,
4458                                        i_allocated_regs | o_allocated_regs,
4459                                        output_pref(op, k), ts->indirect_base);
4460                } else {
4461                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4462                                        output_pref(op, k), ts->indirect_base);
4463                }
4464                break;
4465
4466            case 1: /* first of pair */
4467                tcg_debug_assert(!arg_ct->newreg);
4468                if (arg_ct->oalias) {
4469                    reg = new_args[arg_ct->alias_index];
4470                    break;
4471                }
4472                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4473                                         output_pref(op, k), ts->indirect_base);
4474                break;
4475
4476            case 2: /* second of pair */
4477                tcg_debug_assert(!arg_ct->newreg);
4478                if (arg_ct->oalias) {
4479                    reg = new_args[arg_ct->alias_index];
4480                } else {
4481                    reg = new_args[arg_ct->pair_index] + 1;
4482                }
4483                break;
4484
4485            case 3: /* first of pair, aliasing with a second input */
4486                tcg_debug_assert(!arg_ct->newreg);
4487                reg = new_args[arg_ct->pair_index] - 1;
4488                break;
4489
4490            default:
4491                g_assert_not_reached();
4492            }
4493            tcg_regset_set_reg(o_allocated_regs, reg);
4494            set_temp_val_reg(s, ts, reg);
4495            ts->mem_coherent = 0;
4496            new_args[i] = reg;
4497        }
4498    }
4499
4500    /* emit instruction */
4501    if (def->flags & TCG_OPF_VECTOR) {
4502        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4503                       new_args, const_args);
4504    } else {
4505        tcg_out_op(s, op->opc, new_args, const_args);
4506    }
4507
4508    /* move the outputs in the correct register if needed */
4509    for(i = 0; i < nb_oargs; i++) {
4510        ts = arg_temp(op->args[i]);
4511
4512        /* ENV should not be modified.  */
4513        tcg_debug_assert(!temp_readonly(ts));
4514
4515        if (NEED_SYNC_ARG(i)) {
4516            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4517        } else if (IS_DEAD_ARG(i)) {
4518            temp_dead(s, ts);
4519        }
4520    }
4521}
4522
/*
 * Attempt direct register allocation and code generation for
 * INDEX_op_dup2_vec, which forms 64-bit vector elements from two
 * 32-bit inputs.  Returns true if code was emitted; false if the
 * caller must fall back to generic expansion of the opcode.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    /* Output vector, low input half, high input half. */
    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that still replicates @val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Force both halves out to their backing slots, then load as one. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    /* Release dead inputs; sync or release the output per liveness. */
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4610
4611static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4612                         TCGRegSet allocated_regs)
4613{
4614    if (ts->val_type == TEMP_VAL_REG) {
4615        if (ts->reg != reg) {
4616            tcg_reg_free(s, reg, allocated_regs);
4617            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4618                /*
4619                 * Cross register class move not supported.  Sync the
4620                 * temp back to its slot and load from there.
4621                 */
4622                temp_sync(s, ts, allocated_regs, 0, 0);
4623                tcg_out_ld(s, ts->type, reg,
4624                           ts->mem_base->reg, ts->mem_offset);
4625            }
4626        }
4627    } else {
4628        TCGRegSet arg_set = 0;
4629
4630        tcg_reg_free(s, reg, allocated_regs);
4631        tcg_regset_set_reg(arg_set, reg);
4632        temp_load(s, ts, arg_set, allocated_regs, 0);
4633    }
4634}
4635
4636static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4637                         TCGRegSet allocated_regs)
4638{
4639    /*
4640     * When the destination is on the stack, load up the temp and store.
4641     * If there are many call-saved registers, the temp might live to
4642     * see another use; otherwise it'll be discarded.
4643     */
4644    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4645    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4646               TCG_TARGET_CALL_STACK_OFFSET +
4647               stk_slot * sizeof(tcg_target_long));
4648}
4649
4650static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4651                            TCGTemp *ts, TCGRegSet *allocated_regs)
4652{
4653    if (REG_P(l)) {
4654        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4655        load_arg_reg(s, reg, ts, *allocated_regs);
4656        tcg_regset_set_reg(*allocated_regs, reg);
4657    } else {
4658        load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4659                     ts, *allocated_regs);
4660    }
4661}
4662
4663static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
4664                         intptr_t ref_off, TCGRegSet *allocated_regs)
4665{
4666    TCGReg reg;
4667    int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
4668
4669    if (stk_slot < 0) {
4670        reg = tcg_target_call_iarg_regs[arg_slot];
4671        tcg_reg_free(s, reg, *allocated_regs);
4672        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4673        tcg_regset_set_reg(*allocated_regs, reg);
4674    } else {
4675        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4676                            *allocated_regs, 0, false);
4677        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4678        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4679                   TCG_TARGET_CALL_STACK_OFFSET
4680                   + stk_slot * sizeof(tcg_target_long));
4681    }
4682}
4683
/*
 * Register allocation and code generation for a helper call op:
 * move the inputs into their ABI locations, clobber call-clobbered
 * registers, save/sync globals per the call flags, emit the call,
 * and bind the outputs from the ABI return location.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its stack slot, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         TCG_TARGET_CALL_STACK_OFFSET
                         + loc->ref_slot * sizeof(tcg_target_long),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Store only; the address was passed by the BY_REF part. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output part arrives in the next ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* A 128-bit value returned in a vector register: spill it. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4815
4816#ifdef CONFIG_PROFILER
4817
/* avoid copy/paste errors */
/* Add one field of @from into the same field of the accumulator @to. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Like PROF_ADD, but keep the maximum seen rather than the sum. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4831
/* Pass in a zero'ed @prof.  Accumulates into it the scalar counters
   of every TCG context when @counters is set, and/or the per-opcode
   table when @table is set. */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;  /* deliberately shadows the context index above */

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}
4871
4872#undef PROF_ADD
4873#undef PROF_MAX
4874
/* Snapshot only the scalar counters into the zero'ed @prof. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4879
/* Snapshot only the per-opcode count table into the zero'ed @prof. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4884
/* Append one "opcode-name count" line per TCG opcode to @buf,
   using counts accumulated across all TCG contexts. */
void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}
4896
4897int64_t tcg_cpu_exec_time(void)
4898{
4899    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4900    unsigned int i;
4901    int64_t ret = 0;
4902
4903    for (i = 0; i < n_ctxs; i++) {
4904        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4905        const TCGProfile *prof = &s->prof;
4906
4907        ret += qatomic_read(&prof->cpu_exec_time);
4908    }
4909    return ret;
4910}
4911#else
/* Stub used when CONFIG_PROFILER is not compiled in. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4916
/* Stub: without CONFIG_PROFILER there is no exec-time accounting. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4922#endif
4923
4924
/*
 * Translate the op stream of @s into host machine code for @tb.
 * Returns the size in bytes of the generated code on success.
 * Returns -1 if the code buffer high-water mark was exceeded, -2 if
 * the TB grew past the 16-bit insn-offset limit or a relocation
 * could not be resolved, or a negative value from the backend's
 * ldst/pool finalization; in all those cases the caller must
 * restart code generation (typically with a smaller TB).
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Record op and temp counts (totals and maxima) for this TB. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the op stream before any optimization. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record the code offset where the previous guest insn ended. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each start word is split across two host-word args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
5166
5167#ifdef CONFIG_PROFILER
/*
 * Append a human-readable summary of the accumulated profiler
 * counters to @buf.  The seconds figure assumes a fixed 2.4 GHz
 * clock and is for display only.
 */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Avoid division by zero when nothing has been translated yet. */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
5231#else
/* Stub used when CONFIG_PROFILER is not compiled in. */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5236#endif
5237
5238#ifdef ELF_HOST_MACHINE
5239/* In order to use this feature, the backend needs to do three things:
5240
5241   (1) Define ELF_HOST_MACHINE to indicate both what value to
5242       put into the ELF image and to indicate support for the feature.
5243
5244   (2) Define tcg_register_jit.  This should create a buffer containing
5245       the contents of a .debug_frame section that describes the post-
5246       prologue unwind info for the tcg machine.
5247
5248   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5249*/
5250
5251/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One in-memory ELF symbol file registered with the debugger. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* Head of the entry list, inspected by the debugger. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * Hook function, per the GDB JIT interface: called after updating
 * __jit_debug_descriptor to notify the debugger.  The asm("") keeps
 * the empty body from being optimized away.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
5281
5282/* End GDB interface.  */
5283
/*
 * Return the byte offset of @str within the string table @strtab.
 * The table begins with a NUL byte (offset 0 is the empty string).
 * @str must be present: there is no bound on the scan, so a missing
 * string walks off the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; strcmp(p, str) != 0; p += strlen(p) + 1) {
        continue;
    }
    return p - strtab;
}
5295
5296static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5297                                 const void *debug_frame,
5298                                 size_t debug_frame_size)
5299{
5300    struct __attribute__((packed)) DebugInfo {
5301        uint32_t  len;
5302        uint16_t  version;
5303        uint32_t  abbrev;
5304        uint8_t   ptr_size;
5305        uint8_t   cu_die;
5306        uint16_t  cu_lang;
5307        uintptr_t cu_low_pc;
5308        uintptr_t cu_high_pc;
5309        uint8_t   fn_die;
5310        char      fn_name[16];
5311        uintptr_t fn_low_pc;
5312        uintptr_t fn_high_pc;
5313        uint8_t   cu_eoc;
5314    };
5315
5316    struct ElfImage {
5317        ElfW(Ehdr) ehdr;
5318        ElfW(Phdr) phdr;
5319        ElfW(Shdr) shdr[7];
5320        ElfW(Sym)  sym[2];
5321        struct DebugInfo di;
5322        uint8_t    da[24];
5323        char       str[80];
5324    };
5325
5326    struct ElfImage *img;
5327
5328    static const struct ElfImage img_template = {
5329        .ehdr = {
5330            .e_ident[EI_MAG0] = ELFMAG0,
5331            .e_ident[EI_MAG1] = ELFMAG1,
5332            .e_ident[EI_MAG2] = ELFMAG2,
5333            .e_ident[EI_MAG3] = ELFMAG3,
5334            .e_ident[EI_CLASS] = ELF_CLASS,
5335            .e_ident[EI_DATA] = ELF_DATA,
5336            .e_ident[EI_VERSION] = EV_CURRENT,
5337            .e_type = ET_EXEC,
5338            .e_machine = ELF_HOST_MACHINE,
5339            .e_version = EV_CURRENT,
5340            .e_phoff = offsetof(struct ElfImage, phdr),
5341            .e_shoff = offsetof(struct ElfImage, shdr),
5342            .e_ehsize = sizeof(ElfW(Shdr)),
5343            .e_phentsize = sizeof(ElfW(Phdr)),
5344            .e_phnum = 1,
5345            .e_shentsize = sizeof(ElfW(Shdr)),
5346            .e_shnum = ARRAY_SIZE(img->shdr),
5347            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
5348#ifdef ELF_HOST_FLAGS
5349            .e_flags = ELF_HOST_FLAGS,
5350#endif
5351#ifdef ELF_OSABI
5352            .e_ident[EI_OSABI] = ELF_OSABI,
5353#endif
5354        },
5355        .phdr = {
5356            .p_type = PT_LOAD,
5357            .p_flags = PF_X,
5358        },
5359        .shdr = {
5360            [0] = { .sh_type = SHT_NULL },
5361            /* Trick: The contents of code_gen_buffer are not present in
5362               this fake ELF file; that got allocated elsewhere.  Therefore
5363               we mark .text as SHT_NOBITS (similar to .bss) so that readers
5364               will not look for contents.  We can record any address.  */
5365            [1] = { /* .text */
5366                .sh_type = SHT_NOBITS,
5367                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
5368            },
5369            [2] = { /* .debug_info */
5370                .sh_type = SHT_PROGBITS,
5371                .sh_offset = offsetof(struct ElfImage, di),
5372                .sh_size = sizeof(struct DebugInfo),
5373            },
5374            [3] = { /* .debug_abbrev */
5375                .sh_type = SHT_PROGBITS,
5376                .sh_offset = offsetof(struct ElfImage, da),
5377                .sh_size = sizeof(img->da),
5378            },
5379            [4] = { /* .debug_frame */
5380                .sh_type = SHT_PROGBITS,
5381                .sh_offset = sizeof(struct ElfImage),
5382            },
5383            [5] = { /* .symtab */
5384                .sh_type = SHT_SYMTAB,
5385                .sh_offset = offsetof(struct ElfImage, sym),
5386                .sh_size = sizeof(img->sym),
5387                .sh_info = 1,
5388                .sh_link = ARRAY_SIZE(img->shdr) - 1,
5389                .sh_entsize = sizeof(ElfW(Sym)),
5390            },
5391            [6] = { /* .strtab */
5392                .sh_type = SHT_STRTAB,
5393                .sh_offset = offsetof(struct ElfImage, str),
5394                .sh_size = sizeof(img->str),
5395            }
5396        },
5397        .sym = {
5398            [1] = { /* code_gen_buffer */
5399                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5400                .st_shndx = 1,
5401            }
5402        },
5403        .di = {
5404            .len = sizeof(struct DebugInfo) - 4,
5405            .version = 2,
5406            .ptr_size = sizeof(void *),
5407            .cu_die = 1,
5408            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5409            .fn_die = 2,
5410            .fn_name = "code_gen_buffer"
5411        },
5412        .da = {
5413            1,          /* abbrev number (the cu) */
5414            0x11, 1,    /* DW_TAG_compile_unit, has children */
5415            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5416            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5417            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5418            0, 0,       /* end of abbrev */
5419            2,          /* abbrev number (the fn) */
5420            0x2e, 0,    /* DW_TAG_subprogram, no children */
5421            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5422            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5423            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5424            0, 0,       /* end of abbrev */
5425            0           /* no more abbrev */
5426        },
5427        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5428               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5429    };
5430
5431    /* We only need a single jit entry; statically allocate it.  */
5432    static struct jit_code_entry one_entry;
5433
5434    uintptr_t buf = (uintptr_t)buf_ptr;
5435    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5436    DebugFrameHeader *dfh;
5437
5438    img = g_malloc(img_size);
5439    *img = img_template;
5440
5441    img->phdr.p_vaddr = buf;
5442    img->phdr.p_paddr = buf;
5443    img->phdr.p_memsz = buf_size;
5444
5445    img->shdr[1].sh_name = find_string(img->str, ".text");
5446    img->shdr[1].sh_addr = buf;
5447    img->shdr[1].sh_size = buf_size;
5448
5449    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5450    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5451
5452    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5453    img->shdr[4].sh_size = debug_frame_size;
5454
5455    img->shdr[5].sh_name = find_string(img->str, ".symtab");
5456    img->shdr[6].sh_name = find_string(img->str, ".strtab");
5457
5458    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5459    img->sym[1].st_value = buf;
5460    img->sym[1].st_size = buf_size;
5461
5462    img->di.cu_low_pc = buf;
5463    img->di.cu_high_pc = buf + buf_size;
5464    img->di.fn_low_pc = buf;
5465    img->di.fn_high_pc = buf + buf_size;
5466
5467    dfh = (DebugFrameHeader *)(img + 1);
5468    memcpy(dfh, debug_frame, debug_frame_size);
5469    dfh->fde.func_start = buf;
5470    dfh->fde.func_len = buf_size;
5471
5472#ifdef DEBUG_JIT
5473    /* Enable this block to be able to debug the ELF image file creation.
5474       One can use readelf, objdump, or other inspection utilities.  */
5475    {
5476        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5477        FILE *f = fopen(jit, "w+b");
5478        if (f) {
5479            if (fwrite(img, img_size, 1, f) != img_size) {
5480                /* Avoid stupid unused return value warning for fwrite.  */
5481            }
5482            fclose(f);
5483        }
5484    }
5485#endif
5486
5487    one_entry.symfile_addr = img;
5488    one_entry.symfile_size = img_size;
5489
5490    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5491    __jit_debug_descriptor.relevant_entry = &one_entry;
5492    __jit_debug_descriptor.first_entry = &one_entry;
5493    __jit_debug_register_code();
5494}
5495#else
5496/* No support for the feature.  Provide the entry point expected by exec.c,
5497   and implement the internal function we declared earlier.  */
5498
/* Without ELF_HOST_MACHINE no debug-info image can be built, so the
   internal registration hook is a no-op.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
5504
/* Public entry point expected by exec.c; a no-op when the backend
   provides no ELF_HOST_MACHINE support.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5508#endif /* ELF_HOST_MACHINE */
5509
5510#if !TCG_TARGET_MAYBE_vec
/* Stub for backends without vector support: this expansion hook must
   never be reached when TCG_TARGET_MAYBE_vec is 0, so any call here
   indicates a bug in the generic code.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5515#endif
5516