qemu/tcg/tcg.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have the .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
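
/*
 * Illustrative example of the ordering above (made-up addresses): for
 * a = { .ptr = 0x1000, .size = 0x80 } and b = { .ptr = 0x2000, .size = 0x40 },
 * tb_tc_cmp(&a, &b) < 0 since a.ptr < b.ptr.  A lookup key such as
 * { .ptr = 0x1040, .size = 0 } compares equal to @a, because 0x1040 falls
 * within [a.ptr, a.ptr + a.size).
 */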

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
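
/*
 * Worked example of the mapping above (assumed values, for illustration
 * only): with region.start_aligned = 0x10000 and region.stride = 0x9000,
 * a pointer p = 0x1b000 gives offset = 0xb000 and region_idx = 1, i.e.
 * the second region's tree.  Pointers below start_aligned (the unaligned
 * head of region 0) map to index 0, and pointers past the last stride
 * clamp to region.n - 1.
 */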

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
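
/*
 * Typical use (sketch): given a host pc, e.g. one taken from a signal
 * handler or an unwinder, recover the translation containing it:
 *
 *     TranslationBlock *tb = tcg_tb_lookup(pc);
 *     if (tb) {
 *         ... pc lies within tb's translated code ...
 *     }
 */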

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}
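
/*
 * Example layout (assumed numbers, for illustration only): with
 * region.stride = 36 pages and region.size = 35 pages (one trailing
 * guard page per region), region 2 spans the half-open interval
 * [start_aligned + 72 pages, start_aligned + 107 pages).  Region 0
 * instead starts at the unaligned region.start, and the last region
 * extends to region.end to soak up any leftover pages.
 */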

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
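
/*
 * A quick sanity check of the sizing loop above (illustrative numbers):
 * with a 256 MB code_gen_buffer and max_cpus = 8, the first iteration
 * tries 8 regions per thread, i.e. 256 MB / 64 = 4 MB per region.
 * Since 4 MB >= 2 MB, tcg_n_regions() returns 64.
 */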

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur.  Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     */
    for (i = 0; i < region.n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);

        /*
         * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
         * rejects a permission change from RWX -> NONE.  Guard pages are
         * nice for bug detection but are not essential; ignore any failure.
         */
        (void)qemu_mprotect_none(end, page_size);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
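
/*
 * Worked example of the partitioning above (assumed numbers): a buffer at
 * 0x1000 with size = 0x42000, page_size = 0x1000 and n_regions = 4 yields
 * aligned == buf (already page-aligned) and region_size = 0x42000 / 4
 * rounded down to 0x10000.  Each region then offers 0xf000 bytes of usable
 * code space followed by one 0x1000-byte guard page, and the 0x2000 bytes
 * left over at the end of the buffer go to the last region.
 */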

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
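
/*
 * Invariant maintained by the two helpers above (sketch): with split-wx
 * enabled, a write address and its executable alias differ by the
 * constant tcg_splitwx_diff, i.e.
 *
 *     rx == rw + tcg_splitwx_diff
 *     rw == rx - tcg_splitwx_diff
 *
 * When split-wx is disabled, tcg_splitwx_diff is 0 and both conversions
 * degenerate to the identity.
 */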

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
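
/*
 * Example (illustrative numbers): with 4 regions, a stride of 36 pages
 * and a size of 35 pages, guard_size is one page; the capacity is then
 * the whole buffer minus the 4 guard pages and minus 4 * TCG_HIGHWATER
 * bytes, the reserve each region keeps before requesting a new one.
 */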

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
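
/*
 * tcg_malloc_internal() is only the slow path.  The common case is the
 * inline tcg_malloc() in tcg.h, which bumps a pointer within the current
 * chunk and falls back here on overflow; roughly (sketch, not verbatim):
 *
 *     size = ROUND_UP(size, sizeof(long));
 *     if (s->pool_cur + size > s->pool_end) {
 *         return tcg_malloc_internal(s, size);
 *     }
 *     ptr = s->pool_cur;
 *     s->pool_cur += size;
 *     return ptr;
 */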

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
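
/*
 * Alignment example (assumed values): with qemu_icache_linesize = 64 and
 * code_gen_ptr = base + 0x19c, the TB lands at base + 0x1c0 and the code
 * pointer advances to the next 64-byte boundary past sizeof(*tb), so the
 * TB descriptor and the translated code never share a cache line.
 */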

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
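
/*
 * Note the contrast with tcg_constant_i32/i64 above: tcg_const_*()
 * allocates a fresh temporary and emits a mov, so the result may later
 * be overwritten, whereas tcg_constant_*() returns an interned,
 * read-only temp that must not be modified and whose free is a no-op.
 */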

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
1832    case INDEX_op_deposit_i64:
1833        return TCG_TARGET_HAS_deposit_i64;
1834    case INDEX_op_extract_i64:
1835        return TCG_TARGET_HAS_extract_i64;
1836    case INDEX_op_sextract_i64:
1837        return TCG_TARGET_HAS_sextract_i64;
1838    case INDEX_op_extract2_i64:
1839        return TCG_TARGET_HAS_extract2_i64;
1840    case INDEX_op_extrl_i64_i32:
1841        return TCG_TARGET_HAS_extrl_i64_i32;
1842    case INDEX_op_extrh_i64_i32:
1843        return TCG_TARGET_HAS_extrh_i64_i32;
1844    case INDEX_op_ext8s_i64:
1845        return TCG_TARGET_HAS_ext8s_i64;
1846    case INDEX_op_ext16s_i64:
1847        return TCG_TARGET_HAS_ext16s_i64;
1848    case INDEX_op_ext32s_i64:
1849        return TCG_TARGET_HAS_ext32s_i64;
1850    case INDEX_op_ext8u_i64:
1851        return TCG_TARGET_HAS_ext8u_i64;
1852    case INDEX_op_ext16u_i64:
1853        return TCG_TARGET_HAS_ext16u_i64;
1854    case INDEX_op_ext32u_i64:
1855        return TCG_TARGET_HAS_ext32u_i64;
1856    case INDEX_op_bswap16_i64:
1857        return TCG_TARGET_HAS_bswap16_i64;
1858    case INDEX_op_bswap32_i64:
1859        return TCG_TARGET_HAS_bswap32_i64;
1860    case INDEX_op_bswap64_i64:
1861        return TCG_TARGET_HAS_bswap64_i64;
1862    case INDEX_op_not_i64:
1863        return TCG_TARGET_HAS_not_i64;
1864    case INDEX_op_neg_i64:
1865        return TCG_TARGET_HAS_neg_i64;
1866    case INDEX_op_andc_i64:
1867        return TCG_TARGET_HAS_andc_i64;
1868    case INDEX_op_orc_i64:
1869        return TCG_TARGET_HAS_orc_i64;
1870    case INDEX_op_eqv_i64:
1871        return TCG_TARGET_HAS_eqv_i64;
1872    case INDEX_op_nand_i64:
1873        return TCG_TARGET_HAS_nand_i64;
1874    case INDEX_op_nor_i64:
1875        return TCG_TARGET_HAS_nor_i64;
1876    case INDEX_op_clz_i64:
1877        return TCG_TARGET_HAS_clz_i64;
1878    case INDEX_op_ctz_i64:
1879        return TCG_TARGET_HAS_ctz_i64;
1880    case INDEX_op_ctpop_i64:
1881        return TCG_TARGET_HAS_ctpop_i64;
1882    case INDEX_op_add2_i64:
1883        return TCG_TARGET_HAS_add2_i64;
1884    case INDEX_op_sub2_i64:
1885        return TCG_TARGET_HAS_sub2_i64;
1886    case INDEX_op_mulu2_i64:
1887        return TCG_TARGET_HAS_mulu2_i64;
1888    case INDEX_op_muls2_i64:
1889        return TCG_TARGET_HAS_muls2_i64;
1890    case INDEX_op_muluh_i64:
1891        return TCG_TARGET_HAS_muluh_i64;
1892    case INDEX_op_mulsh_i64:
1893        return TCG_TARGET_HAS_mulsh_i64;
1894
1895    case INDEX_op_mov_vec:
1896    case INDEX_op_dup_vec:
1897    case INDEX_op_dupm_vec:
1898    case INDEX_op_ld_vec:
1899    case INDEX_op_st_vec:
1900    case INDEX_op_add_vec:
1901    case INDEX_op_sub_vec:
1902    case INDEX_op_and_vec:
1903    case INDEX_op_or_vec:
1904    case INDEX_op_xor_vec:
1905    case INDEX_op_cmp_vec:
1906        return have_vec;
1907    case INDEX_op_dup2_vec:
1908        return have_vec && TCG_TARGET_REG_BITS == 32;
1909    case INDEX_op_not_vec:
1910        return have_vec && TCG_TARGET_HAS_not_vec;
1911    case INDEX_op_neg_vec:
1912        return have_vec && TCG_TARGET_HAS_neg_vec;
1913    case INDEX_op_abs_vec:
1914        return have_vec && TCG_TARGET_HAS_abs_vec;
1915    case INDEX_op_andc_vec:
1916        return have_vec && TCG_TARGET_HAS_andc_vec;
1917    case INDEX_op_orc_vec:
1918        return have_vec && TCG_TARGET_HAS_orc_vec;
1919    case INDEX_op_mul_vec:
1920        return have_vec && TCG_TARGET_HAS_mul_vec;
1921    case INDEX_op_shli_vec:
1922    case INDEX_op_shri_vec:
1923    case INDEX_op_sari_vec:
1924        return have_vec && TCG_TARGET_HAS_shi_vec;
1925    case INDEX_op_shls_vec:
1926    case INDEX_op_shrs_vec:
1927    case INDEX_op_sars_vec:
1928        return have_vec && TCG_TARGET_HAS_shs_vec;
1929    case INDEX_op_shlv_vec:
1930    case INDEX_op_shrv_vec:
1931    case INDEX_op_sarv_vec:
1932        return have_vec && TCG_TARGET_HAS_shv_vec;
1933    case INDEX_op_rotli_vec:
1934        return have_vec && TCG_TARGET_HAS_roti_vec;
1935    case INDEX_op_rotls_vec:
1936        return have_vec && TCG_TARGET_HAS_rots_vec;
1937    case INDEX_op_rotlv_vec:
1938    case INDEX_op_rotrv_vec:
1939        return have_vec && TCG_TARGET_HAS_rotv_vec;
1940    case INDEX_op_ssadd_vec:
1941    case INDEX_op_usadd_vec:
1942    case INDEX_op_sssub_vec:
1943    case INDEX_op_ussub_vec:
1944        return have_vec && TCG_TARGET_HAS_sat_vec;
1945    case INDEX_op_smin_vec:
1946    case INDEX_op_umin_vec:
1947    case INDEX_op_smax_vec:
1948    case INDEX_op_umax_vec:
1949        return have_vec && TCG_TARGET_HAS_minmax_vec;
1950    case INDEX_op_bitsel_vec:
1951        return have_vec && TCG_TARGET_HAS_bitsel_vec;
1952    case INDEX_op_cmpsel_vec:
1953        return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1954
1955    default:
1956        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1957        return true;
1958    }
1959}
1960
/* Note: we convert the 64-bit args to 32-bit and do some alignment
   and endian swapping. Maybe it would be better to do the alignment
   and endian swapping in tcg_reg_alloc_call(). */
1964void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1965{
1966    int i, real_args, nb_rets, pi;
1967    unsigned sizemask, flags;
1968    TCGHelperInfo *info;
1969    TCGOp *op;
1970
1971    info = g_hash_table_lookup(helper_table, (gpointer)func);
1972    flags = info->flags;
1973    sizemask = info->sizemask;
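    /*
     * The sizemask packs two bits per value: within each pair, bit 0 is
     * set for a 64-bit value and bit 1 for a signed value.  The return
     * value uses the pair at bits 0..1 and argument i the pair at bits
     * (i+1)*2..(i+1)*2+1, hence the (1 << (i+1)*2) and (2 << (i+1)*2)
     * tests below.  E.g. a 64-bit return with one signed 32-bit argument
     * gives sizemask = 1 | (2 << 2) = 0x9.
     */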
1974
1975#ifdef CONFIG_PLUGIN
1976    /* detect non-plugin helpers */
1977    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1978        tcg_ctx->plugin_insn->calls_helpers = true;
1979    }
1980#endif
1981
1982#if defined(__sparc__) && !defined(__arch64__) \
1983    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass them as
       two separate parameters.  Split them.  */
1986    int orig_sizemask = sizemask;
1987    int orig_nargs = nargs;
1988    TCGv_i64 retl, reth;
1989    TCGTemp *split_args[MAX_OPC_PARAM];
1990
1991    retl = NULL;
1992    reth = NULL;
1993    if (sizemask != 0) {
1994        for (i = real_args = 0; i < nargs; ++i) {
1995            int is_64bit = sizemask & (1 << (i+1)*2);
1996            if (is_64bit) {
1997                TCGv_i64 orig = temp_tcgv_i64(args[i]);
1998                TCGv_i32 h = tcg_temp_new_i32();
1999                TCGv_i32 l = tcg_temp_new_i32();
2000                tcg_gen_extr_i64_i32(l, h, orig);
2001                split_args[real_args++] = tcgv_i32_temp(h);
2002                split_args[real_args++] = tcgv_i32_temp(l);
2003            } else {
2004                split_args[real_args++] = args[i];
2005            }
2006        }
2007        nargs = real_args;
2008        args = split_args;
2009        sizemask = 0;
2010    }
2011#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
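    /* Hosts defining TCG_TARGET_EXTEND_ARGS need 32-bit arguments
       extended to the full 64-bit register width before the call; do
       that once here, honoring the signedness recorded in sizemask. */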
2012    for (i = 0; i < nargs; ++i) {
2013        int is_64bit = sizemask & (1 << (i+1)*2);
2014        int is_signed = sizemask & (2 << (i+1)*2);
2015        if (!is_64bit) {
2016            TCGv_i64 temp = tcg_temp_new_i64();
2017            TCGv_i64 orig = temp_tcgv_i64(args[i]);
2018            if (is_signed) {
2019                tcg_gen_ext32s_i64(temp, orig);
2020            } else {
2021                tcg_gen_ext32u_i64(temp, orig);
2022            }
2023            args[i] = tcgv_i64_temp(temp);
2024        }
2025    }
2026#endif /* TCG_TARGET_EXTEND_ARGS */
2027
2028    op = tcg_emit_op(INDEX_op_call);
2029
2030    pi = 0;
2031    if (ret != NULL) {
2032#if defined(__sparc__) && !defined(__arch64__) \
2033    && !defined(CONFIG_TCG_INTERPRETER)
2034        if (orig_sizemask & 1) {
2035            /* The 32-bit ABI is going to return the 64-bit value in
2036               the %o0/%o1 register pair.  Prepare for this by using
2037               two return temporaries, and reassemble below.  */
2038            retl = tcg_temp_new_i64();
2039            reth = tcg_temp_new_i64();
2040            op->args[pi++] = tcgv_i64_arg(reth);
2041            op->args[pi++] = tcgv_i64_arg(retl);
2042            nb_rets = 2;
2043        } else {
2044            op->args[pi++] = temp_arg(ret);
2045            nb_rets = 1;
2046        }
2047#else
2048        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
2049#ifdef HOST_WORDS_BIGENDIAN
2050            op->args[pi++] = temp_arg(ret + 1);
2051            op->args[pi++] = temp_arg(ret);
2052#else
2053            op->args[pi++] = temp_arg(ret);
2054            op->args[pi++] = temp_arg(ret + 1);
2055#endif
2056            nb_rets = 2;
2057        } else {
2058            op->args[pi++] = temp_arg(ret);
2059            nb_rets = 1;
2060        }
2061#endif
2062    } else {
2063        nb_rets = 0;
2064    }
2065    TCGOP_CALLO(op) = nb_rets;
2066
2067    real_args = 0;
2068    for (i = 0; i < nargs; i++) {
2069        int is_64bit = sizemask & (1 << (i+1)*2);
2070        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
2071#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64-bit args */
2073            if (real_args & 1) {
2074                op->args[pi++] = TCG_CALL_DUMMY_ARG;
2075                real_args++;
2076            }
2077#endif
            /* If the stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big- or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
2088#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2089            op->args[pi++] = temp_arg(args[i] + 1);
2090            op->args[pi++] = temp_arg(args[i]);
2091#else
2092            op->args[pi++] = temp_arg(args[i]);
2093            op->args[pi++] = temp_arg(args[i] + 1);
2094#endif
2095            real_args += 2;
2096            continue;
2097        }
2098
2099        op->args[pi++] = temp_arg(args[i]);
2100        real_args++;
2101    }
2102    op->args[pi++] = (uintptr_t)func;
2103    op->args[pi++] = flags;
2104    TCGOP_CALLI(op) = real_args;
2105
2106    /* Make sure the fields didn't overflow.  */
2107    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
2108    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
2109
2110#if defined(__sparc__) && !defined(__arch64__) \
2111    && !defined(CONFIG_TCG_INTERPRETER)
2112    /* Free all of the parts we allocated above.  */
2113    for (i = real_args = 0; i < orig_nargs; ++i) {
2114        int is_64bit = orig_sizemask & (1 << (i+1)*2);
2115        if (is_64bit) {
2116            tcg_temp_free_internal(args[real_args++]);
2117            tcg_temp_free_internal(args[real_args++]);
2118        } else {
2119            real_args++;
2120        }
2121    }
2122    if (orig_sizemask & 1) {
2123        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
2124           Note that describing these as TCGv_i64 eliminates an unnecessary
2125           zero-extension that tcg_gen_concat_i32_i64 would create.  */
2126        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2127        tcg_temp_free_i64(retl);
2128        tcg_temp_free_i64(reth);
2129    }
2130#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2131    for (i = 0; i < nargs; ++i) {
2132        int is_64bit = sizemask & (1 << (i+1)*2);
2133        if (!is_64bit) {
2134            tcg_temp_free_internal(args[i]);
2135        }
2136    }
2137#endif /* TCG_TARGET_EXTEND_ARGS */
2138}
2139
2140static void tcg_reg_alloc_start(TCGContext *s)
2141{
2142    int i, n;
2143
2144    for (i = 0, n = s->nb_temps; i < n; i++) {
2145        TCGTemp *ts = &s->temps[i];
2146        TCGTempVal val = TEMP_VAL_MEM;
2147
2148        switch (ts->kind) {
2149        case TEMP_CONST:
2150            val = TEMP_VAL_CONST;
2151            break;
2152        case TEMP_FIXED:
2153            val = TEMP_VAL_REG;
2154            break;
2155        case TEMP_GLOBAL:
2156            break;
2157        case TEMP_NORMAL:
2158            val = TEMP_VAL_DEAD;
2159            /* fall through */
2160        case TEMP_LOCAL:
2161            ts->mem_allocated = 0;
2162            break;
2163        default:
2164            g_assert_not_reached();
2165        }
2166        ts->val_type = val;
2167    }
2168
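    /* No host register holds any temp at the start of the block. */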
2169    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2170}
2171
2172static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2173                                 TCGTemp *ts)
2174{
2175    int idx = temp_idx(ts);
2176
2177    switch (ts->kind) {
2178    case TEMP_FIXED:
2179    case TEMP_GLOBAL:
2180        pstrcpy(buf, buf_size, ts->name);
2181        break;
2182    case TEMP_LOCAL:
2183        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2184        break;
2185    case TEMP_NORMAL:
2186        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2187        break;
2188    case TEMP_CONST:
2189        switch (ts->type) {
2190        case TCG_TYPE_I32:
2191            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2192            break;
2193#if TCG_TARGET_REG_BITS > 32
2194        case TCG_TYPE_I64:
2195            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2196            break;
2197#endif
2198        case TCG_TYPE_V64:
2199        case TCG_TYPE_V128:
2200        case TCG_TYPE_V256:
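            /* 64 << n maps the consecutive V64/V128/V256 types to the
               vector width in bits. */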
2201            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2202                     64 << (ts->type - TCG_TYPE_V64), ts->val);
2203            break;
2204        default:
2205            g_assert_not_reached();
2206        }
2207        break;
2208    }
2209    return buf;
2210}
2211
2212static char *tcg_get_arg_str(TCGContext *s, char *buf,
2213                             int buf_size, TCGArg arg)
2214{
2215    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2216}
2217
2218/* Find helper name.  */
2219static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2220{
2221    const char *ret = NULL;
2222    if (helper_table) {
2223        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2224        if (info) {
2225            ret = info->name;
2226        }
2227    }
2228    return ret;
2229}
2230
2231static const char * const cond_name[] =
2232{
2233    [TCG_COND_NEVER] = "never",
2234    [TCG_COND_ALWAYS] = "always",
2235    [TCG_COND_EQ] = "eq",
2236    [TCG_COND_NE] = "ne",
2237    [TCG_COND_LT] = "lt",
2238    [TCG_COND_GE] = "ge",
2239    [TCG_COND_LE] = "le",
2240    [TCG_COND_GT] = "gt",
2241    [TCG_COND_LTU] = "ltu",
2242    [TCG_COND_GEU] = "geu",
2243    [TCG_COND_LEU] = "leu",
2244    [TCG_COND_GTU] = "gtu"
2245};
2246
2247static const char * const ldst_name[] =
2248{
2249    [MO_UB]   = "ub",
2250    [MO_SB]   = "sb",
2251    [MO_LEUW] = "leuw",
2252    [MO_LESW] = "lesw",
2253    [MO_LEUL] = "leul",
2254    [MO_LESL] = "lesl",
2255    [MO_LEQ]  = "leq",
2256    [MO_BEUW] = "beuw",
2257    [MO_BESW] = "besw",
2258    [MO_BEUL] = "beul",
2259    [MO_BESL] = "besl",
2260    [MO_BEQ]  = "beq",
2261};
2262
2263static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2264#ifdef TARGET_ALIGNED_ONLY
2265    [MO_UNALN >> MO_ASHIFT]    = "un+",
2266    [MO_ALIGN >> MO_ASHIFT]    = "",
2267#else
2268    [MO_UNALN >> MO_ASHIFT]    = "",
2269    [MO_ALIGN >> MO_ASHIFT]    = "al+",
2270#endif
2271    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2272    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2273    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2274    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2275    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2276    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2277};
2278
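/* d & (d - 1) clears the lowest set bit, so this also reports true
   for the empty set, not only for exactly one register. */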
2279static inline bool tcg_regset_single(TCGRegSet d)
2280{
2281    return (d & (d - 1)) == 0;
2282}
2283
2284static inline TCGReg tcg_regset_first(TCGRegSet d)
2285{
2286    if (TCG_TARGET_NB_REGS <= 32) {
2287        return ctz32(d);
2288    } else {
2289        return ctz64(d);
2290    }
2291}
2292
2293static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2294{
2295    char buf[128];
2296    TCGOp *op;
2297
2298    QTAILQ_FOREACH(op, &s->ops, link) {
2299        int i, k, nb_oargs, nb_iargs, nb_cargs;
2300        const TCGOpDef *def;
2301        TCGOpcode c;
2302        int col = 0;
2303
2304        c = op->opc;
2305        def = &tcg_op_defs[c];
2306
2307        if (c == INDEX_op_insn_start) {
2308            nb_oargs = 0;
2309            col += qemu_log("\n ----");
2310
2311            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2312                target_ulong a;
2313#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2314                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2315#else
2316                a = op->args[i];
2317#endif
2318                col += qemu_log(" " TARGET_FMT_lx, a);
2319            }
2320        } else if (c == INDEX_op_call) {
2321            /* variable number of arguments */
2322            nb_oargs = TCGOP_CALLO(op);
2323            nb_iargs = TCGOP_CALLI(op);
2324            nb_cargs = def->nb_cargs;
2325
2326            /* function name, flags, out args */
2327            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2328                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2329                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2330            for (i = 0; i < nb_oargs; i++) {
2331                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2332                                                       op->args[i]));
2333            }
2334            for (i = 0; i < nb_iargs; i++) {
2335                TCGArg arg = op->args[nb_oargs + i];
2336                const char *t = "<dummy>";
2337                if (arg != TCG_CALL_DUMMY_ARG) {
2338                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2339                }
2340                col += qemu_log(",%s", t);
2341            }
2342        } else {
2343            col += qemu_log(" %s ", def->name);
2344
2345            nb_oargs = def->nb_oargs;
2346            nb_iargs = def->nb_iargs;
2347            nb_cargs = def->nb_cargs;
2348
2349            if (def->flags & TCG_OPF_VECTOR) {
2350                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2351                                8 << TCGOP_VECE(op));
2352            }
2353
2354            k = 0;
2355            for (i = 0; i < nb_oargs; i++) {
2356                if (k != 0) {
2357                    col += qemu_log(",");
2358                }
2359                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2360                                                      op->args[k++]));
2361            }
2362            for (i = 0; i < nb_iargs; i++) {
2363                if (k != 0) {
2364                    col += qemu_log(",");
2365                }
2366                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2367                                                      op->args[k++]));
2368            }
2369            switch (c) {
2370            case INDEX_op_brcond_i32:
2371            case INDEX_op_setcond_i32:
2372            case INDEX_op_movcond_i32:
2373            case INDEX_op_brcond2_i32:
2374            case INDEX_op_setcond2_i32:
2375            case INDEX_op_brcond_i64:
2376            case INDEX_op_setcond_i64:
2377            case INDEX_op_movcond_i64:
2378            case INDEX_op_cmp_vec:
2379            case INDEX_op_cmpsel_vec:
2380                if (op->args[k] < ARRAY_SIZE(cond_name)
2381                    && cond_name[op->args[k]]) {
2382                    col += qemu_log(",%s", cond_name[op->args[k++]]);
2383                } else {
2384                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2385                }
2386                i = 1;
2387                break;
2388            case INDEX_op_qemu_ld_i32:
2389            case INDEX_op_qemu_st_i32:
2390            case INDEX_op_qemu_st8_i32:
2391            case INDEX_op_qemu_ld_i64:
2392            case INDEX_op_qemu_st_i64:
2393                {
2394                    TCGMemOpIdx oi = op->args[k++];
2395                    MemOp op = get_memop(oi);
2396                    unsigned ix = get_mmuidx(oi);
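                    /* Note: this MemOp 'op' shadows the outer TCGOp *op
                       for the remainder of this block. */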
2397
2398                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2399                        col += qemu_log(",$0x%x,%u", op, ix);
2400                    } else {
2401                        const char *s_al, *s_op;
2402                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2403                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2404                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2405                    }
2406                    i = 1;
2407                }
2408                break;
2409            default:
2410                i = 0;
2411                break;
2412            }
2413            switch (c) {
2414            case INDEX_op_set_label:
2415            case INDEX_op_br:
2416            case INDEX_op_brcond_i32:
2417            case INDEX_op_brcond_i64:
2418            case INDEX_op_brcond2_i32:
2419                col += qemu_log("%s$L%d", k ? "," : "",
2420                                arg_label(op->args[k])->id);
2421                i++, k++;
2422                break;
2423            default:
2424                break;
2425            }
2426            for (; i < nb_cargs; i++, k++) {
2427                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2428            }
2429        }
2430
2431        if (have_prefs || op->life) {
2432
2433            QemuLogFile *logfile;
2434
2435            rcu_read_lock();
2436            logfile = qatomic_rcu_read(&qemu_logfile);
2437            if (logfile) {
2438                for (; col < 40; ++col) {
2439                    putc(' ', logfile->fd);
2440                }
2441            }
2442            rcu_read_unlock();
2443        }
2444
2445        if (op->life) {
2446            unsigned life = op->life;
2447
2448            if (life & (SYNC_ARG * 3)) {
2449                qemu_log("  sync:");
2450                for (i = 0; i < 2; ++i) {
2451                    if (life & (SYNC_ARG << i)) {
2452                        qemu_log(" %d", i);
2453                    }
2454                }
2455            }
2456            life /= DEAD_ARG;
2457            if (life) {
2458                qemu_log("  dead:");
2459                for (i = 0; life; ++i, life >>= 1) {
2460                    if (life & 1) {
2461                        qemu_log(" %d", i);
2462                    }
2463                }
2464            }
2465        }
2466
2467        if (have_prefs) {
2468            for (i = 0; i < nb_oargs; ++i) {
2469                TCGRegSet set = op->output_pref[i];
2470
2471                if (i == 0) {
2472                    qemu_log("  pref=");
2473                } else {
2474                    qemu_log(",");
2475                }
2476                if (set == 0) {
2477                    qemu_log("none");
2478                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2479                    qemu_log("all");
2480#ifdef CONFIG_DEBUG_TCG
2481                } else if (tcg_regset_single(set)) {
2482                    TCGReg reg = tcg_regset_first(set);
2483                    qemu_log("%s", tcg_target_reg_names[reg]);
2484#endif
2485                } else if (TCG_TARGET_NB_REGS <= 32) {
2486                    qemu_log("%#x", (uint32_t)set);
2487                } else {
2488                    qemu_log("%#" PRIx64, (uint64_t)set);
2489                }
2490            }
2491        }
2492
2493        qemu_log("\n");
2494    }
2495}
2496
/* We give more priority to constraints with fewer registers. */
2498static int get_constraint_priority(const TCGOpDef *def, int k)
2499{
2500    const TCGArgConstraint *arg_ct = &def->args_ct[k];
2501    int n;
2502
2503    if (arg_ct->oalias) {
2504        /* an alias is equivalent to a single register */
2505        n = 1;
2506    } else {
2507        n = ctpop64(arg_ct->regs);
2508    }
2509    return TCG_TARGET_NB_REGS - n + 1;
2510}
2511
2512/* sort from highest priority to lowest */
2513static void sort_constraints(TCGOpDef *def, int start, int n)
2514{
2515    int i, j;
2516    TCGArgConstraint *a = def->args_ct;
2517
2518    for (i = 0; i < n; i++) {
2519        a[start + i].sort_index = start + i;
2520    }
2521    if (n <= 1) {
2522        return;
2523    }
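    /* Simple O(n^2) exchange sort; only the sort_index permutation is
       reordered, the args_ct entries themselves stay in place. */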
2524    for (i = 0; i < n - 1; i++) {
2525        for (j = i + 1; j < n; j++) {
2526            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2527            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2528            if (p1 < p2) {
2529                int tmp = a[start + i].sort_index;
2530                a[start + i].sort_index = a[start + j].sort_index;
2531                a[start + j].sort_index = tmp;
2532            }
2533        }
2534    }
2535}
2536
2537static void process_op_defs(TCGContext *s)
2538{
2539    TCGOpcode op;
2540
2541    for (op = 0; op < NB_OPS; op++) {
2542        TCGOpDef *def = &tcg_op_defs[op];
2543        const TCGTargetOpDef *tdefs;
2544        int i, nb_args;
2545
2546        if (def->flags & TCG_OPF_NOT_PRESENT) {
2547            continue;
2548        }
2549
2550        nb_args = def->nb_iargs + def->nb_oargs;
2551        if (nb_args == 0) {
2552            continue;
2553        }
2554
2555        /*
2556         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation-defined, force the result to unsigned.
2559         */
2560        unsigned con_set = tcg_target_op_def(op);
2561        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2562        tdefs = &constraint_sets[con_set];
2563
2564        for (i = 0; i < nb_args; i++) {
2565            const char *ct_str = tdefs->args_ct_str[i];
2566            /* Incomplete TCGTargetOpDef entry. */
2567            tcg_debug_assert(ct_str != NULL);
2568
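            /*
             * Each constraint character refines args_ct[i]: a digit
             * aliases this input to the numbered output, '&' requests a
             * new register for an output (distinct from all inputs),
             * 'i' allows an immediate, and the target letters pulled in
             * from tcg-target-con-str.h add register sets or constant
             * masks.
             */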
2569            while (*ct_str != '\0') {
2570                switch(*ct_str) {
2571                case '0' ... '9':
2572                    {
2573                        int oarg = *ct_str - '0';
2574                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2575                        tcg_debug_assert(oarg < def->nb_oargs);
2576                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
2577                        def->args_ct[i] = def->args_ct[oarg];
2578                        /* The output sets oalias.  */
2579                        def->args_ct[oarg].oalias = true;
2580                        def->args_ct[oarg].alias_index = i;
2581                        /* The input sets ialias. */
2582                        def->args_ct[i].ialias = true;
2583                        def->args_ct[i].alias_index = oarg;
2584                    }
2585                    ct_str++;
2586                    break;
2587                case '&':
2588                    def->args_ct[i].newreg = true;
2589                    ct_str++;
2590                    break;
2591                case 'i':
2592                    def->args_ct[i].ct |= TCG_CT_CONST;
2593                    ct_str++;
2594                    break;
2595
2596                /* Include all of the target-specific constraints. */
2597
2598#undef CONST
2599#define CONST(CASE, MASK) \
2600    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2601#define REGS(CASE, MASK) \
2602    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2603
2604#include "tcg-target-con-str.h"
2605
2606#undef REGS
2607#undef CONST
2608                default:
2609                    /* Typo in TCGTargetOpDef constraint. */
2610                    g_assert_not_reached();
2611                }
2612            }
2613        }
2614
2615        /* TCGTargetOpDef entry with too much information? */
2616        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2617
        /* sort the constraints (XXX: this is just a heuristic) */
2619        sort_constraints(def, 0, def->nb_oargs);
2620        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2621    }
2622}
2623
2624void tcg_op_remove(TCGContext *s, TCGOp *op)
2625{
2626    TCGLabel *label;
2627
2628    switch (op->opc) {
2629    case INDEX_op_br:
2630        label = arg_label(op->args[0]);
2631        label->refs--;
2632        break;
2633    case INDEX_op_brcond_i32:
2634    case INDEX_op_brcond_i64:
2635        label = arg_label(op->args[3]);
2636        label->refs--;
2637        break;
2638    case INDEX_op_brcond2_i32:
2639        label = arg_label(op->args[5]);
2640        label->refs--;
2641        break;
2642    default:
2643        break;
2644    }
2645
2646    QTAILQ_REMOVE(&s->ops, op, link);
2647    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2648    s->nb_ops--;
2649
2650#ifdef CONFIG_PROFILER
2651    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2652#endif
2653}
2654
2655static TCGOp *tcg_op_alloc(TCGOpcode opc)
2656{
2657    TCGContext *s = tcg_ctx;
2658    TCGOp *op;
2659
2660    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2661        op = tcg_malloc(sizeof(TCGOp));
2662    } else {
2663        op = QTAILQ_FIRST(&s->free_ops);
2664        QTAILQ_REMOVE(&s->free_ops, op, link);
2665    }
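    /* Zero only the fields declared before 'link'; the linkage is set by
       the subsequent list insertion and args[] is filled by the caller. */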
2666    memset(op, 0, offsetof(TCGOp, link));
2667    op->opc = opc;
2668    s->nb_ops++;
2669
2670    return op;
2671}
2672
2673TCGOp *tcg_emit_op(TCGOpcode opc)
2674{
2675    TCGOp *op = tcg_op_alloc(opc);
2676    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2677    return op;
2678}
2679
2680TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2681{
2682    TCGOp *new_op = tcg_op_alloc(opc);
2683    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2684    return new_op;
2685}
2686
2687TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2688{
2689    TCGOp *new_op = tcg_op_alloc(opc);
2690    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2691    return new_op;
2692}
2693
/* Reachability analysis: remove unreachable code.  */
2695static void reachable_code_pass(TCGContext *s)
2696{
2697    TCGOp *op, *op_next;
2698    bool dead = false;
2699
2700    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2701        bool remove = dead;
2702        TCGLabel *label;
2703        int call_flags;
2704
2705        switch (op->opc) {
2706        case INDEX_op_set_label:
2707            label = arg_label(op->args[0]);
2708            if (label->refs == 0) {
2709                /*
2710                 * While there is an occasional backward branch, virtually
2711                 * all branches generated by the translators are forward.
                 * This means that by the time we see a label, we will
                 * generally have removed all references to it already,
                 * and there is little to be gained by iterating.
2715                 */
2716                remove = true;
2717            } else {
2718                /* Once we see a label, insns become live again.  */
2719                dead = false;
2720                remove = false;
2721
2722                /*
2723                 * Optimization can fold conditional branches to unconditional.
2724                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This had to
                 * wait until the dead code between them was removed.
2727                 */
2728                if (label->refs == 1) {
2729                    TCGOp *op_prev = QTAILQ_PREV(op, link);
2730                    if (op_prev->opc == INDEX_op_br &&
2731                        label == arg_label(op_prev->args[0])) {
2732                        tcg_op_remove(s, op_prev);
2733                        remove = true;
2734                    }
2735                }
2736            }
2737            break;
2738
2739        case INDEX_op_br:
2740        case INDEX_op_exit_tb:
2741        case INDEX_op_goto_ptr:
2742            /* Unconditional branches; everything following is dead.  */
2743            dead = true;
2744            break;
2745
2746        case INDEX_op_call:
            /* Notice noreturn helper calls, e.g. those that raise exceptions.  */
2748            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2749            if (call_flags & TCG_CALL_NO_RETURN) {
2750                dead = true;
2751            }
2752            break;
2753
2754        case INDEX_op_insn_start:
2755            /* Never remove -- we need to keep these for unwind.  */
2756            remove = false;
2757            break;
2758
2759        default:
2760            break;
2761        }
2762
2763        if (remove) {
2764            tcg_op_remove(s, op);
2765        }
2766    }
2767}
2768
2769#define TS_DEAD  1
2770#define TS_MEM   2
2771
2772#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2773#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
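/* arg_life holds two SYNC_ARG bits (one per possible output) followed by
   one DEAD_ARG bit per argument, as decoded by tcg_dump_ops() above. */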
2774
2775/* For liveness_pass_1, the register preferences for a given temp.  */
2776static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2777{
2778    return ts->state_ptr;
2779}
2780
2781/* For liveness_pass_1, reset the preferences for a given temp to the
2782 * maximal regset for its type.
2783 */
2784static inline void la_reset_pref(TCGTemp *ts)
2785{
2786    *la_temp_pref(ts)
2787        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2788}
2789
2790/* liveness analysis: end of function: all temps are dead, and globals
2791   should be in memory. */
2792static void la_func_end(TCGContext *s, int ng, int nt)
2793{
2794    int i;
2795
2796    for (i = 0; i < ng; ++i) {
2797        s->temps[i].state = TS_DEAD | TS_MEM;
2798        la_reset_pref(&s->temps[i]);
2799    }
2800    for (i = ng; i < nt; ++i) {
2801        s->temps[i].state = TS_DEAD;
2802        la_reset_pref(&s->temps[i]);
2803    }
2804}
2805
2806/* liveness analysis: end of basic block: all temps are dead, globals
2807   and local temps should be in memory. */
2808static void la_bb_end(TCGContext *s, int ng, int nt)
2809{
2810    int i;
2811
2812    for (i = 0; i < nt; ++i) {
2813        TCGTemp *ts = &s->temps[i];
2814        int state;
2815
2816        switch (ts->kind) {
2817        case TEMP_FIXED:
2818        case TEMP_GLOBAL:
2819        case TEMP_LOCAL:
2820            state = TS_DEAD | TS_MEM;
2821            break;
2822        case TEMP_NORMAL:
2823        case TEMP_CONST:
2824            state = TS_DEAD;
2825            break;
2826        default:
2827            g_assert_not_reached();
2828        }
2829        ts->state = state;
2830        la_reset_pref(ts);
2831    }
2832}
2833
2834/* liveness analysis: sync globals back to memory.  */
2835static void la_global_sync(TCGContext *s, int ng)
2836{
2837    int i;
2838
2839    for (i = 0; i < ng; ++i) {
2840        int state = s->temps[i].state;
2841        s->temps[i].state = state | TS_MEM;
2842        if (state == TS_DEAD) {
2843            /* If the global was previously dead, reset prefs.  */
2844            la_reset_pref(&s->temps[i]);
2845        }
2846    }
2847}
2848
2849/*
2850 * liveness analysis: conditional branch: all temps are dead,
2851 * globals and local temps should be synced.
2852 */
2853static void la_bb_sync(TCGContext *s, int ng, int nt)
2854{
2855    la_global_sync(s, ng);
2856
2857    for (int i = ng; i < nt; ++i) {
2858        TCGTemp *ts = &s->temps[i];
2859        int state;
2860
2861        switch (ts->kind) {
2862        case TEMP_LOCAL:
2863            state = ts->state;
2864            ts->state = state | TS_MEM;
2865            if (state != TS_DEAD) {
2866                continue;
2867            }
2868            break;
2869        case TEMP_NORMAL:
2870            s->temps[i].state = TS_DEAD;
2871            break;
2872        case TEMP_CONST:
2873            continue;
2874        default:
2875            g_assert_not_reached();
2876        }
2877        la_reset_pref(&s->temps[i]);
2878    }
2879}
2880
2881/* liveness analysis: sync globals back to memory and kill.  */
2882static void la_global_kill(TCGContext *s, int ng)
2883{
2884    int i;
2885
2886    for (i = 0; i < ng; i++) {
2887        s->temps[i].state = TS_DEAD | TS_MEM;
2888        la_reset_pref(&s->temps[i]);
2889    }
2890}
2891
2892/* liveness analysis: note live globals crossing calls.  */
2893static void la_cross_call(TCGContext *s, int nt)
2894{
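    /* Start from the set of registers preserved across calls. */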
2895    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2896    int i;
2897
2898    for (i = 0; i < nt; i++) {
2899        TCGTemp *ts = &s->temps[i];
2900        if (!(ts->state & TS_DEAD)) {
2901            TCGRegSet *pset = la_temp_pref(ts);
2902            TCGRegSet set = *pset;
2903
2904            set &= mask;
2905            /* If the combination is not possible, restart.  */
2906            if (set == 0) {
2907                set = tcg_target_available_regs[ts->type] & mask;
2908            }
2909            *pset = set;
2910        }
2911    }
2912}
2913
/* Liveness analysis: update the opc_arg_life array to tell whether a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
2917static void liveness_pass_1(TCGContext *s)
2918{
2919    int nb_globals = s->nb_globals;
2920    int nb_temps = s->nb_temps;
2921    TCGOp *op, *op_prev;
2922    TCGRegSet *prefs;
2923    int i;
2924
2925    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2926    for (i = 0; i < nb_temps; ++i) {
2927        s->temps[i].state_ptr = prefs + i;
2928    }
2929
2930    /* ??? Should be redundant with the exit_tb that ends the TB.  */
2931    la_func_end(s, nb_globals, nb_temps);
2932
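    /* Walk the ops backward, so that a temp's state at each opcode
       reflects its uses later in the TB. */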
2933    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2934        int nb_iargs, nb_oargs;
2935        TCGOpcode opc_new, opc_new2;
2936        bool have_opc_new2;
2937        TCGLifeData arg_life = 0;
2938        TCGTemp *ts;
2939        TCGOpcode opc = op->opc;
2940        const TCGOpDef *def = &tcg_op_defs[opc];
2941
2942        switch (opc) {
2943        case INDEX_op_call:
2944            {
2945                int call_flags;
2946                int nb_call_regs;
2947
2948                nb_oargs = TCGOP_CALLO(op);
2949                nb_iargs = TCGOP_CALLI(op);
2950                call_flags = op->args[nb_oargs + nb_iargs + 1];
2951
2952                /* pure functions can be removed if their result is unused */
2953                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2954                    for (i = 0; i < nb_oargs; i++) {
2955                        ts = arg_temp(op->args[i]);
2956                        if (ts->state != TS_DEAD) {
2957                            goto do_not_remove_call;
2958                        }
2959                    }
2960                    goto do_remove;
2961                }
2962            do_not_remove_call:
2963
2964                /* Output args are dead.  */
2965                for (i = 0; i < nb_oargs; i++) {
2966                    ts = arg_temp(op->args[i]);
2967                    if (ts->state & TS_DEAD) {
2968                        arg_life |= DEAD_ARG << i;
2969                    }
2970                    if (ts->state & TS_MEM) {
2971                        arg_life |= SYNC_ARG << i;
2972                    }
2973                    ts->state = TS_DEAD;
2974                    la_reset_pref(ts);
2975
2976                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2977                    op->output_pref[i] = 0;
2978                }
2979
2980                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2981                                    TCG_CALL_NO_READ_GLOBALS))) {
2982                    la_global_kill(s, nb_globals);
2983                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2984                    la_global_sync(s, nb_globals);
2985                }
2986
2987                /* Record arguments that die in this helper.  */
2988                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2989                    ts = arg_temp(op->args[i]);
2990                    if (ts && ts->state & TS_DEAD) {
2991                        arg_life |= DEAD_ARG << i;
2992                    }
2993                }
2994
2995                /* For all live registers, remove call-clobbered prefs.  */
2996                la_cross_call(s, nb_temps);
2997
2998                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2999
3000                /* Input arguments are live for preceding opcodes.  */
3001                for (i = 0; i < nb_iargs; i++) {
3002                    ts = arg_temp(op->args[i + nb_oargs]);
3003                    if (ts && ts->state & TS_DEAD) {
3004                        /* For those arguments that die, and will be allocated
3005                         * in registers, clear the register set for that arg,
3006                         * to be filled in below.  For args that will be on
3007                         * the stack, reset to any available reg.
3008                         */
3009                        *la_temp_pref(ts)
3010                            = (i < nb_call_regs ? 0 :
3011                               tcg_target_available_regs[ts->type]);
3012                        ts->state &= ~TS_DEAD;
3013                    }
3014                }
3015
3016                /* For each input argument, add its input register to prefs.
3017                   If a temp is used once, this produces a single set bit.  */
3018                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3019                    ts = arg_temp(op->args[i + nb_oargs]);
3020                    if (ts) {
3021                        tcg_regset_set_reg(*la_temp_pref(ts),
3022                                           tcg_target_call_iarg_regs[i]);
3023                    }
3024                }
3025            }
3026            break;
3027        case INDEX_op_insn_start:
3028            break;
3029        case INDEX_op_discard:
3030            /* mark the temporary as dead */
3031            ts = arg_temp(op->args[0]);
3032            ts->state = TS_DEAD;
3033            la_reset_pref(ts);
3034            break;
3035
3036        case INDEX_op_add2_i32:
3037            opc_new = INDEX_op_add_i32;
3038            goto do_addsub2;
3039        case INDEX_op_sub2_i32:
3040            opc_new = INDEX_op_sub_i32;
3041            goto do_addsub2;
3042        case INDEX_op_add2_i64:
3043            opc_new = INDEX_op_add_i64;
3044            goto do_addsub2;
3045        case INDEX_op_sub2_i64:
3046            opc_new = INDEX_op_sub_i64;
3047        do_addsub2:
3048            nb_iargs = 4;
3049            nb_oargs = 2;
3050            /* Test if the high part of the operation is dead, but not
3051               the low part.  The result can be optimized to a simple
               add or sub.  This often happens for an x86_64 guest when
               the CPU mode is set to 32-bit.  */
3054            if (arg_temp(op->args[1])->state == TS_DEAD) {
3055                if (arg_temp(op->args[0])->state == TS_DEAD) {
3056                    goto do_remove;
3057                }
3058                /* Replace the opcode and adjust the args in place,
3059                   leaving 3 unused args at the end.  */
3060                op->opc = opc = opc_new;
3061                op->args[1] = op->args[2];
3062                op->args[2] = op->args[4];
3063                /* Fall through and mark the single-word operation live.  */
3064                nb_iargs = 2;
3065                nb_oargs = 1;
3066            }
3067            goto do_not_remove;
3068
3069        case INDEX_op_mulu2_i32:
3070            opc_new = INDEX_op_mul_i32;
3071            opc_new2 = INDEX_op_muluh_i32;
3072            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3073            goto do_mul2;
3074        case INDEX_op_muls2_i32:
3075            opc_new = INDEX_op_mul_i32;
3076            opc_new2 = INDEX_op_mulsh_i32;
3077            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3078            goto do_mul2;
3079        case INDEX_op_mulu2_i64:
3080            opc_new = INDEX_op_mul_i64;
3081            opc_new2 = INDEX_op_muluh_i64;
3082            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3083            goto do_mul2;
3084        case INDEX_op_muls2_i64:
3085            opc_new = INDEX_op_mul_i64;
3086            opc_new2 = INDEX_op_mulsh_i64;
3087            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3088            goto do_mul2;
3089        do_mul2:
3090            nb_iargs = 2;
3091            nb_oargs = 2;
3092            if (arg_temp(op->args[1])->state == TS_DEAD) {
3093                if (arg_temp(op->args[0])->state == TS_DEAD) {
3094                    /* Both parts of the operation are dead.  */
3095                    goto do_remove;
3096                }
3097                /* The high part of the operation is dead; generate the low. */
3098                op->opc = opc = opc_new;
3099                op->args[1] = op->args[2];
3100                op->args[2] = op->args[3];
3101            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3102                /* The low part of the operation is dead; generate the high. */
3103                op->opc = opc = opc_new2;
3104                op->args[0] = op->args[1];
3105                op->args[1] = op->args[2];
3106                op->args[2] = op->args[3];
3107            } else {
3108                goto do_not_remove;
3109            }
3110            /* Mark the single-word operation live.  */
3111            nb_oargs = 1;
3112            goto do_not_remove;
3113
3114        default:
3115            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3116            nb_iargs = def->nb_iargs;
3117            nb_oargs = def->nb_oargs;
3118
3119            /* Test if the operation can be removed because all
3120               its outputs are dead. We assume that nb_oargs == 0
               implies side effects.  */
3122            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3123                for (i = 0; i < nb_oargs; i++) {
3124                    if (arg_temp(op->args[i])->state != TS_DEAD) {
3125                        goto do_not_remove;
3126                    }
3127                }
3128                goto do_remove;
3129            }
3130            goto do_not_remove;
3131
3132        do_remove:
3133            tcg_op_remove(s, op);
3134            break;
3135
3136        do_not_remove:
3137            for (i = 0; i < nb_oargs; i++) {
3138                ts = arg_temp(op->args[i]);
3139
3140                /* Remember the preference of the uses that followed.  */
3141                op->output_pref[i] = *la_temp_pref(ts);
3142
3143                /* Output args are dead.  */
3144                if (ts->state & TS_DEAD) {
3145                    arg_life |= DEAD_ARG << i;
3146                }
3147                if (ts->state & TS_MEM) {
3148                    arg_life |= SYNC_ARG << i;
3149                }
3150                ts->state = TS_DEAD;
3151                la_reset_pref(ts);
3152            }
3153
3154            /* If end of basic block, update.  */
3155            if (def->flags & TCG_OPF_BB_EXIT) {
3156                la_func_end(s, nb_globals, nb_temps);
3157            } else if (def->flags & TCG_OPF_COND_BRANCH) {
3158                la_bb_sync(s, nb_globals, nb_temps);
3159            } else if (def->flags & TCG_OPF_BB_END) {
3160                la_bb_end(s, nb_globals, nb_temps);
3161            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3162                la_global_sync(s, nb_globals);
3163                if (def->flags & TCG_OPF_CALL_CLOBBER) {
3164                    la_cross_call(s, nb_temps);
3165                }
3166            }
3167
3168            /* Record arguments that die in this opcode.  */
3169            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3170                ts = arg_temp(op->args[i]);
3171                if (ts->state & TS_DEAD) {
3172                    arg_life |= DEAD_ARG << i;
3173                }
3174            }
3175
3176            /* Input arguments are live for preceding opcodes.  */
3177            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3178                ts = arg_temp(op->args[i]);
3179                if (ts->state & TS_DEAD) {
3180                    /* For operands that were dead, initially allow
3181                       all regs for the type.  */
3182                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3183                    ts->state &= ~TS_DEAD;
3184                }
3185            }
3186
3187            /* Incorporate constraints for this operand.  */
3188            switch (opc) {
3189            case INDEX_op_mov_i32:
3190            case INDEX_op_mov_i64:
3191                /* Note that these are TCG_OPF_NOT_PRESENT and do not
3192                   have proper constraints.  That said, special case
3193                   moves to propagate preferences backward.  */
3194                if (IS_DEAD_ARG(1)) {
3195                    *la_temp_pref(arg_temp(op->args[0]))
3196                        = *la_temp_pref(arg_temp(op->args[1]));
3197                }
3198                break;
3199
3200            default:
3201                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3202                    const TCGArgConstraint *ct = &def->args_ct[i];
3203                    TCGRegSet set, *pset;
3204
3205                    ts = arg_temp(op->args[i]);
3206                    pset = la_temp_pref(ts);
3207                    set = *pset;
3208
3209                    set &= ct->regs;
3210                    if (ct->ialias) {
3211                        set &= op->output_pref[ct->alias_index];
3212                    }
3213                    /* If the combination is not possible, restart.  */
3214                    if (set == 0) {
3215                        set = ct->regs;
3216                    }
3217                    *pset = set;
3218                }
3219                break;
3220            }
3221            break;
3222        }
3223        op->life = arg_life;
3224    }
3225}
3226
3227/* Liveness analysis: Convert indirect regs to direct temporaries.  */
3228static bool liveness_pass_2(TCGContext *s)
3229{
3230    int nb_globals = s->nb_globals;
3231    int nb_temps, i;
3232    bool changes = false;
3233    TCGOp *op, *op_next;
3234
3235    /* Create a temporary for each indirect global.  */
3236    for (i = 0; i < nb_globals; ++i) {
3237        TCGTemp *its = &s->temps[i];
3238        if (its->indirect_reg) {
3239            TCGTemp *dts = tcg_temp_alloc(s);
3240            dts->type = its->type;
3241            dts->base_type = its->base_type;
3242            its->state_ptr = dts;
3243        } else {
3244            its->state_ptr = NULL;
3245        }
3246        /* All globals begin dead.  */
3247        its->state = TS_DEAD;
3248    }
3249    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3250        TCGTemp *its = &s->temps[i];
3251        its->state_ptr = NULL;
3252        its->state = TS_DEAD;
3253    }
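    /* From here on, state_ptr links an indirect global to its shadow
       direct temp, whose contents the state field describes: TS_DEAD
       means the shadow must be reloaded, TS_MEM that it matches memory,
       and 0 that it has been modified since the last sync. */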
3254
3255    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3256        TCGOpcode opc = op->opc;
3257        const TCGOpDef *def = &tcg_op_defs[opc];
3258        TCGLifeData arg_life = op->life;
3259        int nb_iargs, nb_oargs, call_flags;
3260        TCGTemp *arg_ts, *dir_ts;
3261
3262        if (opc == INDEX_op_call) {
3263            nb_oargs = TCGOP_CALLO(op);
3264            nb_iargs = TCGOP_CALLI(op);
3265            call_flags = op->args[nb_oargs + nb_iargs + 1];
3266        } else {
3267            nb_iargs = def->nb_iargs;
3268            nb_oargs = def->nb_oargs;
3269
3270            /* Set flags similar to how calls require.  */
3271            if (def->flags & TCG_OPF_COND_BRANCH) {
3272                /* Like reading globals: sync_globals */
3273                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3274            } else if (def->flags & TCG_OPF_BB_END) {
3275                /* Like writing globals: save_globals */
3276                call_flags = 0;
3277            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3278                /* Like reading globals: sync_globals */
3279                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3280            } else {
3281                /* No effect on globals.  */
3282                call_flags = (TCG_CALL_NO_READ_GLOBALS |
3283                              TCG_CALL_NO_WRITE_GLOBALS);
3284            }
3285        }
3286
3287        /* Make sure that input arguments are available.  */
3288        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3289            arg_ts = arg_temp(op->args[i]);
3290            if (arg_ts) {
3291                dir_ts = arg_ts->state_ptr;
3292                if (dir_ts && arg_ts->state == TS_DEAD) {
3293                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3294                                      ? INDEX_op_ld_i32
3295                                      : INDEX_op_ld_i64);
3296                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3297
3298                    lop->args[0] = temp_arg(dir_ts);
3299                    lop->args[1] = temp_arg(arg_ts->mem_base);
3300                    lop->args[2] = arg_ts->mem_offset;
3301
3302                    /* Loaded, but synced with memory.  */
3303                    arg_ts->state = TS_MEM;
3304                }
3305            }
3306        }
3307
3308        /* Perform input replacement, and mark inputs that became dead.
3309           No action is required except keeping temp_state up to date
3310           so that we reload when needed.  */
3311        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3312            arg_ts = arg_temp(op->args[i]);
3313            if (arg_ts) {
3314                dir_ts = arg_ts->state_ptr;
3315                if (dir_ts) {
3316                    op->args[i] = temp_arg(dir_ts);
3317                    changes = true;
3318                    if (IS_DEAD_ARG(i)) {
3319                        arg_ts->state = TS_DEAD;
3320                    }
3321                }
3322            }
3323        }
3324
3325        /* Liveness analysis should ensure that the following are
3326           all correct, for call sites and basic block end points.  */
3327        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3328            /* Nothing to do */
3329        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3330            for (i = 0; i < nb_globals; ++i) {
3331                /* Liveness should see that globals are synced back,
3332                   that is, either TS_DEAD or TS_MEM.  */
3333                arg_ts = &s->temps[i];
3334                tcg_debug_assert(arg_ts->state_ptr == 0
3335                                 || arg_ts->state != 0);
3336            }
3337        } else {
3338            for (i = 0; i < nb_globals; ++i) {
3339                /* Liveness should see that globals are saved back,
3340                   that is, TS_DEAD, waiting to be reloaded.  */
3341                arg_ts = &s->temps[i];
3342                tcg_debug_assert(arg_ts->state_ptr == 0
3343                                 || arg_ts->state == TS_DEAD);
3344            }
3345        }
3346
3347        /* Outputs become available.  */
3348        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3349            arg_ts = arg_temp(op->args[0]);
3350            dir_ts = arg_ts->state_ptr;
3351            if (dir_ts) {
3352                op->args[0] = temp_arg(dir_ts);
3353                changes = true;
3354
3355                /* The output is now live and modified.  */
3356                arg_ts->state = 0;
3357
3358                if (NEED_SYNC_ARG(0)) {
3359                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3360                                      ? INDEX_op_st_i32
3361                                      : INDEX_op_st_i64);
3362                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3363                    TCGTemp *out_ts = dir_ts;
3364
3365                    if (IS_DEAD_ARG(0)) {
3366                        out_ts = arg_temp(op->args[1]);
3367                        arg_ts->state = TS_DEAD;
3368                        tcg_op_remove(s, op);
3369                    } else {
3370                        arg_ts->state = TS_MEM;
3371                    }
3372
3373                    sop->args[0] = temp_arg(out_ts);
3374                    sop->args[1] = temp_arg(arg_ts->mem_base);
3375                    sop->args[2] = arg_ts->mem_offset;
3376                } else {
3377                    tcg_debug_assert(!IS_DEAD_ARG(0));
3378                }
3379            }
3380        } else {
3381            for (i = 0; i < nb_oargs; i++) {
3382                arg_ts = arg_temp(op->args[i]);
3383                dir_ts = arg_ts->state_ptr;
3384                if (!dir_ts) {
3385                    continue;
3386                }
3387                op->args[i] = temp_arg(dir_ts);
3388                changes = true;
3389
3390                /* The output is now live and modified.  */
3391                arg_ts->state = 0;
3392
3393                /* Sync outputs upon their last write.  */
3394                if (NEED_SYNC_ARG(i)) {
3395                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3396                                      ? INDEX_op_st_i32
3397                                      : INDEX_op_st_i64);
3398                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3399
3400                    sop->args[0] = temp_arg(dir_ts);
3401                    sop->args[1] = temp_arg(arg_ts->mem_base);
3402                    sop->args[2] = arg_ts->mem_offset;
3403
3404                    arg_ts->state = TS_MEM;
3405                }
3406                /* Drop outputs that are dead.  */
3407                if (IS_DEAD_ARG(i)) {
3408                    arg_ts->state = TS_DEAD;
3409                }
3410            }
3411        }
3412    }
3413
3414    return changes;
3415}
3416
3417#ifdef CONFIG_DEBUG_TCG
3418static void dump_regs(TCGContext *s)
3419{
3420    TCGTemp *ts;
3421    int i;
3422    char buf[64];
3423
3424    for (i = 0; i < s->nb_temps; i++) {
3425        ts = &s->temps[i];
3426        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3427        switch (ts->val_type) {
3428        case TEMP_VAL_REG:
3429            printf("%s", tcg_target_reg_names[ts->reg]);
3430            break;
3431        case TEMP_VAL_MEM:
3432            printf("%d(%s)", (int)ts->mem_offset,
3433                   tcg_target_reg_names[ts->mem_base->reg]);
3434            break;
3435        case TEMP_VAL_CONST:
3436            printf("$0x%" PRIx64, ts->val);
3437            break;
3438        case TEMP_VAL_DEAD:
3439            printf("D");
3440            break;
3441        default:
3442            printf("???");
3443            break;
3444        }
3445        printf("\n");
3446    }
3447
3448    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3449        if (s->reg_to_temp[i] != NULL) {
3450            printf("%s: %s\n",
3451                   tcg_target_reg_names[i],
3452                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3453        }
3454    }
3455}
3456
3457static void check_regs(TCGContext *s)
3458{
3459    int reg;
3460    int k;
3461    TCGTemp *ts;
3462    char buf[64];
3463
3464    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3465        ts = s->reg_to_temp[reg];
3466        if (ts != NULL) {
3467            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3468                printf("Inconsistency for register %s:\n", 
3469                       tcg_target_reg_names[reg]);
3470                goto fail;
3471            }
3472        }
3473    }
3474    for (k = 0; k < s->nb_temps; k++) {
3475        ts = &s->temps[k];
3476        if (ts->val_type == TEMP_VAL_REG
3477            && ts->kind != TEMP_FIXED
3478            && s->reg_to_temp[ts->reg] != ts) {
3479            printf("Inconsistency for temp %s:\n",
3480                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3481        fail:
3482            printf("reg state:\n");
3483            dump_regs(s);
3484            tcg_abort();
3485        }
3486    }
3487}
3488#endif
3489
3490static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3491{
3492#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3493    /* Sparc64 stack is accessed with offset of 2047 */
3494    s->current_frame_offset = (s->current_frame_offset +
3495                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
3496        ~(sizeof(tcg_target_long) - 1);
3497#endif
3498    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3499        s->frame_end) {
3500        tcg_abort();
3501    }
3502    ts->mem_offset = s->current_frame_offset;
3503    ts->mem_base = s->frame_temp;
3504    ts->mem_allocated = 1;
3505    s->current_frame_offset += sizeof(tcg_target_long);
3506}
3507
3508static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3509
3510/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3511   mark it free; otherwise mark it dead.  */
3512static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3513{
3514    TCGTempVal new_type;
3515
3516    switch (ts->kind) {
3517    case TEMP_FIXED:
3518        return;
3519    case TEMP_GLOBAL:
3520    case TEMP_LOCAL:
3521        new_type = TEMP_VAL_MEM;
3522        break;
3523    case TEMP_NORMAL:
3524        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3525        break;
3526    case TEMP_CONST:
3527        new_type = TEMP_VAL_CONST;
3528        break;
3529    default:
3530        g_assert_not_reached();
3531    }
3532    if (ts->val_type == TEMP_VAL_REG) {
3533        s->reg_to_temp[ts->reg] = NULL;
3534    }
3535    ts->val_type = new_type;
3536}
3537
3538/* Mark a temporary as dead.  */
3539static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3540{
3541    temp_free_or_dead(s, ts, 1);
3542}
3543
3544/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3545   register needs to be allocated to store a constant.  If 'free_or_dead'
3546   is non-zero, subsequently release the temporary; if it is positive, the
3547   temp is dead; if it is negative, the temp is free.  */
3548static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3549                      TCGRegSet preferred_regs, int free_or_dead)
3550{
3551    if (!temp_readonly(ts) && !ts->mem_coherent) {
3552        if (!ts->mem_allocated) {
3553            temp_allocate_frame(s, ts);
3554        }
3555        switch (ts->val_type) {
3556        case TEMP_VAL_CONST:
3557            /* If we're going to free the temp immediately, then we won't
3558               require it later in a register, so attempt to store the
3559               constant to memory directly.  */
3560            if (free_or_dead
3561                && tcg_out_sti(s, ts->type, ts->val,
3562                               ts->mem_base->reg, ts->mem_offset)) {
3563                break;
3564            }
3565            temp_load(s, ts, tcg_target_available_regs[ts->type],
3566                      allocated_regs, preferred_regs);
3567            /* fallthrough */
3568
3569        case TEMP_VAL_REG:
3570            tcg_out_st(s, ts->type, ts->reg,
3571                       ts->mem_base->reg, ts->mem_offset);
3572            break;
3573
3574        case TEMP_VAL_MEM:
3575            break;
3576
3577        case TEMP_VAL_DEAD:
3578        default:
3579            tcg_abort();
3580        }
3581        ts->mem_coherent = 1;
3582    }
3583    if (free_or_dead) {
3584        temp_free_or_dead(s, ts, free_or_dead);
3585    }
3586}
3587
3588/* free register 'reg' by spilling the corresponding temporary if necessary */
3589static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3590{
3591    TCGTemp *ts = s->reg_to_temp[reg];
3592    if (ts != NULL) {
3593        temp_sync(s, ts, allocated_regs, 0, -1);
3594    }
3595}
3596
3597/**
3598 * tcg_reg_alloc:
3599 * @required_regs: Set of registers in which we must allocate.
3600 * @allocated_regs: Set of registers which must be avoided.
3601 * @preferred_regs: Set of registers we should prefer.
3602 * @rev: True if we search the registers in "indirect" order.
3603 *
3604 * The allocated register must be in @required_regs & ~@allocated_regs,
3605 * but if we can put it in @preferred_regs we may save a move later.
3606 */
3607static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3608                            TCGRegSet allocated_regs,
3609                            TCGRegSet preferred_regs, bool rev)
3610{
3611    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3612    TCGRegSet reg_ct[2];
3613    const int *order;
3614
3615    reg_ct[1] = required_regs & ~allocated_regs;
3616    tcg_debug_assert(reg_ct[1] != 0);
3617    reg_ct[0] = reg_ct[1] & preferred_regs;
3618
3619    /* Skip the preferred_regs option if it cannot be satisfied,
3620       or if the preference made no difference.  */
3621    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
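        /* With f == 0, the loops below scan the preferred subset
           (reg_ct[0]) first and then the full set (reg_ct[1]); with
           f == 1, only the full set is scanned. */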
3622
3623    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3624
3625    /* Try free registers, preferences first.  */
3626    for (j = f; j < 2; j++) {
3627        TCGRegSet set = reg_ct[j];
3628
3629        if (tcg_regset_single(set)) {
3630            /* One register in the set.  */
3631            TCGReg reg = tcg_regset_first(set);
3632            if (s->reg_to_temp[reg] == NULL) {
3633                return reg;
3634            }
3635        } else {
3636            for (i = 0; i < n; i++) {
3637                TCGReg reg = order[i];
3638                if (s->reg_to_temp[reg] == NULL &&
3639                    tcg_regset_test_reg(set, reg)) {
3640                    return reg;
3641                }
3642            }
3643        }
3644    }
3645
3646    /* We must spill something.  */
3647    for (j = f; j < 2; j++) {
3648        TCGRegSet set = reg_ct[j];
3649
3650        if (tcg_regset_single(set)) {
3651            /* One register in the set.  */
3652            TCGReg reg = tcg_regset_first(set);
3653            tcg_reg_free(s, reg, allocated_regs);
3654            return reg;
3655        } else {
3656            for (i = 0; i < n; i++) {
3657                TCGReg reg = order[i];
3658                if (tcg_regset_test_reg(set, reg)) {
3659                    tcg_reg_free(s, reg, allocated_regs);
3660                    return reg;
3661                }
3662            }
3663        }
3664    }
3665
3666    tcg_abort();
3667}
3668
3669/* Make sure the temporary is in a register.  If needed, allocate the register
3670   from DESIRED while avoiding ALLOCATED.  */
3671static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3672                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3673{
3674    TCGReg reg;
3675
3676    switch (ts->val_type) {
3677    case TEMP_VAL_REG:
3678        return;
3679    case TEMP_VAL_CONST:
3680        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3681                            preferred_regs, ts->indirect_base);
3682        if (ts->type <= TCG_TYPE_I64) {
3683            tcg_out_movi(s, ts->type, reg, ts->val);
3684        } else {
3685            uint64_t val = ts->val;
3686            MemOp vece = MO_64;
3687
3688            /*
3689             * Find the minimal vector element that matches the constant.
3690             * The targets will, in general, have to do this search anyway,
3691             * so do it generically here.
3692             */
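                /*
                 * For example, val == 0x4040404040404040 satisfies
                 * val == dup_const(MO_8, val), so replicating a single
                 * byte is sufficient.
                 */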
3693            if (val == dup_const(MO_8, val)) {
3694                vece = MO_8;
3695            } else if (val == dup_const(MO_16, val)) {
3696                vece = MO_16;
3697            } else if (val == dup_const(MO_32, val)) {
3698                vece = MO_32;
3699            }
3700
3701            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3702        }
3703        ts->mem_coherent = 0;
3704        break;
3705    case TEMP_VAL_MEM:
3706        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3707                            preferred_regs, ts->indirect_base);
3708        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3709        ts->mem_coherent = 1;
3710        break;
3711    case TEMP_VAL_DEAD:
3712    default:
3713        tcg_abort();
3714    }
3715    ts->reg = reg;
3716    ts->val_type = TEMP_VAL_REG;
3717    s->reg_to_temp[reg] = ts;
3718}
3719
3720/* Save a temporary to memory. 'allocated_regs' is used in case a
3721   temporary register needs to be allocated to store a constant.  */
3722static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3723{
3724    /* The liveness analysis already ensures that globals are back
3725       in memory. Keep a tcg_debug_assert for safety. */
3726    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3727}
3728
3729/* save globals to their canonical location and assume they can be
3730   modified by the following code. 'allocated_regs' is used in case a
3731   temporary register needs to be allocated to store a constant. */
3732static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3733{
3734    int i, n;
3735
3736    for (i = 0, n = s->nb_globals; i < n; i++) {
3737        temp_save(s, &s->temps[i], allocated_regs);
3738    }
3739}
3740
3741/* sync globals to their canonical location and assume they can be
3742   read by the following code. 'allocated_regs' is used in case a
3743   temporary register needs to be allocated to store a constant. */
3744static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3745{
3746    int i, n;
3747
3748    for (i = 0, n = s->nb_globals; i < n; i++) {
3749        TCGTemp *ts = &s->temps[i];
3750        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3751                         || ts->kind == TEMP_FIXED
3752                         || ts->mem_coherent);
3753    }
3754}
3755
3756/* at the end of a basic block, we assume all temporaries are dead and
3757   all globals are stored at their canonical location. */
3758static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3759{
3760    int i;
3761
3762    for (i = s->nb_globals; i < s->nb_temps; i++) {
3763        TCGTemp *ts = &s->temps[i];
3764
3765        switch (ts->kind) {
3766        case TEMP_LOCAL:
3767            temp_save(s, ts, allocated_regs);
3768            break;
3769        case TEMP_NORMAL:
3770            /* The liveness analysis already ensures that temps are dead.
3771               Keep a tcg_debug_assert for safety. */
3772            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3773            break;
3774        case TEMP_CONST:
3775            /* Similarly, we should have freed any allocated register. */
3776            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3777            break;
3778        default:
3779            g_assert_not_reached();
3780        }
3781    }
3782
3783    save_globals(s, allocated_regs);
3784}
3785
3786/*
3787 * At a conditional branch, we assume all temporaries are dead and
3788 * all globals and local temps are synced to their location.
3789 */
3790static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3791{
3792    sync_globals(s, allocated_regs);
3793
3794    for (int i = s->nb_globals; i < s->nb_temps; i++) {
3795        TCGTemp *ts = &s->temps[i];
3796        /*
3797         * The liveness analysis already ensures that temps are dead.
3798         * Keep tcg_debug_asserts for safety.
3799         */
3800        switch (ts->kind) {
3801        case TEMP_LOCAL:
3802            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3803            break;
3804        case TEMP_NORMAL:
3805            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3806            break;
3807        case TEMP_CONST:
3808            break;
3809        default:
3810            g_assert_not_reached();
3811        }
3812    }
3813}
3814
3815/*
3816 * Specialized code generation for INDEX_op_mov_* with a constant.
3817 */
3818static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3819                                  tcg_target_ulong val, TCGLifeData arg_life,
3820                                  TCGRegSet preferred_regs)
3821{
3822    /* ENV should not be modified.  */
3823    tcg_debug_assert(!temp_readonly(ots));
3824
3825    /* The movi is not explicitly generated here.  */
3826    if (ots->val_type == TEMP_VAL_REG) {
3827        s->reg_to_temp[ots->reg] = NULL;
3828    }
3829    ots->val_type = TEMP_VAL_CONST;
3830    ots->val = val;
3831    ots->mem_coherent = 0;
3832    if (NEED_SYNC_ARG(0)) {
3833        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3834    } else if (IS_DEAD_ARG(0)) {
3835        temp_dead(s, ots);
3836    }
3837}
3838
3839/*
3840 * Specialized code generation for INDEX_op_mov_*.
3841 */
3842static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3843{
3844    const TCGLifeData arg_life = op->life;
3845    TCGRegSet allocated_regs, preferred_regs;
3846    TCGTemp *ts, *ots;
3847    TCGType otype, itype;
3848
3849    allocated_regs = s->reserved_regs;
3850    preferred_regs = op->output_pref[0];
3851    ots = arg_temp(op->args[0]);
3852    ts = arg_temp(op->args[1]);
3853
3854    /* ENV should not be modified.  */
3855    tcg_debug_assert(!temp_readonly(ots));
3856
3857    /* Note that otype != itype for no-op truncation.  */
3858    otype = ots->type;
3859    itype = ts->type;
3860
3861    if (ts->val_type == TEMP_VAL_CONST) {
3862        /* propagate constant or generate sti */
3863        tcg_target_ulong val = ts->val;
3864        if (IS_DEAD_ARG(1)) {
3865            temp_dead(s, ts);
3866        }
3867        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3868        return;
3869    }
3870
3871    /* If the source value is in memory, we're going to be forced
3872       to have it in a register in order to perform the copy.  Copy
3873       the SOURCE value into its own register first, so that we
3874       don't have to reload SOURCE the next time it is used. */
3875    if (ts->val_type == TEMP_VAL_MEM) {
3876        temp_load(s, ts, tcg_target_available_regs[itype],
3877                  allocated_regs, preferred_regs);
3878    }
3879
3880    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3881    if (IS_DEAD_ARG(0)) {
3882        /* mov to a non-saved dead register makes no sense (even with
3883           liveness analysis disabled). */
3884        tcg_debug_assert(NEED_SYNC_ARG(0));
3885        if (!ots->mem_allocated) {
3886            temp_allocate_frame(s, ots);
3887        }
3888        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3889        if (IS_DEAD_ARG(1)) {
3890            temp_dead(s, ts);
3891        }
3892        temp_dead(s, ots);
3893    } else {
3894        if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3895            /* the mov can be suppressed */
3896            if (ots->val_type == TEMP_VAL_REG) {
3897                s->reg_to_temp[ots->reg] = NULL;
3898            }
3899            ots->reg = ts->reg;
3900            temp_dead(s, ts);
3901        } else {
3902            if (ots->val_type != TEMP_VAL_REG) {
3903                /* When allocating a new register, make sure to not spill the
3904                   input one. */
3905                tcg_regset_set_reg(allocated_regs, ts->reg);
3906                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3907                                         allocated_regs, preferred_regs,
3908                                         ots->indirect_base);
3909            }
3910            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3911                /*
3912                 * Cross register class move not supported.
3913                 * Store the source register into the destination slot
3914                 * and leave the destination temp as TEMP_VAL_MEM.
3915                 */
3916                assert(!temp_readonly(ots));
3917                if (!ots->mem_allocated) {
3918                    temp_allocate_frame(s, ots);
3919                }
3920                tcg_out_st(s, ts->type, ts->reg,
3921                           ots->mem_base->reg, ots->mem_offset);
3922                ots->mem_coherent = 1;
3923                temp_free_or_dead(s, ots, -1);
3924                return;
3925            }
3926        }
3927        ots->val_type = TEMP_VAL_REG;
3928        ots->mem_coherent = 0;
3929        s->reg_to_temp[ots->reg] = ots;
3930        if (NEED_SYNC_ARG(0)) {
3931            temp_sync(s, ots, allocated_regs, 0, 0);
3932        }
3933    }
3934}
3935
3936/*
3937 * Specialized code generation for INDEX_op_dup_vec.
3938 */
3939static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3940{
3941    const TCGLifeData arg_life = op->life;
3942    TCGRegSet dup_out_regs, dup_in_regs;
3943    TCGTemp *its, *ots;
3944    TCGType itype, vtype;
3945    intptr_t endian_fixup;
3946    unsigned vece;
3947    bool ok;
3948
3949    ots = arg_temp(op->args[0]);
3950    its = arg_temp(op->args[1]);
3951
3952    /* ENV should not be modified.  */
3953    tcg_debug_assert(!temp_readonly(ots));
3954
3955    itype = its->type;
3956    vece = TCGOP_VECE(op);
3957    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3958
3959    if (its->val_type == TEMP_VAL_CONST) {
3960        /* Propagate constant via movi -> dupi.  */
3961        tcg_target_ulong val = its->val;
3962        if (IS_DEAD_ARG(1)) {
3963            temp_dead(s, its);
3964        }
3965        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3966        return;
3967    }
3968
3969    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3970    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3971
3972    /* Allocate the output register now.  */
3973    if (ots->val_type != TEMP_VAL_REG) {
3974        TCGRegSet allocated_regs = s->reserved_regs;
3975
3976        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3977            /* Make sure to not spill the input register. */
3978            tcg_regset_set_reg(allocated_regs, its->reg);
3979        }
3980        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3981                                 op->output_pref[0], ots->indirect_base);
3982        ots->val_type = TEMP_VAL_REG;
3983        ots->mem_coherent = 0;
3984        s->reg_to_temp[ots->reg] = ots;
3985    }
3986
3987    switch (its->val_type) {
3988    case TEMP_VAL_REG:
3989        /*
3990         * The dup constraints must be broad, covering all possible VECE.
3991         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3992         * to fail, indicating that extra moves are required for that case.
3993         */
3994        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3995            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3996                goto done;
3997            }
3998            /* Try again from memory or a vector input register.  */
3999        }
4000        if (!its->mem_coherent) {
4001            /*
4002             * The input register is not synced, and so an extra store
4003             * would be required to use memory.  Attempt an integer-vector
4004             * register move first.  We do not have a TCGRegSet for this.
4005             */
4006            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4007                break;
4008            }
4009            /* Sync the temp back to its slot and load from there.  */
4010            temp_sync(s, its, s->reserved_regs, 0, 0);
4011        }
4012        /* fall through */
4013
4014    case TEMP_VAL_MEM:
4015#ifdef HOST_WORDS_BIGENDIAN
4016        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
4017        endian_fixup -= 1 << vece;
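            /* e.g. an I64 source with MO_8 elements: 8 - 1 = 7, the
               offset of the least significant byte on a big-endian host. */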
4018#else
4019        endian_fixup = 0;
4020#endif
4021        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4022                             its->mem_offset + endian_fixup)) {
4023            goto done;
4024        }
4025        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4026        break;
4027
4028    default:
4029        g_assert_not_reached();
4030    }
4031
4032    /* We now have a vector input register, so dup must succeed. */
4033    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4034    tcg_debug_assert(ok);
4035
4036 done:
4037    if (IS_DEAD_ARG(1)) {
4038        temp_dead(s, its);
4039    }
4040    if (NEED_SYNC_ARG(0)) {
4041        temp_sync(s, ots, s->reserved_regs, 0, 0);
4042    }
4043    if (IS_DEAD_ARG(0)) {
4044        temp_dead(s, ots);
4045    }
4046}
4047
4048static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4049{
4050    const TCGLifeData arg_life = op->life;
4051    const TCGOpDef * const def = &tcg_op_defs[op->opc];
4052    TCGRegSet i_allocated_regs;
4053    TCGRegSet o_allocated_regs;
4054    int i, k, nb_iargs, nb_oargs;
4055    TCGReg reg;
4056    TCGArg arg;
4057    const TCGArgConstraint *arg_ct;
4058    TCGTemp *ts;
4059    TCGArg new_args[TCG_MAX_OP_ARGS];
4060    int const_args[TCG_MAX_OP_ARGS];
4061
4062    nb_oargs = def->nb_oargs;
4063    nb_iargs = def->nb_iargs;
4064
4065    /* copy constants */
4066    memcpy(new_args + nb_oargs + nb_iargs, 
4067           op->args + nb_oargs + nb_iargs,
4068           sizeof(TCGArg) * def->nb_cargs);
4069
4070    i_allocated_regs = s->reserved_regs;
4071    o_allocated_regs = s->reserved_regs;
4072
4073    /* satisfy input constraints */ 
4074    for (k = 0; k < nb_iargs; k++) {
4075        TCGRegSet i_preferred_regs, o_preferred_regs;
4076
4077        i = def->args_ct[nb_oargs + k].sort_index;
4078        arg = op->args[i];
4079        arg_ct = &def->args_ct[i];
4080        ts = arg_temp(arg);
4081
4082        if (ts->val_type == TEMP_VAL_CONST
4083            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
4084            /* constant is OK for instruction */
4085            const_args[i] = 1;
4086            new_args[i] = ts->val;
4087            continue;
4088        }
4089
4090        i_preferred_regs = o_preferred_regs = 0;
4091        if (arg_ct->ialias) {
4092            o_preferred_regs = op->output_pref[arg_ct->alias_index];
4093
4094            /*
4095             * If the input is readonly, then it cannot also be an
4096             * output and aliased to itself.  If the input is not
4097             * dead after the instruction, we must allocate a new
4098             * register and move it.
4099             */
4100            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4101                goto allocate_in_reg;
4102            }
4103
4104            /*
4105             * Check if the current register has already been allocated
4106             * for another input aliased to an output.
4107             */
4108            if (ts->val_type == TEMP_VAL_REG) {
4109                reg = ts->reg;
4110                for (int k2 = 0; k2 < k; k2++) {
4111                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
4112                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4113                        goto allocate_in_reg;
4114                    }
4115                }
4116            }
4117            i_preferred_regs = o_preferred_regs;
4118        }
4119
4120        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4121        reg = ts->reg;
4122
4123        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4124 allocate_in_reg:
4125            /*
4126             * Allocate a new register matching the constraint
4127             * and move the temporary register into it.
4128             */
4129            temp_load(s, ts, tcg_target_available_regs[ts->type],
4130                      i_allocated_regs, 0);
4131            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4132                                o_preferred_regs, ts->indirect_base);
4133            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4134                /*
4135                 * Cross register class move not supported.  Sync the
4136                 * temp back to its slot and load from there.
4137                 */
4138                temp_sync(s, ts, i_allocated_regs, 0, 0);
4139                tcg_out_ld(s, ts->type, reg,
4140                           ts->mem_base->reg, ts->mem_offset);
4141            }
4142        }
4143        new_args[i] = reg;
4144        const_args[i] = 0;
4145        tcg_regset_set_reg(i_allocated_regs, reg);
4146    }
4147    
4148    /* mark dead temporaries and free the associated registers */
4149    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4150        if (IS_DEAD_ARG(i)) {
4151            temp_dead(s, arg_temp(op->args[i]));
4152        }
4153    }
4154
4155    if (def->flags & TCG_OPF_COND_BRANCH) {
4156        tcg_reg_alloc_cbranch(s, i_allocated_regs);
4157    } else if (def->flags & TCG_OPF_BB_END) {
4158        tcg_reg_alloc_bb_end(s, i_allocated_regs);
4159    } else {
4160        if (def->flags & TCG_OPF_CALL_CLOBBER) {
4161            /* XXX: permit generic clobber register list? */
4162            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4163                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4164                    tcg_reg_free(s, i, i_allocated_regs);
4165                }
4166            }
4167        }
4168        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4169            /* sync globals if the op has side effects and might trigger
4170               an exception. */
4171            sync_globals(s, i_allocated_regs);
4172        }
4173        
4174        /* satisfy the output constraints */
4175        for (k = 0; k < nb_oargs; k++) {
4176            i = def->args_ct[k].sort_index;
4177            arg = op->args[i];
4178            arg_ct = &def->args_ct[i];
4179            ts = arg_temp(arg);
4180
4181            /* ENV should not be modified.  */
4182            tcg_debug_assert(!temp_readonly(ts));
4183
4184            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4185                reg = new_args[arg_ct->alias_index];
4186            } else if (arg_ct->newreg) {
4187                reg = tcg_reg_alloc(s, arg_ct->regs,
4188                                    i_allocated_regs | o_allocated_regs,
4189                                    op->output_pref[k], ts->indirect_base);
4190            } else {
4191                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4192                                    op->output_pref[k], ts->indirect_base);
4193            }
4194            tcg_regset_set_reg(o_allocated_regs, reg);
4195            if (ts->val_type == TEMP_VAL_REG) {
4196                s->reg_to_temp[ts->reg] = NULL;
4197            }
4198            ts->val_type = TEMP_VAL_REG;
4199            ts->reg = reg;
4200            /*
4201             * Temp value is modified, so the value kept in memory is
4202             * potentially not the same.
4203             */
4204            ts->mem_coherent = 0;
4205            s->reg_to_temp[reg] = ts;
4206            new_args[i] = reg;
4207        }
4208    }
4209
4210    /* emit instruction */
4211    if (def->flags & TCG_OPF_VECTOR) {
4212        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4213                       new_args, const_args);
4214    } else {
4215        tcg_out_op(s, op->opc, new_args, const_args);
4216    }
4217
4218    /* move the outputs into the correct registers if needed */
4219    for (i = 0; i < nb_oargs; i++) {
4220        ts = arg_temp(op->args[i]);
4221
4222        /* ENV should not be modified.  */
4223        tcg_debug_assert(!temp_readonly(ts));
4224
4225        if (NEED_SYNC_ARG(i)) {
4226            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4227        } else if (IS_DEAD_ARG(i)) {
4228            temp_dead(s, ts);
4229        }
4230    }
4231}
4232
4233static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4234{
4235    const TCGLifeData arg_life = op->life;
4236    TCGTemp *ots, *itsl, *itsh;
4237    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4238
4239    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4240    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4241    tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4242
4243    ots = arg_temp(op->args[0]);
4244    itsl = arg_temp(op->args[1]);
4245    itsh = arg_temp(op->args[2]);
4246
4247    /* ENV should not be modified.  */
4248    tcg_debug_assert(!temp_readonly(ots));
4249
4250    /* Allocate the output register now.  */
4251    if (ots->val_type != TEMP_VAL_REG) {
4252        TCGRegSet allocated_regs = s->reserved_regs;
4253        TCGRegSet dup_out_regs =
4254            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4255
4256        /* Make sure to not spill the input registers. */
4257        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4258            tcg_regset_set_reg(allocated_regs, itsl->reg);
4259        }
4260        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4261            tcg_regset_set_reg(allocated_regs, itsh->reg);
4262        }
4263
4264        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4265                                 op->output_pref[0], ots->indirect_base);
4266        ots->val_type = TEMP_VAL_REG;
4267        ots->mem_coherent = 0;
4268        s->reg_to_temp[ots->reg] = ots;
4269    }
4270
4271    /* Promote dup2 of immediates to dupi_vec. */
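        /* The two 32-bit inputs form a single 64-bit value (itsl in the
           low half), which may be replicable at a narrower element size. */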
4272    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4273        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4274        MemOp vece = MO_64;
4275
4276        if (val == dup_const(MO_8, val)) {
4277            vece = MO_8;
4278        } else if (val == dup_const(MO_16, val)) {
4279            vece = MO_16;
4280        } else if (val == dup_const(MO_32, val)) {
4281            vece = MO_32;
4282        }
4283
4284        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4285        goto done;
4286    }
4287
4288    /* If the two inputs form one 64-bit value, try dupm_vec. */
4289    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4290        if (!itsl->mem_coherent) {
4291            temp_sync(s, itsl, s->reserved_regs, 0, 0);
4292        }
4293        if (!itsh->mem_coherent) {
4294            temp_sync(s, itsh, s->reserved_regs, 0, 0);
4295        }
4296#ifdef HOST_WORDS_BIGENDIAN
4297        TCGTemp *its = itsh;
4298#else
4299        TCGTemp *its = itsl;
4300#endif
4301        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4302                             its->mem_base->reg, its->mem_offset)) {
4303            goto done;
4304        }
4305    }
4306
4307    /* Fall back to generic expansion. */
4308    return false;
4309
4310 done:
4311    if (IS_DEAD_ARG(1)) {
4312        temp_dead(s, itsl);
4313    }
4314    if (IS_DEAD_ARG(2)) {
4315        temp_dead(s, itsh);
4316    }
4317    if (NEED_SYNC_ARG(0)) {
4318        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4319    } else if (IS_DEAD_ARG(0)) {
4320        temp_dead(s, ots);
4321    }
4322    return true;
4323}
4324
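    /* STACK_DIR negates stack offsets on hosts whose stack grows upward. */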
4325#ifdef TCG_TARGET_STACK_GROWSUP
4326#define STACK_DIR(x) (-(x))
4327#else
4328#define STACK_DIR(x) (x)
4329#endif
4330
4331static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4332{
4333    const int nb_oargs = TCGOP_CALLO(op);
4334    const int nb_iargs = TCGOP_CALLI(op);
4335    const TCGLifeData arg_life = op->life;
4336    int flags, nb_regs, i;
4337    TCGReg reg;
4338    TCGArg arg;
4339    TCGTemp *ts;
4340    intptr_t stack_offset;
4341    size_t call_stack_size;
4342    tcg_insn_unit *func_addr;
4343    int allocate_args;
4344    TCGRegSet allocated_regs;
4345
4346    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4347    flags = op->args[nb_oargs + nb_iargs + 1];
4348
4349    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4350    if (nb_regs > nb_iargs) {
4351        nb_regs = nb_iargs;
4352    }
4353
4354    /* assign stack slots first */
4355    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4356    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
4357        ~(TCG_TARGET_STACK_ALIGN - 1);
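        /* e.g. three 8-byte stack arguments with 16-byte stack
           alignment: 24 bytes rounds up to 32. */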
4358    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4359    if (allocate_args) {
4360        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4361           preallocate call stack */
4362        tcg_abort();
4363    }
4364
4365    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4366    for (i = nb_regs; i < nb_iargs; i++) {
4367        arg = op->args[nb_oargs + i];
4368#ifdef TCG_TARGET_STACK_GROWSUP
4369        stack_offset -= sizeof(tcg_target_long);
4370#endif
4371        if (arg != TCG_CALL_DUMMY_ARG) {
4372            ts = arg_temp(arg);
4373            temp_load(s, ts, tcg_target_available_regs[ts->type],
4374                      s->reserved_regs, 0);
4375            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4376        }
4377#ifndef TCG_TARGET_STACK_GROWSUP
4378        stack_offset += sizeof(tcg_target_long);
4379#endif
4380    }
4381    
4382    /* assign input registers */
4383    allocated_regs = s->reserved_regs;
4384    for (i = 0; i < nb_regs; i++) {
4385        arg = op->args[nb_oargs + i];
4386        if (arg != TCG_CALL_DUMMY_ARG) {
4387            ts = arg_temp(arg);
4388            reg = tcg_target_call_iarg_regs[i];
4389
4390            if (ts->val_type == TEMP_VAL_REG) {
4391                if (ts->reg != reg) {
4392                    tcg_reg_free(s, reg, allocated_regs);
4393                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4394                        /*
4395                         * Cross register class move not supported.  Sync the
4396                         * temp back to its slot and load from there.
4397                         */
4398                        temp_sync(s, ts, allocated_regs, 0, 0);
4399                        tcg_out_ld(s, ts->type, reg,
4400                                   ts->mem_base->reg, ts->mem_offset);
4401                    }
4402                }
4403            } else {
4404                TCGRegSet arg_set = 0;
4405
4406                tcg_reg_free(s, reg, allocated_regs);
4407                tcg_regset_set_reg(arg_set, reg);
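                    /* The singleton register set forces temp_load to
                       pick exactly reg. */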
4408                temp_load(s, ts, arg_set, allocated_regs, 0);
4409            }
4410
4411            tcg_regset_set_reg(allocated_regs, reg);
4412        }
4413    }
4414    
4415    /* mark dead temporaries and free the associated registers */
4416    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4417        if (IS_DEAD_ARG(i)) {
4418            temp_dead(s, arg_temp(op->args[i]));
4419        }
4420    }
4421    
4422    /* clobber call registers */
4423    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4424        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4425            tcg_reg_free(s, i, allocated_regs);
4426        }
4427    }
4428
4429    /* Save globals if they might be written by the helper, sync them if
4430       they might be read. */
4431    if (flags & TCG_CALL_NO_READ_GLOBALS) {
4432        /* Nothing to do */
4433    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4434        sync_globals(s, allocated_regs);
4435    } else {
4436        save_globals(s, allocated_regs);
4437    }
4438
4439    tcg_out_call(s, func_addr);
4440
4441    /* assign output registers and emit moves if needed */
4442    for (i = 0; i < nb_oargs; i++) {
4443        arg = op->args[i];
4444        ts = arg_temp(arg);
4445
4446        /* ENV should not be modified.  */
4447        tcg_debug_assert(!temp_readonly(ts));
4448
4449        reg = tcg_target_call_oarg_regs[i];
4450        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4451        if (ts->val_type == TEMP_VAL_REG) {
4452            s->reg_to_temp[ts->reg] = NULL;
4453        }
4454        ts->val_type = TEMP_VAL_REG;
4455        ts->reg = reg;
4456        ts->mem_coherent = 0;
4457        s->reg_to_temp[reg] = ts;
4458        if (NEED_SYNC_ARG(i)) {
4459            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4460        } else if (IS_DEAD_ARG(i)) {
4461            temp_dead(s, ts);
4462        }
4463    }
4464}
4465
4466#ifdef CONFIG_PROFILER
4467
4468/* avoid copy/paste errors */
4469#define PROF_ADD(to, from, field)                       \
4470    do {                                                \
4471        (to)->field += qatomic_read(&((from)->field));  \
4472    } while (0)
4473
4474#define PROF_MAX(to, from, field)                                       \
4475    do {                                                                \
4476        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4477        if (val__ > (to)->field) {                                      \
4478            (to)->field = val__;                                        \
4479        }                                                               \
4480    } while (0)
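    /* PROF_ADD accumulates a field across contexts; PROF_MAX keeps the
       largest value seen.  Both read the source field atomically. */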
4481
4482/* Pass in a zeroed @prof */
4483static inline
4484void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4485{
4486    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4487    unsigned int i;
4488
4489    for (i = 0; i < n_ctxs; i++) {
4490        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4491        const TCGProfile *orig = &s->prof;
4492
4493        if (counters) {
4494            PROF_ADD(prof, orig, cpu_exec_time);
4495            PROF_ADD(prof, orig, tb_count1);
4496            PROF_ADD(prof, orig, tb_count);
4497            PROF_ADD(prof, orig, op_count);
4498            PROF_MAX(prof, orig, op_count_max);
4499            PROF_ADD(prof, orig, temp_count);
4500            PROF_MAX(prof, orig, temp_count_max);
4501            PROF_ADD(prof, orig, del_op_count);
4502            PROF_ADD(prof, orig, code_in_len);
4503            PROF_ADD(prof, orig, code_out_len);
4504            PROF_ADD(prof, orig, search_out_len);
4505            PROF_ADD(prof, orig, interm_time);
4506            PROF_ADD(prof, orig, code_time);
4507            PROF_ADD(prof, orig, la_time);
4508            PROF_ADD(prof, orig, opt_time);
4509            PROF_ADD(prof, orig, restore_count);
4510            PROF_ADD(prof, orig, restore_time);
4511        }
4512        if (table) {
4513            int i;
4514
4515            for (i = 0; i < NB_OPS; i++) {
4516                PROF_ADD(prof, orig, table_op_count[i]);
4517            }
4518        }
4519    }
4520}
4521
4522#undef PROF_ADD
4523#undef PROF_MAX
4524
4525static void tcg_profile_snapshot_counters(TCGProfile *prof)
4526{
4527    tcg_profile_snapshot(prof, true, false);
4528}
4529
4530static void tcg_profile_snapshot_table(TCGProfile *prof)
4531{
4532    tcg_profile_snapshot(prof, false, true);
4533}
4534
4535void tcg_dump_op_count(void)
4536{
4537    TCGProfile prof = {};
4538    int i;
4539
4540    tcg_profile_snapshot_table(&prof);
4541    for (i = 0; i < NB_OPS; i++) {
4542        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4543                    prof.table_op_count[i]);
4544    }
4545}
4546
4547int64_t tcg_cpu_exec_time(void)
4548{
4549    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4550    unsigned int i;
4551    int64_t ret = 0;
4552
4553    for (i = 0; i < n_ctxs; i++) {
4554        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4555        const TCGProfile *prof = &s->prof;
4556
4557        ret += qatomic_read(&prof->cpu_exec_time);
4558    }
4559    return ret;
4560}
4561#else
4562void tcg_dump_op_count(void)
4563{
4564    qemu_printf("[TCG profiler not compiled]\n");
4565}
4566
4567int64_t tcg_cpu_exec_time(void)
4568{
4569    error_report("%s: TCG profiler not compiled", __func__);
4570    exit(EXIT_FAILURE);
4571}
4572#endif
4573
4574
4575int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4576{
4577#ifdef CONFIG_PROFILER
4578    TCGProfile *prof = &s->prof;
4579#endif
4580    int i, num_insns;
4581    TCGOp *op;
4582
4583#ifdef CONFIG_PROFILER
4584    {
4585        int n = 0;
4586
4587        QTAILQ_FOREACH(op, &s->ops, link) {
4588            n++;
4589        }
4590        qatomic_set(&prof->op_count, prof->op_count + n);
4591        if (n > prof->op_count_max) {
4592            qatomic_set(&prof->op_count_max, n);
4593        }
4594
4595        n = s->nb_temps;
4596        qatomic_set(&prof->temp_count, prof->temp_count + n);
4597        if (n > prof->temp_count_max) {
4598            qatomic_set(&prof->temp_count_max, n);
4599        }
4600    }
4601#endif
4602
4603#ifdef DEBUG_DISAS
4604    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4605                 && qemu_log_in_addr_range(tb->pc))) {
4606        FILE *logfile = qemu_log_lock();
4607        qemu_log("OP:\n");
4608        tcg_dump_ops(s, false);
4609        qemu_log("\n");
4610        qemu_log_unlock(logfile);
4611    }
4612#endif
4613
4614#ifdef CONFIG_DEBUG_TCG
4615    /* Ensure all labels referenced have been emitted.  */
4616    {
4617        TCGLabel *l;
4618        bool error = false;
4619
4620        QSIMPLEQ_FOREACH(l, &s->labels, next) {
4621            if (unlikely(!l->present) && l->refs) {
4622                qemu_log_mask(CPU_LOG_TB_OP,
4623                              "$L%d referenced but not present.\n", l->id);
4624                error = true;
4625            }
4626        }
4627        assert(!error);
4628    }
4629#endif
4630
4631#ifdef CONFIG_PROFILER
4632    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4633#endif
4634
4635#ifdef USE_TCG_OPTIMIZATIONS
4636    tcg_optimize(s);
4637#endif
4638
4639#ifdef CONFIG_PROFILER
4640    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4641    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4642#endif
4643
4644    reachable_code_pass(s);
4645    liveness_pass_1(s);
4646
4647    if (s->nb_indirects > 0) {
4648#ifdef DEBUG_DISAS
4649        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4650                     && qemu_log_in_addr_range(tb->pc))) {
4651            FILE *logfile = qemu_log_lock();
4652            qemu_log("OP before indirect lowering:\n");
4653            tcg_dump_ops(s, false);
4654            qemu_log("\n");
4655            qemu_log_unlock(logfile);
4656        }
4657#endif
4658        /* Replace indirect temps with direct temps.  */
4659        if (liveness_pass_2(s)) {
4660            /* If changes were made, re-run liveness.  */
4661            liveness_pass_1(s);
4662        }
4663    }
4664
4665#ifdef CONFIG_PROFILER
4666    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4667#endif
4668
4669#ifdef DEBUG_DISAS
4670    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4671                 && qemu_log_in_addr_range(tb->pc))) {
4672        FILE *logfile = qemu_log_lock();
4673        qemu_log("OP after optimization and liveness analysis:\n");
4674        tcg_dump_ops(s, true);
4675        qemu_log("\n");
4676        qemu_log_unlock(logfile);
4677    }
4678#endif
4679
4680    tcg_reg_alloc_start(s);
4681
4682    /*
4683     * Reset the buffer pointers when restarting after overflow.
4684     * TODO: Move this into translate-all.c with the rest of the
4685     * buffer management.  Having only this done here is confusing.
4686     */
4687    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4688    s->code_ptr = s->code_buf;
4689
4690#ifdef TCG_TARGET_NEED_LDST_LABELS
4691    QSIMPLEQ_INIT(&s->ldst_labels);
4692#endif
4693#ifdef TCG_TARGET_NEED_POOL_LABELS
4694    s->pool_labels = NULL;
4695#endif
4696
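        /* num_insns counts INDEX_op_insn_start markers; it stays -1
           until the first one is seen. */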
4697    num_insns = -1;
4698    QTAILQ_FOREACH(op, &s->ops, link) {
4699        TCGOpcode opc = op->opc;
4700
4701#ifdef CONFIG_PROFILER
4702        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4703#endif
4704
4705        switch (opc) {
4706        case INDEX_op_mov_i32:
4707        case INDEX_op_mov_i64:
4708        case INDEX_op_mov_vec:
4709            tcg_reg_alloc_mov(s, op);
4710            break;
4711        case INDEX_op_dup_vec:
4712            tcg_reg_alloc_dup(s, op);
4713            break;
4714        case INDEX_op_insn_start:
4715            if (num_insns >= 0) {
4716                size_t off = tcg_current_code_size(s);
4717                s->gen_insn_end_off[num_insns] = off;
4718                /* Assert that we do not overflow our stored offset.  */
4719                assert(s->gen_insn_end_off[num_insns] == off);
4720            }
4721            num_insns++;
4722            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4723                target_ulong a;
4724#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4725                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4726#else
4727                a = op->args[i];
4728#endif
4729                s->gen_insn_data[num_insns][i] = a;
4730            }
4731            break;
4732        case INDEX_op_discard:
4733            temp_dead(s, arg_temp(op->args[0]));
4734            break;
4735        case INDEX_op_set_label:
4736            tcg_reg_alloc_bb_end(s, s->reserved_regs);
4737            tcg_out_label(s, arg_label(op->args[0]));
4738            break;
4739        case INDEX_op_call:
4740            tcg_reg_alloc_call(s, op);
4741            break;
4742        case INDEX_op_dup2_vec:
4743            if (tcg_reg_alloc_dup2(s, op)) {
4744                break;
4745            }
4746            /* fall through */
4747        default:
4748            /* Sanity check that we've not introduced any unhandled opcodes. */
4749            tcg_debug_assert(tcg_op_supported(opc));
4750            /* Note: it would be much faster to have specialized
4751               register allocator functions for some common
4752               argument patterns. */
4753            tcg_reg_alloc_op(s, op);
4754            break;
4755        }
4756#ifdef CONFIG_DEBUG_TCG
4757        check_regs(s);
4758#endif
4759        /* Test for (pending) buffer overflow.  The assumption is that any
4760           one operation beginning below the high water mark cannot overrun
4761           the buffer completely.  Thus we can test for overflow after
4762           generating code without having to check during generation.  */
4763        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4764            return -1;
4765        }
4766        /* Test for TB overflow, as seen by gen_insn_end_off.  */
4767        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4768            return -2;
4769        }
4770    }
4771    tcg_debug_assert(num_insns >= 0);
4772    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4773
4774    /* Generate TB finalization at the end of block */
4775#ifdef TCG_TARGET_NEED_LDST_LABELS
4776    i = tcg_out_ldst_finalize(s);
4777    if (i < 0) {
4778        return i;
4779    }
4780#endif
4781#ifdef TCG_TARGET_NEED_POOL_LABELS
4782    i = tcg_out_pool_finalize(s);
4783    if (i < 0) {
4784        return i;
4785    }
4786#endif
4787    if (!tcg_resolve_relocs(s)) {
4788        return -2;
4789    }
4790
4791#ifndef CONFIG_TCG_INTERPRETER
4792    /* flush instruction cache */
4793    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4794                        (uintptr_t)s->code_buf,
4795                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4796#endif
4797
4798    return tcg_current_code_size(s);
4799}
4800
4801#ifdef CONFIG_PROFILER
4802void tcg_dump_info(void)
4803{
4804    TCGProfile prof = {};
4805    const TCGProfile *s;
4806    int64_t tb_count;
4807    int64_t tb_div_count;
4808    int64_t tot;
4809
4810    tcg_profile_snapshot_counters(&prof);
4811    s = &prof;
4812    tb_count = s->tb_count;
4813    tb_div_count = tb_count ? tb_count : 1;
4814    tot = s->interm_time + s->code_time;
4815
4816    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4817                tot, tot / 2.4e9);
4818    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4819                " %0.1f%%)\n",
4820                tb_count, s->tb_count1 - tb_count,
4821                (double)(s->tb_count1 - s->tb_count)
4822                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4823    qemu_printf("avg ops/TB          %0.1f max=%d\n",
4824                (double)s->op_count / tb_div_count, s->op_count_max);
4825    qemu_printf("deleted ops/TB      %0.2f\n",
4826                (double)s->del_op_count / tb_div_count);
4827    qemu_printf("avg temps/TB        %0.2f max=%d\n",
4828                (double)s->temp_count / tb_div_count, s->temp_count_max);
4829    qemu_printf("avg host code/TB    %0.1f\n",
4830                (double)s->code_out_len / tb_div_count);
4831    qemu_printf("avg search data/TB  %0.1f\n",
4832                (double)s->search_out_len / tb_div_count);
4833    
4834    qemu_printf("cycles/op           %0.1f\n",
4835                s->op_count ? (double)tot / s->op_count : 0);
4836    qemu_printf("cycles/in byte      %0.1f\n",
4837                s->code_in_len ? (double)tot / s->code_in_len : 0);
4838    qemu_printf("cycles/out byte     %0.1f\n",
4839                s->code_out_len ? (double)tot / s->code_out_len : 0);
4840    qemu_printf("cycles/search byte  %0.1f\n",
4841                s->search_out_len ? (double)tot / s->search_out_len : 0);
4842    if (tot == 0) {
4843        tot = 1;
4844    }
4845    qemu_printf("  gen_interm time   %0.1f%%\n",
4846                (double)s->interm_time / tot * 100.0);
4847    qemu_printf("  gen_code time     %0.1f%%\n",
4848                (double)s->code_time / tot * 100.0);
4849    qemu_printf("optim./code time    %0.1f%%\n",
4850                (double)s->opt_time / (s->code_time ? s->code_time : 1)
4851                * 100.0);
4852    qemu_printf("liveness/code time  %0.1f%%\n",
4853                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4854    qemu_printf("cpu_restore count   %" PRId64 "\n",
4855                s->restore_count);
4856    qemu_printf("  avg cycles        %0.1f\n",
4857                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4858}
4859#else
4860void tcg_dump_info(void)
4861{
4862    qemu_printf("[TCG profiler not compiled]\n");
4863}
4864#endif
4865
4866#ifdef ELF_HOST_MACHINE
4867/* In order to use this feature, the backend needs to do three things:
4868
4869   (1) Define ELF_HOST_MACHINE to the value to store in the image's
4870       e_machine field; defining it also signals support for the feature.
4871
4872   (2) Define tcg_register_jit.  This should create a buffer containing
4873       the contents of a .debug_frame section that describes the post-
4874       prologue unwind info for the TCG machine.
4875
4876   (3) Call tcg_register_jit_int with the constructed .debug_frame.
4877   A condensed sketch of steps (2) and (3) follows.  */
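
/*
 * A condensed sketch of steps (2) and (3), modeled on what a 64-bit
 * x86 backend might place in its tcg-target.c.inc.  The structure
 * layout and CIE values below are assumptions for illustration; a
 * real backend must emit CFI opcodes matching its actual prologue.
 */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa: sp + frame size */
    uint8_t fde_ret_ofs[2];     /* DW_CFA_offset for the return slot */
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = 16,              /* %rip */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    /* ... fde_def_cfa / fde_ret_ofs opcode bytes elided ... */
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif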
4878
4879/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4880typedef enum {
4881    JIT_NOACTION = 0,
4882    JIT_REGISTER_FN,
4883    JIT_UNREGISTER_FN
4884} jit_actions_t;
4885
4886struct jit_code_entry {
4887    struct jit_code_entry *next_entry;
4888    struct jit_code_entry *prev_entry;
4889    const void *symfile_addr;
4890    uint64_t symfile_size;
4891};
4892
4893struct jit_descriptor {
4894    uint32_t version;
4895    uint32_t action_flag;
4896    struct jit_code_entry *relevant_entry;
4897    struct jit_code_entry *first_entry;
4898};
4899
/* GDB plants a breakpoint in this function; the noinline attribute and
   the empty asm keep it from being inlined or optimized away.  */
4900void __jit_debug_register_code(void) __attribute__((noinline));
4901void __jit_debug_register_code(void)
4902{
4903    asm("");
4904}
4905
4906/* Must statically initialize the version, because GDB may check
4907   the version before we can set it.  */
4908struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4909
4910/* End GDB interface.  */
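
/*
 * QEMU registers a single entry below and never removes it, but for
 * reference the unregistration half of the protocol is the mirror
 * image: unlink the entry, flag it, and call the hook again.  This
 * helper is an illustrative sketch only and is not used by QEMU.
 */
#if 0
static void jit_unregister(struct jit_code_entry *e)
{
    if (e->prev_entry) {
        e->prev_entry->next_entry = e->next_entry;
    } else {
        __jit_debug_descriptor.first_entry = e->next_entry;
    }
    if (e->next_entry) {
        e->next_entry->prev_entry = e->prev_entry;
    }
    __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
    __jit_debug_descriptor.relevant_entry = e;
    __jit_debug_register_code();
}
#endif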
4911
/* Return the offset of STR within STRTAB.  STR must be present in the
   table; there is no end-of-table check, as every caller passes a
   string that is known to be there.  */
4912static int find_string(const char *strtab, const char *str)
4913{
4914    const char *p = strtab + 1;
4915
4916    while (1) {
4917        if (strcmp(p, str) == 0) {
4918            return p - strtab;
4919        }
4920        p += strlen(p) + 1;
4921    }
4922}
4923
4924static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4925                                 const void *debug_frame,
4926                                 size_t debug_frame_size)
4927{
4928    struct __attribute__((packed)) DebugInfo {
4929        uint32_t  len;
4930        uint16_t  version;
4931        uint32_t  abbrev;
4932        uint8_t   ptr_size;
4933        uint8_t   cu_die;
4934        uint16_t  cu_lang;
4935        uintptr_t cu_low_pc;
4936        uintptr_t cu_high_pc;
4937        uint8_t   fn_die;
4938        char      fn_name[16];
4939        uintptr_t fn_low_pc;
4940        uintptr_t fn_high_pc;
4941        uint8_t   cu_eoc;
4942    };
4943
4944    struct ElfImage {
4945        ElfW(Ehdr) ehdr;
4946        ElfW(Phdr) phdr;
4947        ElfW(Shdr) shdr[7];
4948        ElfW(Sym)  sym[2];
4949        struct DebugInfo di;
4950        uint8_t    da[24];
4951        char       str[80];
4952    };
4953
4954    struct ElfImage *img;
4955
4956    static const struct ElfImage img_template = {
4957        .ehdr = {
4958            .e_ident[EI_MAG0] = ELFMAG0,
4959            .e_ident[EI_MAG1] = ELFMAG1,
4960            .e_ident[EI_MAG2] = ELFMAG2,
4961            .e_ident[EI_MAG3] = ELFMAG3,
4962            .e_ident[EI_CLASS] = ELF_CLASS,
4963            .e_ident[EI_DATA] = ELF_DATA,
4964            .e_ident[EI_VERSION] = EV_CURRENT,
4965            .e_type = ET_EXEC,
4966            .e_machine = ELF_HOST_MACHINE,
4967            .e_version = EV_CURRENT,
4968            .e_phoff = offsetof(struct ElfImage, phdr),
4969            .e_shoff = offsetof(struct ElfImage, shdr),
4970            .e_ehsize = sizeof(ElfW(Ehdr)),
4971            .e_phentsize = sizeof(ElfW(Phdr)),
4972            .e_phnum = 1,
4973            .e_shentsize = sizeof(ElfW(Shdr)),
4974            .e_shnum = ARRAY_SIZE(img->shdr),
4975            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4976#ifdef ELF_HOST_FLAGS
4977            .e_flags = ELF_HOST_FLAGS,
4978#endif
4979#ifdef ELF_OSABI
4980            .e_ident[EI_OSABI] = ELF_OSABI,
4981#endif
4982        },
4983        .phdr = {
4984            .p_type = PT_LOAD,
4985            .p_flags = PF_X,
4986        },
4987        .shdr = {
4988            [0] = { .sh_type = SHT_NULL },
4989            /* Trick: The contents of code_gen_buffer are not present in
4990               this fake ELF file; that got allocated elsewhere.  Therefore
4991               we mark .text as SHT_NOBITS (similar to .bss) so that readers
4992               will not look for contents.  We can record any address.  */
4993            [1] = { /* .text */
4994                .sh_type = SHT_NOBITS,
4995                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4996            },
4997            [2] = { /* .debug_info */
4998                .sh_type = SHT_PROGBITS,
4999                .sh_offset = offsetof(struct ElfImage, di),
5000                .sh_size = sizeof(struct DebugInfo),
5001            },
5002            [3] = { /* .debug_abbrev */
5003                .sh_type = SHT_PROGBITS,
5004                .sh_offset = offsetof(struct ElfImage, da),
5005                .sh_size = sizeof(img->da),
5006            },
5007            [4] = { /* .debug_frame */
5008                .sh_type = SHT_PROGBITS,
5009                .sh_offset = sizeof(struct ElfImage),
5010            },
5011            [5] = { /* .symtab */
5012                .sh_type = SHT_SYMTAB,
5013                .sh_offset = offsetof(struct ElfImage, sym),
5014                .sh_size = sizeof(img->sym),
5015                .sh_info = 1,
5016                .sh_link = ARRAY_SIZE(img->shdr) - 1,
5017                .sh_entsize = sizeof(ElfW(Sym)),
5018            },
5019            [6] = { /* .strtab */
5020                .sh_type = SHT_STRTAB,
5021                .sh_offset = offsetof(struct ElfImage, str),
5022                .sh_size = sizeof(img->str),
5023            }
5024        },
5025        .sym = {
5026            [1] = { /* code_gen_buffer */
5027                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5028                .st_shndx = 1,
5029            }
5030        },
5031        .di = {
5032            .len = sizeof(struct DebugInfo) - 4,
5033            .version = 2,
5034            .ptr_size = sizeof(void *),
5035            .cu_die = 1,
5036            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5037            .fn_die = 2,
5038            .fn_name = "code_gen_buffer"
5039        },
5040        .da = {
5041            1,          /* abbrev number (the cu) */
5042            0x11, 1,    /* DW_TAG_compile_unit, has children */
5043            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5044            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5045            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5046            0, 0,       /* end of abbrev */
5047            2,          /* abbrev number (the fn) */
5048            0x2e, 0,    /* DW_TAG_subprogram, no children */
5049            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5050            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5051            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5052            0, 0,       /* end of abbrev */
5053            0           /* no more abbrev */
5054        },
5055        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5056               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5057    };
5058
5059    /* We only need a single jit entry; statically allocate it.  */
5060    static struct jit_code_entry one_entry;
5061
5062    uintptr_t buf = (uintptr_t)buf_ptr;
5063    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5064    DebugFrameHeader *dfh;
5065
5066    img = g_malloc(img_size);
5067    *img = img_template;
5068
5069    img->phdr.p_vaddr = buf;
5070    img->phdr.p_paddr = buf;
5071    img->phdr.p_memsz = buf_size;
5072
5073    img->shdr[1].sh_name = find_string(img->str, ".text");
5074    img->shdr[1].sh_addr = buf;
5075    img->shdr[1].sh_size = buf_size;
5076
5077    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5078    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5079
5080    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5081    img->shdr[4].sh_size = debug_frame_size;
5082
5083    img->shdr[5].sh_name = find_string(img->str, ".symtab");
5084    img->shdr[6].sh_name = find_string(img->str, ".strtab");
5085
5086    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5087    img->sym[1].st_value = buf;
5088    img->sym[1].st_size = buf_size;
5089
5090    img->di.cu_low_pc = buf;
5091    img->di.cu_high_pc = buf + buf_size;
5092    img->di.fn_low_pc = buf;
5093    img->di.fn_high_pc = buf + buf_size;
5094
5095    dfh = (DebugFrameHeader *)(img + 1);
5096    memcpy(dfh, debug_frame, debug_frame_size);
5097    dfh->fde.func_start = buf;
5098    dfh->fde.func_len = buf_size;
5099
5100#ifdef DEBUG_JIT
5101    /* Enable this block to dump the ELF image created here to a file,
5102       which can then be inspected with readelf, objdump, etc.  */
5103    {
5104        FILE *f = fopen("/tmp/qemu.jit", "w+b");
5105        if (f) {
5106            if (fwrite(img, img_size, 1, f) != 1) {
5107                /* Silence the unused-result warning for fwrite.  */
5108            }
5109            fclose(f);
5110        }
5111    }
5112#endif
5113
5114    one_entry.symfile_addr = img;
5115    one_entry.symfile_size = img_size;
5116
5117    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5118    __jit_debug_descriptor.relevant_entry = &one_entry;
5119    __jit_debug_descriptor.first_entry = &one_entry;
5120    __jit_debug_register_code();
5121}
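
/*
 * Illustration of the consumer side: a debugger honoring the interface
 * walks the doubly-linked list published above (one entry, in our case)
 * and parses each symfile as an in-memory ELF image.  Because .text is
 * SHT_NOBITS, it reads the actual code bytes from our address space.
 * This helper is a sketch only and is not used by QEMU.
 */
#if 0
static size_t jit_entry_count(const struct jit_descriptor *d)
{
    const struct jit_code_entry *e;
    size_t n = 0;

    for (e = d->first_entry; e; e = e->next_entry) {
        n++;
    }
    return n;   /* 1 after tcg_register_jit_int() has run */
}
#endif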
5122#else
5123/* No support for the feature.  Provide the entry point expected by exec.c,
5124   and implement the internal function we declared earlier.  */
5125
5126static void tcg_register_jit_int(const void *buf, size_t size,
5127                                 const void *debug_frame,
5128                                 size_t debug_frame_size)
5129{
5130}
5131
5132void tcg_register_jit(const void *buf, size_t buf_size)
5133{
5134}
5135#endif /* ELF_HOST_MACHINE */
5136
5137#if !TCG_TARGET_MAYBE_vec
5138void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5139{
5140    g_assert_not_reached();
5141}
5142#endif
5143