qemu/tcg/tcg.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
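
/*
 * Illustrative note (not part of the original source): backends emit
 * instructions through the helpers above in units of tcg_insn_unit.
 * For example, a backend with 4-byte insn units might encode and emit
 * one instruction as (hypothetical field layout):
 *
 *     tcg_out32(s, opc | (rd << 7) | (rs1 << 15));
 */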

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}
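
/*
 * Illustrative use of the label API from a frontend (a sketch; real
 * callers use the tcg_gen_* helpers from tcg-op.h):
 *
 *     TCGLabel *skip = gen_new_label();
 *     tcg_gen_brcondi_i32(TCG_COND_EQ, val, 0, skip);
 *     tcg_gen_addi_i32(val, val, 1);
 *     gen_set_label(skip);   // pending relocations resolved against this
 */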

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
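
/*
 * As an illustration of the machinery above: a line "C_O1_I2(r, r, ri)"
 * in tcg-target-con-set.h expands (via C_PFX3) to the enumerator
 * c_o1_i2_r_r_ri here, and to the entry
 * { .args_ct_str = { "r", "r", "ri" } } in constraint_sets[] below.
 */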

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have the lookup key's .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
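
/*
 * Sketch of a typical caller (for illustration; the real callers live in
 * accel/tcg): map a host PC, e.g. one taken from a signal handler, back
 * to the TB that contains it:
 *
 *     TranslationBlock *tb = tcg_tb_lookup(host_pc);
 *     if (tb == NULL) {
 *         // host_pc does not point into generated code
 *     }
 */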

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
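
/*
 * Worked example with illustrative numbers: given a 256 MB
 * code_gen_buffer and max_cpus = 8, the first loop iteration tries
 * 8 regions per thread: 256 MB / (8 * 8) = 4 MB >= 2 MB, so we end
 * up with 64 regions.
 */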
#endif

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur.  Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     */
    for (i = 0; i < region.n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);

        /*
         * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
         * rejects a permission change from RWX -> NONE.  Guard pages are
         * nice for bug detection but are not essential; ignore any failure.
         */
        (void)qemu_mprotect_none(end, page_size);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
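
/*
 * Resulting buffer layout, as a sketch (G = one guard page per region):
 *
 *   buf      aligned
 *   |.slack..|<- region 0 ->|G|<- region 1 ->|G| ... |<- region n-1 ->|G|
 *            |<----- stride --->|
 *
 * The pre-alignment slack is folded into region 0, and any leftover pages
 * at the end of the buffer are folded into the last region.
 */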

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
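
/*
 * Illustrative use of the pool (a sketch; tcg_out_reloc() above is a real
 * example): transient per-translation data comes from tcg_malloc() and is
 * reclaimed wholesale rather than freed individually:
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *     ...
 *     tcg_pool_reset(s);   // frees large chunks, recycles normal ones
 */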

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
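
/*
 * Frontends reach this through the tcg_global_mem_new_i32/_i64 wrappers
 * in tcg.h; a target would typically register a CPU state field as, e.g.
 * (the 'pc' field name is illustrative, not taken from this file):
 *
 *     cpu_pc = tcg_global_mem_new_i32(cpu_env,
 *                                     offsetof(CPUArchState, pc), "pc");
 */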

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
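
/*
 * Usage sketch: unlike the tcg_const_*() helpers below, which allocate a
 * fresh temp and emit a movi, a constant temp is interned, read-only, and
 * never freed (tcg_temp_free_internal above silently ignores it):
 *
 *     TCGv_i32 four = tcg_constant_i32(4);   // shared, do not write/free
 *     tcg_gen_add_i32(dst, src, four);
 */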

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
1832    case INDEX_op_deposit_i64:
1833        return TCG_TARGET_HAS_deposit_i64;
1834    case INDEX_op_extract_i64:
1835        return TCG_TARGET_HAS_extract_i64;
1836    case INDEX_op_sextract_i64:
1837        return TCG_TARGET_HAS_sextract_i64;
1838    case INDEX_op_extract2_i64:
1839        return TCG_TARGET_HAS_extract2_i64;
1840    case INDEX_op_extrl_i64_i32:
1841        return TCG_TARGET_HAS_extrl_i64_i32;
1842    case INDEX_op_extrh_i64_i32:
1843        return TCG_TARGET_HAS_extrh_i64_i32;
1844    case INDEX_op_ext8s_i64:
1845        return TCG_TARGET_HAS_ext8s_i64;
1846    case INDEX_op_ext16s_i64:
1847        return TCG_TARGET_HAS_ext16s_i64;
1848    case INDEX_op_ext32s_i64:
1849        return TCG_TARGET_HAS_ext32s_i64;
1850    case INDEX_op_ext8u_i64:
1851        return TCG_TARGET_HAS_ext8u_i64;
1852    case INDEX_op_ext16u_i64:
1853        return TCG_TARGET_HAS_ext16u_i64;
1854    case INDEX_op_ext32u_i64:
1855        return TCG_TARGET_HAS_ext32u_i64;
1856    case INDEX_op_bswap16_i64:
1857        return TCG_TARGET_HAS_bswap16_i64;
1858    case INDEX_op_bswap32_i64:
1859        return TCG_TARGET_HAS_bswap32_i64;
1860    case INDEX_op_bswap64_i64:
1861        return TCG_TARGET_HAS_bswap64_i64;
1862    case INDEX_op_not_i64:
1863        return TCG_TARGET_HAS_not_i64;
1864    case INDEX_op_neg_i64:
1865        return TCG_TARGET_HAS_neg_i64;
1866    case INDEX_op_andc_i64:
1867        return TCG_TARGET_HAS_andc_i64;
1868    case INDEX_op_orc_i64:
1869        return TCG_TARGET_HAS_orc_i64;
1870    case INDEX_op_eqv_i64:
1871        return TCG_TARGET_HAS_eqv_i64;
1872    case INDEX_op_nand_i64:
1873        return TCG_TARGET_HAS_nand_i64;
1874    case INDEX_op_nor_i64:
1875        return TCG_TARGET_HAS_nor_i64;
1876    case INDEX_op_clz_i64:
1877        return TCG_TARGET_HAS_clz_i64;
1878    case INDEX_op_ctz_i64:
1879        return TCG_TARGET_HAS_ctz_i64;
1880    case INDEX_op_ctpop_i64:
1881        return TCG_TARGET_HAS_ctpop_i64;
1882    case INDEX_op_add2_i64:
1883        return TCG_TARGET_HAS_add2_i64;
1884    case INDEX_op_sub2_i64:
1885        return TCG_TARGET_HAS_sub2_i64;
1886    case INDEX_op_mulu2_i64:
1887        return TCG_TARGET_HAS_mulu2_i64;
1888    case INDEX_op_muls2_i64:
1889        return TCG_TARGET_HAS_muls2_i64;
1890    case INDEX_op_muluh_i64:
1891        return TCG_TARGET_HAS_muluh_i64;
1892    case INDEX_op_mulsh_i64:
1893        return TCG_TARGET_HAS_mulsh_i64;
1894
1895    case INDEX_op_mov_vec:
1896    case INDEX_op_dup_vec:
1897    case INDEX_op_dupm_vec:
1898    case INDEX_op_ld_vec:
1899    case INDEX_op_st_vec:
1900    case INDEX_op_add_vec:
1901    case INDEX_op_sub_vec:
1902    case INDEX_op_and_vec:
1903    case INDEX_op_or_vec:
1904    case INDEX_op_xor_vec:
1905    case INDEX_op_cmp_vec:
1906        return have_vec;
1907    case INDEX_op_dup2_vec:
1908        return have_vec && TCG_TARGET_REG_BITS == 32;
1909    case INDEX_op_not_vec:
1910        return have_vec && TCG_TARGET_HAS_not_vec;
1911    case INDEX_op_neg_vec:
1912        return have_vec && TCG_TARGET_HAS_neg_vec;
1913    case INDEX_op_abs_vec:
1914        return have_vec && TCG_TARGET_HAS_abs_vec;
1915    case INDEX_op_andc_vec:
1916        return have_vec && TCG_TARGET_HAS_andc_vec;
1917    case INDEX_op_orc_vec:
1918        return have_vec && TCG_TARGET_HAS_orc_vec;
1919    case INDEX_op_mul_vec:
1920        return have_vec && TCG_TARGET_HAS_mul_vec;
1921    case INDEX_op_shli_vec:
1922    case INDEX_op_shri_vec:
1923    case INDEX_op_sari_vec:
1924        return have_vec && TCG_TARGET_HAS_shi_vec;
1925    case INDEX_op_shls_vec:
1926    case INDEX_op_shrs_vec:
1927    case INDEX_op_sars_vec:
1928        return have_vec && TCG_TARGET_HAS_shs_vec;
1929    case INDEX_op_shlv_vec:
1930    case INDEX_op_shrv_vec:
1931    case INDEX_op_sarv_vec:
1932        return have_vec && TCG_TARGET_HAS_shv_vec;
1933    case INDEX_op_rotli_vec:
1934        return have_vec && TCG_TARGET_HAS_roti_vec;
1935    case INDEX_op_rotls_vec:
1936        return have_vec && TCG_TARGET_HAS_rots_vec;
1937    case INDEX_op_rotlv_vec:
1938    case INDEX_op_rotrv_vec:
1939        return have_vec && TCG_TARGET_HAS_rotv_vec;
1940    case INDEX_op_ssadd_vec:
1941    case INDEX_op_usadd_vec:
1942    case INDEX_op_sssub_vec:
1943    case INDEX_op_ussub_vec:
1944        return have_vec && TCG_TARGET_HAS_sat_vec;
1945    case INDEX_op_smin_vec:
1946    case INDEX_op_umin_vec:
1947    case INDEX_op_smax_vec:
1948    case INDEX_op_umax_vec:
1949        return have_vec && TCG_TARGET_HAS_minmax_vec;
1950    case INDEX_op_bitsel_vec:
1951        return have_vec && TCG_TARGET_HAS_bitsel_vec;
1952    case INDEX_op_cmpsel_vec:
1953        return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1954
1955    default:
1956        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1957        return true;
1958    }
1959}
1960
1961/* Note: we convert the 64-bit args to 32-bit and do some alignment
1962   and endian swapping. Maybe it would be better to do the alignment
1963   and endian swapping in tcg_reg_alloc_call(). */
1964void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1965{
1966    int i, real_args, nb_rets, pi;
1967    unsigned sizemask, flags;
1968    TCGHelperInfo *info;
1969    TCGOp *op;
1970
1971    info = g_hash_table_lookup(helper_table, (gpointer)func);
1972    flags = info->flags;
1973    sizemask = info->sizemask;
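        /*
         * The sizemask packs two bits per value: bit 0 is set if the
         * return value is 64-bit, and for argument i, bit (i+1)*2 is set
         * if the argument is 64-bit and bit (i+1)*2 + 1 if it is signed.
         * E.g. a helper returning i64 with one signed 32-bit argument has
         * sizemask 1 | (2 << 2).  (Descriptive note added for clarity;
         * the encoding is produced by the dh_sizemask() helper macros.)
         */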
1974
1975#ifdef CONFIG_PLUGIN
1976    /* detect non-plugin helpers */
1977    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1978        tcg_ctx->plugin_insn->calls_helpers = true;
1979    }
1980#endif
1981
1982#if defined(__sparc__) && !defined(__arch64__) \
1983    && !defined(CONFIG_TCG_INTERPRETER)
1984    /* We have 64-bit values in one register, but need to pass as two
1985       separate parameters.  Split them.  */
1986    int orig_sizemask = sizemask;
1987    int orig_nargs = nargs;
1988    TCGv_i64 retl, reth;
1989    TCGTemp *split_args[MAX_OPC_PARAM];
1990
1991    retl = NULL;
1992    reth = NULL;
1993    if (sizemask != 0) {
1994        for (i = real_args = 0; i < nargs; ++i) {
1995            int is_64bit = sizemask & (1 << (i+1)*2);
1996            if (is_64bit) {
1997                TCGv_i64 orig = temp_tcgv_i64(args[i]);
1998                TCGv_i32 h = tcg_temp_new_i32();
1999                TCGv_i32 l = tcg_temp_new_i32();
2000                tcg_gen_extr_i64_i32(l, h, orig);
2001                split_args[real_args++] = tcgv_i32_temp(h);
2002                split_args[real_args++] = tcgv_i32_temp(l);
2003            } else {
2004                split_args[real_args++] = args[i];
2005            }
2006        }
2007        nargs = real_args;
2008        args = split_args;
2009        sizemask = 0;
2010    }
2011#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2012    for (i = 0; i < nargs; ++i) {
2013        int is_64bit = sizemask & (1 << (i+1)*2);
2014        int is_signed = sizemask & (2 << (i+1)*2);
2015        if (!is_64bit) {
2016            TCGv_i64 temp = tcg_temp_new_i64();
2017            TCGv_i64 orig = temp_tcgv_i64(args[i]);
2018            if (is_signed) {
2019                tcg_gen_ext32s_i64(temp, orig);
2020            } else {
2021                tcg_gen_ext32u_i64(temp, orig);
2022            }
2023            args[i] = tcgv_i64_temp(temp);
2024        }
2025    }
2026#endif /* TCG_TARGET_EXTEND_ARGS */
2027
2028    op = tcg_emit_op(INDEX_op_call);
2029
2030    pi = 0;
2031    if (ret != NULL) {
2032#if defined(__sparc__) && !defined(__arch64__) \
2033    && !defined(CONFIG_TCG_INTERPRETER)
2034        if (orig_sizemask & 1) {
2035            /* The 32-bit ABI is going to return the 64-bit value in
2036               the %o0/%o1 register pair.  Prepare for this by using
2037               two return temporaries, and reassemble below.  */
2038            retl = tcg_temp_new_i64();
2039            reth = tcg_temp_new_i64();
2040            op->args[pi++] = tcgv_i64_arg(reth);
2041            op->args[pi++] = tcgv_i64_arg(retl);
2042            nb_rets = 2;
2043        } else {
2044            op->args[pi++] = temp_arg(ret);
2045            nb_rets = 1;
2046        }
2047#else
2048        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
2049#ifdef HOST_WORDS_BIGENDIAN
2050            op->args[pi++] = temp_arg(ret + 1);
2051            op->args[pi++] = temp_arg(ret);
2052#else
2053            op->args[pi++] = temp_arg(ret);
2054            op->args[pi++] = temp_arg(ret + 1);
2055#endif
2056            nb_rets = 2;
2057        } else {
2058            op->args[pi++] = temp_arg(ret);
2059            nb_rets = 1;
2060        }
2061#endif
2062    } else {
2063        nb_rets = 0;
2064    }
2065    TCGOP_CALLO(op) = nb_rets;
2066
2067    real_args = 0;
2068    for (i = 0; i < nargs; i++) {
2069        int is_64bit = sizemask & (1 << (i+1)*2);
2070        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
2071#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2072            /* some targets want aligned 64 bit args */
2073            if (real_args & 1) {
2074                op->args[pi++] = TCG_CALL_DUMMY_ARG;
2075                real_args++;
2076            }
2077#endif
2078            /* If stack grows up, then we will be placing successive
2079               arguments at lower addresses, which means we need to
2080               reverse the order compared to how we would normally
2081               treat either big or little-endian.  For those arguments
2082               that will wind up in registers, this still works for
2083               HPPA (the only current STACK_GROWSUP target) since the
2084               argument registers are *also* allocated in decreasing
2085               order.  If another such target is added, this logic may
2086               have to get more complicated to differentiate between
2087               stack arguments and register arguments.  */
2088#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2089            op->args[pi++] = temp_arg(args[i] + 1);
2090            op->args[pi++] = temp_arg(args[i]);
2091#else
2092            op->args[pi++] = temp_arg(args[i]);
2093            op->args[pi++] = temp_arg(args[i] + 1);
2094#endif
2095            real_args += 2;
2096            continue;
2097        }
2098
2099        op->args[pi++] = temp_arg(args[i]);
2100        real_args++;
2101    }
2102    op->args[pi++] = (uintptr_t)func;
2103    op->args[pi++] = flags;
2104    TCGOP_CALLI(op) = real_args;
2105
2106    /* Make sure the fields didn't overflow.  */
2107    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
2108    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
2109
2110#if defined(__sparc__) && !defined(__arch64__) \
2111    && !defined(CONFIG_TCG_INTERPRETER)
2112    /* Free all of the parts we allocated above.  */
2113    for (i = real_args = 0; i < orig_nargs; ++i) {
2114        int is_64bit = orig_sizemask & (1 << (i+1)*2);
2115        if (is_64bit) {
2116            tcg_temp_free_internal(args[real_args++]);
2117            tcg_temp_free_internal(args[real_args++]);
2118        } else {
2119            real_args++;
2120        }
2121    }
2122    if (orig_sizemask & 1) {
2123        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
2124           Note that describing these as TCGv_i64 eliminates an unnecessary
2125           zero-extension that tcg_gen_concat_i32_i64 would create.  */
2126        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2127        tcg_temp_free_i64(retl);
2128        tcg_temp_free_i64(reth);
2129    }
2130#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2131    for (i = 0; i < nargs; ++i) {
2132        int is_64bit = sizemask & (1 << (i+1)*2);
2133        if (!is_64bit) {
2134            tcg_temp_free_internal(args[i]);
2135        }
2136    }
2137#endif /* TCG_TARGET_EXTEND_ARGS */
2138}
2139
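    /* Reset the value-location state of every temp before register
       allocation: constants are TEMP_VAL_CONST, fixed temps live in their
       register, globals and locals start in memory, and normal temps
       start dead. */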
2140static void tcg_reg_alloc_start(TCGContext *s)
2141{
2142    int i, n;
2143
2144    for (i = 0, n = s->nb_temps; i < n; i++) {
2145        TCGTemp *ts = &s->temps[i];
2146        TCGTempVal val = TEMP_VAL_MEM;
2147
2148        switch (ts->kind) {
2149        case TEMP_CONST:
2150            val = TEMP_VAL_CONST;
2151            break;
2152        case TEMP_FIXED:
2153            val = TEMP_VAL_REG;
2154            break;
2155        case TEMP_GLOBAL:
2156            break;
2157        case TEMP_NORMAL:
2158            val = TEMP_VAL_DEAD;
2159            /* fall through */
2160        case TEMP_LOCAL:
2161            ts->mem_allocated = 0;
2162            break;
2163        default:
2164            g_assert_not_reached();
2165        }
2166        ts->val_type = val;
2167    }
2168
2169    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2170}
2171
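    /* Format a human-readable name for temp TS into BUF; used by
       tcg_dump_ops() below. */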
2172static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2173                                 TCGTemp *ts)
2174{
2175    int idx = temp_idx(ts);
2176
2177    switch (ts->kind) {
2178    case TEMP_FIXED:
2179    case TEMP_GLOBAL:
2180        pstrcpy(buf, buf_size, ts->name);
2181        break;
2182    case TEMP_LOCAL:
2183        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2184        break;
2185    case TEMP_NORMAL:
2186        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2187        break;
2188    case TEMP_CONST:
2189        switch (ts->type) {
2190        case TCG_TYPE_I32:
2191            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2192            break;
2193#if TCG_TARGET_REG_BITS > 32
2194        case TCG_TYPE_I64:
2195            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2196            break;
2197#endif
2198        case TCG_TYPE_V64:
2199        case TCG_TYPE_V128:
2200        case TCG_TYPE_V256:
2201            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2202                     64 << (ts->type - TCG_TYPE_V64), ts->val);
2203            break;
2204        default:
2205            g_assert_not_reached();
2206        }
2207        break;
2208    }
2209    return buf;
2210}
2211
2212static char *tcg_get_arg_str(TCGContext *s, char *buf,
2213                             int buf_size, TCGArg arg)
2214{
2215    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2216}
2217
2218/* Find helper name.  */
2219static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2220{
2221    const char *ret = NULL;
2222    if (helper_table) {
2223        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2224        if (info) {
2225            ret = info->name;
2226        }
2227    }
2228    return ret;
2229}
2230
2231static const char * const cond_name[] =
2232{
2233    [TCG_COND_NEVER] = "never",
2234    [TCG_COND_ALWAYS] = "always",
2235    [TCG_COND_EQ] = "eq",
2236    [TCG_COND_NE] = "ne",
2237    [TCG_COND_LT] = "lt",
2238    [TCG_COND_GE] = "ge",
2239    [TCG_COND_LE] = "le",
2240    [TCG_COND_GT] = "gt",
2241    [TCG_COND_LTU] = "ltu",
2242    [TCG_COND_GEU] = "geu",
2243    [TCG_COND_LEU] = "leu",
2244    [TCG_COND_GTU] = "gtu"
2245};
2246
2247static const char * const ldst_name[] =
2248{
2249    [MO_UB]   = "ub",
2250    [MO_SB]   = "sb",
2251    [MO_LEUW] = "leuw",
2252    [MO_LESW] = "lesw",
2253    [MO_LEUL] = "leul",
2254    [MO_LESL] = "lesl",
2255    [MO_LEQ]  = "leq",
2256    [MO_BEUW] = "beuw",
2257    [MO_BESW] = "besw",
2258    [MO_BEUL] = "beul",
2259    [MO_BESL] = "besl",
2260    [MO_BEQ]  = "beq",
2261};
2262
2263static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2264#ifdef TARGET_ALIGNED_ONLY
2265    [MO_UNALN >> MO_ASHIFT]    = "un+",
2266    [MO_ALIGN >> MO_ASHIFT]    = "",
2267#else
2268    [MO_UNALN >> MO_ASHIFT]    = "",
2269    [MO_ALIGN >> MO_ASHIFT]    = "al+",
2270#endif
2271    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2272    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2273    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2274    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2275    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2276    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2277};
2278
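    /* True if register set D contains at most one register (note that
       this includes the empty set). */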
2279static inline bool tcg_regset_single(TCGRegSet d)
2280{
2281    return (d & (d - 1)) == 0;
2282}
2283
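    /* Return the lowest-numbered register in set D (undefined for an
       empty set). */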
2284static inline TCGReg tcg_regset_first(TCGRegSet d)
2285{
2286    if (TCG_TARGET_NB_REGS <= 32) {
2287        return ctz32(d);
2288    } else {
2289        return ctz64(d);
2290    }
2291}
2292
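    /* Dump the current op list to the log, one op per line; when
       available, liveness (sync/dead) and register-preference data are
       appended to each line. */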
2293static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2294{
2295    char buf[128];
2296    TCGOp *op;
2297
2298    QTAILQ_FOREACH(op, &s->ops, link) {
2299        int i, k, nb_oargs, nb_iargs, nb_cargs;
2300        const TCGOpDef *def;
2301        TCGOpcode c;
2302        int col = 0;
2303
2304        c = op->opc;
2305        def = &tcg_op_defs[c];
2306
2307        if (c == INDEX_op_insn_start) {
2308            nb_oargs = 0;
2309            col += qemu_log("\n ----");
2310
2311            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2312                target_ulong a;
2313#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2314                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2315#else
2316                a = op->args[i];
2317#endif
2318                col += qemu_log(" " TARGET_FMT_lx, a);
2319            }
2320        } else if (c == INDEX_op_call) {
2321            /* variable number of arguments */
2322            nb_oargs = TCGOP_CALLO(op);
2323            nb_iargs = TCGOP_CALLI(op);
2324            nb_cargs = def->nb_cargs;
2325
2326            /* function name, flags, out args */
2327            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2328                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2329                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2330            for (i = 0; i < nb_oargs; i++) {
2331                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2332                                                       op->args[i]));
2333            }
2334            for (i = 0; i < nb_iargs; i++) {
2335                TCGArg arg = op->args[nb_oargs + i];
2336                const char *t = "<dummy>";
2337                if (arg != TCG_CALL_DUMMY_ARG) {
2338                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2339                }
2340                col += qemu_log(",%s", t);
2341            }
2342        } else {
2343            col += qemu_log(" %s ", def->name);
2344
2345            nb_oargs = def->nb_oargs;
2346            nb_iargs = def->nb_iargs;
2347            nb_cargs = def->nb_cargs;
2348
2349            if (def->flags & TCG_OPF_VECTOR) {
2350                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2351                                8 << TCGOP_VECE(op));
2352            }
2353
2354            k = 0;
2355            for (i = 0; i < nb_oargs; i++) {
2356                if (k != 0) {
2357                    col += qemu_log(",");
2358                }
2359                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2360                                                      op->args[k++]));
2361            }
2362            for (i = 0; i < nb_iargs; i++) {
2363                if (k != 0) {
2364                    col += qemu_log(",");
2365                }
2366                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2367                                                      op->args[k++]));
2368            }
2369            switch (c) {
2370            case INDEX_op_brcond_i32:
2371            case INDEX_op_setcond_i32:
2372            case INDEX_op_movcond_i32:
2373            case INDEX_op_brcond2_i32:
2374            case INDEX_op_setcond2_i32:
2375            case INDEX_op_brcond_i64:
2376            case INDEX_op_setcond_i64:
2377            case INDEX_op_movcond_i64:
2378            case INDEX_op_cmp_vec:
2379            case INDEX_op_cmpsel_vec:
2380                if (op->args[k] < ARRAY_SIZE(cond_name)
2381                    && cond_name[op->args[k]]) {
2382                    col += qemu_log(",%s", cond_name[op->args[k++]]);
2383                } else {
2384                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2385                }
2386                i = 1;
2387                break;
2388            case INDEX_op_qemu_ld_i32:
2389            case INDEX_op_qemu_st_i32:
2390            case INDEX_op_qemu_st8_i32:
2391            case INDEX_op_qemu_ld_i64:
2392            case INDEX_op_qemu_st_i64:
2393                {
2394                    TCGMemOpIdx oi = op->args[k++];
2395                    MemOp mop = get_memop(oi);  /* avoid shadowing TCGOp *op */
2396                    unsigned ix = get_mmuidx(oi);
2397
2398                    if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2399                        col += qemu_log(",$0x%x,%u", mop, ix);
2400                    } else {
2401                        const char *s_al, *s_op;
2402                        s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2403                        s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2404                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2405                    }
2406                    i = 1;
2407                }
2408                break;
2409            default:
2410                i = 0;
2411                break;
2412            }
2413            switch (c) {
2414            case INDEX_op_set_label:
2415            case INDEX_op_br:
2416            case INDEX_op_brcond_i32:
2417            case INDEX_op_brcond_i64:
2418            case INDEX_op_brcond2_i32:
2419                col += qemu_log("%s$L%d", k ? "," : "",
2420                                arg_label(op->args[k])->id);
2421                i++, k++;
2422                break;
2423            default:
2424                break;
2425            }
2426            for (; i < nb_cargs; i++, k++) {
2427                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2428            }
2429        }
2430
2431        if (have_prefs || op->life) {
2432
2433            QemuLogFile *logfile;
2434
2435            rcu_read_lock();
2436            logfile = qatomic_rcu_read(&qemu_logfile);
2437            if (logfile) {
2438                for (; col < 40; ++col) {
2439                    putc(' ', logfile->fd);
2440                }
2441            }
2442            rcu_read_unlock();
2443        }
2444
2445        if (op->life) {
2446            unsigned life = op->life;
2447
2448            if (life & (SYNC_ARG * 3)) {
2449                qemu_log("  sync:");
2450                for (i = 0; i < 2; ++i) {
2451                    if (life & (SYNC_ARG << i)) {
2452                        qemu_log(" %d", i);
2453                    }
2454                }
2455            }
2456            life /= DEAD_ARG;
2457            if (life) {
2458                qemu_log("  dead:");
2459                for (i = 0; life; ++i, life >>= 1) {
2460                    if (life & 1) {
2461                        qemu_log(" %d", i);
2462                    }
2463                }
2464            }
2465        }
2466
2467        if (have_prefs) {
2468            for (i = 0; i < nb_oargs; ++i) {
2469                TCGRegSet set = op->output_pref[i];
2470
2471                if (i == 0) {
2472                    qemu_log("  pref=");
2473                } else {
2474                    qemu_log(",");
2475                }
2476                if (set == 0) {
2477                    qemu_log("none");
2478                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2479                    qemu_log("all");
2480#ifdef CONFIG_DEBUG_TCG
2481                } else if (tcg_regset_single(set)) {
2482                    TCGReg reg = tcg_regset_first(set);
2483                    qemu_log("%s", tcg_target_reg_names[reg]);
2484#endif
2485                } else if (TCG_TARGET_NB_REGS <= 32) {
2486                    qemu_log("%#x", (uint32_t)set);
2487                } else {
2488                    qemu_log("%#" PRIx64, (uint64_t)set);
2489                }
2490            }
2491        }
2492
2493        qemu_log("\n");
2494    }
2495}
2496
2497/* we give more priority to constraints with fewer registers */
2498static int get_constraint_priority(const TCGOpDef *def, int k)
2499{
2500    const TCGArgConstraint *arg_ct = &def->args_ct[k];
2501    int n;
2502
2503    if (arg_ct->oalias) {
2504        /* an alias is equivalent to a single register */
2505        n = 1;
2506    } else {
2507        n = ctpop64(arg_ct->regs);
2508    }
2509    return TCG_TARGET_NB_REGS - n + 1;
2510}
2511
2512/* sort from highest priority to lowest */
2513static void sort_constraints(TCGOpDef *def, int start, int n)
2514{
2515    int i, j;
2516    TCGArgConstraint *a = def->args_ct;
2517
2518    for (i = 0; i < n; i++) {
2519        a[start + i].sort_index = start + i;
2520    }
2521    if (n <= 1) {
2522        return;
2523    }
2524    for (i = 0; i < n - 1; i++) {
2525        for (j = i + 1; j < n; j++) {
2526            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2527            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2528            if (p1 < p2) {
2529                int tmp = a[start + i].sort_index;
2530                a[start + i].sort_index = a[start + j].sort_index;
2531                a[start + j].sort_index = tmp;
2532            }
2533        }
2534    }
2535}
2536
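    /*
     * Parse the target's operand-constraint strings into args_ct.  A digit
     * makes the operand an alias of the given output operand, '&' marks an
     * output that must not overlap any input (newreg), 'i' allows an
     * immediate, and the remaining letters are supplied by the target via
     * tcg-target-con-str.h.  For example (illustrative), a three-operand
     * entry such as { "r", "r", "ri" } lets the second input be either a
     * register or an immediate.
     */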
2537static void process_op_defs(TCGContext *s)
2538{
2539    TCGOpcode op;
2540
2541    for (op = 0; op < NB_OPS; op++) {
2542        TCGOpDef *def = &tcg_op_defs[op];
2543        const TCGTargetOpDef *tdefs;
2544        int i, nb_args;
2545
2546        if (def->flags & TCG_OPF_NOT_PRESENT) {
2547            continue;
2548        }
2549
2550        nb_args = def->nb_iargs + def->nb_oargs;
2551        if (nb_args == 0) {
2552            continue;
2553        }
2554
2555        /*
2556         * Macro magic should make it impossible, but double-check that
2557         * the array index is in range.  Since the signedness of an enum
2558         * is implementation-defined, force the result to unsigned.
2559         */
2560        unsigned con_set = tcg_target_op_def(op);
2561        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2562        tdefs = &constraint_sets[con_set];
2563
2564        for (i = 0; i < nb_args; i++) {
2565            const char *ct_str = tdefs->args_ct_str[i];
2566            /* Incomplete TCGTargetOpDef entry. */
2567            tcg_debug_assert(ct_str != NULL);
2568
2569            while (*ct_str != '\0') {
2570                switch (*ct_str) {
2571                case '0' ... '9':
2572                    {
2573                        int oarg = *ct_str - '0';
2574                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2575                        tcg_debug_assert(oarg < def->nb_oargs);
2576                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
2577                        def->args_ct[i] = def->args_ct[oarg];
2578                        /* The output sets oalias.  */
2579                        def->args_ct[oarg].oalias = true;
2580                        def->args_ct[oarg].alias_index = i;
2581                        /* The input sets ialias. */
2582                        def->args_ct[i].ialias = true;
2583                        def->args_ct[i].alias_index = oarg;
2584                    }
2585                    ct_str++;
2586                    break;
2587                case '&':
2588                    def->args_ct[i].newreg = true;
2589                    ct_str++;
2590                    break;
2591                case 'i':
2592                    def->args_ct[i].ct |= TCG_CT_CONST;
2593                    ct_str++;
2594                    break;
2595
2596                /* Include all of the target-specific constraints. */
2597
2598#undef CONST
2599#define CONST(CASE, MASK) \
2600    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2601#define REGS(CASE, MASK) \
2602    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2603
2604#include "tcg-target-con-str.h"
2605
2606#undef REGS
2607#undef CONST
2608                default:
2609                    /* Typo in TCGTargetOpDef constraint. */
2610                    g_assert_not_reached();
2611                }
2612            }
2613        }
2614
2615        /* TCGTargetOpDef entry with too much information? */
2616        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2617
2618        /* sort the constraints (XXX: this is just a heuristic) */
2619        sort_constraints(def, 0, def->nb_oargs);
2620        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2621    }
2622}
2623
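    /* Unlink OP from the op list and move it to the free list, dropping
       reference counts on any branch-target labels. */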
2624void tcg_op_remove(TCGContext *s, TCGOp *op)
2625{
2626    TCGLabel *label;
2627
2628    switch (op->opc) {
2629    case INDEX_op_br:
2630        label = arg_label(op->args[0]);
2631        label->refs--;
2632        break;
2633    case INDEX_op_brcond_i32:
2634    case INDEX_op_brcond_i64:
2635        label = arg_label(op->args[3]);
2636        label->refs--;
2637        break;
2638    case INDEX_op_brcond2_i32:
2639        label = arg_label(op->args[5]);
2640        label->refs--;
2641        break;
2642    default:
2643        break;
2644    }
2645
2646    QTAILQ_REMOVE(&s->ops, op, link);
2647    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2648    s->nb_ops--;
2649
2650#ifdef CONFIG_PROFILER
2651    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2652#endif
2653}
2654
2655static TCGOp *tcg_op_alloc(TCGOpcode opc)
2656{
2657    TCGContext *s = tcg_ctx;
2658    TCGOp *op;
2659
2660    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2661        op = tcg_malloc(sizeof(TCGOp));
2662    } else {
2663        op = QTAILQ_FIRST(&s->free_ops);
2664        QTAILQ_REMOVE(&s->free_ops, op, link);
2665    }
2666    memset(op, 0, offsetof(TCGOp, link));
2667    op->opc = opc;
2668    s->nb_ops++;
2669
2670    return op;
2671}
2672
2673TCGOp *tcg_emit_op(TCGOpcode opc)
2674{
2675    TCGOp *op = tcg_op_alloc(opc);
2676    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2677    return op;
2678}
2679
2680TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2681{
2682    TCGOp *new_op = tcg_op_alloc(opc);
2683    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2684    return new_op;
2685}
2686
2687TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2688{
2689    TCGOp *new_op = tcg_op_alloc(opc);
2690    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2691    return new_op;
2692}
2693
2694/* Reachability analysis: remove unreachable code.  */
2695static void reachable_code_pass(TCGContext *s)
2696{
2697    TCGOp *op, *op_next;
2698    bool dead = false;
2699
2700    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2701        bool remove = dead;
2702        TCGLabel *label;
2703        int call_flags;
2704
2705        switch (op->opc) {
2706        case INDEX_op_set_label:
2707            label = arg_label(op->args[0]);
2708            if (label->refs == 0) {
2709                /*
2710                 * While there is an occasional backward branch, virtually
2711                 * all branches generated by the translators are forward.
2712                 * Which means that generally we will have already removed
2713                 * all references to this label, and there is little to be
2714                 * gained by iterating to look for stray backward branches.
2715                 */
2716                remove = true;
2717            } else {
2718                /* Once we see a label, insns become live again.  */
2719                dead = false;
2720                remove = false;
2721
2722                /*
2723                 * Optimization can fold conditional branches to unconditional.
2724                 * If we find a label with one reference which is preceded by
2725                 * an unconditional branch to it, remove both.  This needed to
2726                 * wait until the dead code in between them was removed.
2727                 */
2728                if (label->refs == 1) {
2729                    TCGOp *op_prev = QTAILQ_PREV(op, link);
2730                    if (op_prev->opc == INDEX_op_br &&
2731                        label == arg_label(op_prev->args[0])) {
2732                        tcg_op_remove(s, op_prev);
2733                        remove = true;
2734                    }
2735                }
2736            }
2737            break;
2738
2739        case INDEX_op_br:
2740        case INDEX_op_exit_tb:
2741        case INDEX_op_goto_ptr:
2742            /* Unconditional branches; everything following is dead.  */
2743            dead = true;
2744            break;
2745
2746        case INDEX_op_call:
2747            /* Notice noreturn helper calls, raising exceptions.  */
2748            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2749            if (call_flags & TCG_CALL_NO_RETURN) {
2750                dead = true;
2751            }
2752            break;
2753
2754        case INDEX_op_insn_start:
2755            /* Never remove -- we need to keep these for unwind.  */
2756            remove = false;
2757            break;
2758
2759        default:
2760            break;
2761        }
2762
2763        if (remove) {
2764            tcg_op_remove(s, op);
2765        }
2766    }
2767}
2768
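    /* Temp state bits for the liveness passes: TS_DEAD means the value is
       not live (not needed by any later op), TS_MEM means the value is up
       to date in its memory slot. */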
2769#define TS_DEAD  1
2770#define TS_MEM   2
2771
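    /* op->life packs the liveness results: SYNC_ARG bits for outputs that
       must be stored back to memory occupy the low two bits, followed by
       one DEAD_ARG bit per argument whose temp dies at this op. */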
2772#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2773#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2774
2775/* For liveness_pass_1, the register preferences for a given temp.  */
2776static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2777{
2778    return ts->state_ptr;
2779}
2780
2781/* For liveness_pass_1, reset the preferences for a given temp to the
2782 * maximal regset for its type.
2783 */
2784static inline void la_reset_pref(TCGTemp *ts)
2785{
2786    *la_temp_pref(ts)
2787        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2788}
2789
2790/* liveness analysis: end of function: all temps are dead, and globals
2791   should be in memory. */
2792static void la_func_end(TCGContext *s, int ng, int nt)
2793{
2794    int i;
2795
2796    for (i = 0; i < ng; ++i) {
2797        s->temps[i].state = TS_DEAD | TS_MEM;
2798        la_reset_pref(&s->temps[i]);
2799    }
2800    for (i = ng; i < nt; ++i) {
2801        s->temps[i].state = TS_DEAD;
2802        la_reset_pref(&s->temps[i]);
2803    }
2804}
2805
2806/* liveness analysis: end of basic block: all temps are dead, globals
2807   and local temps should be in memory. */
2808static void la_bb_end(TCGContext *s, int ng, int nt)
2809{
2810    int i;
2811
2812    for (i = 0; i < nt; ++i) {
2813        TCGTemp *ts = &s->temps[i];
2814        int state;
2815
2816        switch (ts->kind) {
2817        case TEMP_FIXED:
2818        case TEMP_GLOBAL:
2819        case TEMP_LOCAL:
2820            state = TS_DEAD | TS_MEM;
2821            break;
2822        case TEMP_NORMAL:
2823        case TEMP_CONST:
2824            state = TS_DEAD;
2825            break;
2826        default:
2827            g_assert_not_reached();
2828        }
2829        ts->state = state;
2830        la_reset_pref(ts);
2831    }
2832}
2833
2834/* liveness analysis: sync globals back to memory.  */
2835static void la_global_sync(TCGContext *s, int ng)
2836{
2837    int i;
2838
2839    for (i = 0; i < ng; ++i) {
2840        int state = s->temps[i].state;
2841        s->temps[i].state = state | TS_MEM;
2842        if (state == TS_DEAD) {
2843            /* If the global was previously dead, reset prefs.  */
2844            la_reset_pref(&s->temps[i]);
2845        }
2846    }
2847}
2848
2849/*
2850 * liveness analysis: conditional branch: all temps are dead,
2851 * globals and local temps should be synced.
2852 */
2853static void la_bb_sync(TCGContext *s, int ng, int nt)
2854{
2855    la_global_sync(s, ng);
2856
2857    for (int i = ng; i < nt; ++i) {
2858        TCGTemp *ts = &s->temps[i];
2859        int state;
2860
2861        switch (ts->kind) {
2862        case TEMP_LOCAL:
2863            state = ts->state;
2864            ts->state = state | TS_MEM;
2865            if (state != TS_DEAD) {
2866                continue;
2867            }
2868            break;
2869        case TEMP_NORMAL:
2870            s->temps[i].state = TS_DEAD;
2871            break;
2872        case TEMP_CONST:
2873            continue;
2874        default:
2875            g_assert_not_reached();
2876        }
2877        la_reset_pref(&s->temps[i]);
2878    }
2879}
2880
2881/* liveness analysis: sync globals back to memory and kill.  */
2882static void la_global_kill(TCGContext *s, int ng)
2883{
2884    int i;
2885
2886    for (i = 0; i < ng; i++) {
2887        s->temps[i].state = TS_DEAD | TS_MEM;
2888        la_reset_pref(&s->temps[i]);
2889    }
2890}
2891
2892/* liveness analysis: note live temps crossing calls.  */
2893static void la_cross_call(TCGContext *s, int nt)
2894{
2895    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2896    int i;
2897
2898    for (i = 0; i < nt; i++) {
2899        TCGTemp *ts = &s->temps[i];
2900        if (!(ts->state & TS_DEAD)) {
2901            TCGRegSet *pset = la_temp_pref(ts);
2902            TCGRegSet set = *pset;
2903
2904            set &= mask;
2905            /* If the combination is not possible, restart.  */
2906            if (set == 0) {
2907                set = tcg_target_available_regs[ts->type] & mask;
2908            }
2909            *pset = set;
2910        }
2911    }
2912}
2913
2914/* Liveness analysis: update the opc_arg_life array to tell whether a
2915   given input argument is dead. Instructions updating dead
2916   temporaries are removed. */
2917static void liveness_pass_1(TCGContext *s)
2918{
2919    int nb_globals = s->nb_globals;
2920    int nb_temps = s->nb_temps;
2921    TCGOp *op, *op_prev;
2922    TCGRegSet *prefs;
2923    int i;
2924
2925    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2926    for (i = 0; i < nb_temps; ++i) {
2927        s->temps[i].state_ptr = prefs + i;
2928    }
2929
2930    /* ??? Should be redundant with the exit_tb that ends the TB.  */
2931    la_func_end(s, nb_globals, nb_temps);
2932
2933    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2934        int nb_iargs, nb_oargs;
2935        TCGOpcode opc_new, opc_new2;
2936        bool have_opc_new2;
2937        TCGLifeData arg_life = 0;
2938        TCGTemp *ts;
2939        TCGOpcode opc = op->opc;
2940        const TCGOpDef *def = &tcg_op_defs[opc];
2941
2942        switch (opc) {
2943        case INDEX_op_call:
2944            {
2945                int call_flags;
2946                int nb_call_regs;
2947
2948                nb_oargs = TCGOP_CALLO(op);
2949                nb_iargs = TCGOP_CALLI(op);
2950                call_flags = op->args[nb_oargs + nb_iargs + 1];
2951
2952                /* pure functions can be removed if their result is unused */
2953                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2954                    for (i = 0; i < nb_oargs; i++) {
2955                        ts = arg_temp(op->args[i]);
2956                        if (ts->state != TS_DEAD) {
2957                            goto do_not_remove_call;
2958                        }
2959                    }
2960                    goto do_remove;
2961                }
2962            do_not_remove_call:
2963
2964                /* Output args are dead.  */
2965                for (i = 0; i < nb_oargs; i++) {
2966                    ts = arg_temp(op->args[i]);
2967                    if (ts->state & TS_DEAD) {
2968                        arg_life |= DEAD_ARG << i;
2969                    }
2970                    if (ts->state & TS_MEM) {
2971                        arg_life |= SYNC_ARG << i;
2972                    }
2973                    ts->state = TS_DEAD;
2974                    la_reset_pref(ts);
2975
2976                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2977                    op->output_pref[i] = 0;
2978                }
2979
2980                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2981                                    TCG_CALL_NO_READ_GLOBALS))) {
2982                    la_global_kill(s, nb_globals);
2983                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2984                    la_global_sync(s, nb_globals);
2985                }
2986
2987                /* Record arguments that die in this helper.  */
2988                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2989                    ts = arg_temp(op->args[i]);
2990                    if (ts && ts->state & TS_DEAD) {
2991                        arg_life |= DEAD_ARG << i;
2992                    }
2993                }
2994
2995                /* For all live registers, remove call-clobbered prefs.  */
2996                la_cross_call(s, nb_temps);
2997
2998                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2999
3000                /* Input arguments are live for preceding opcodes.  */
3001                for (i = 0; i < nb_iargs; i++) {
3002                    ts = arg_temp(op->args[i + nb_oargs]);
3003                    if (ts && ts->state & TS_DEAD) {
3004                        /* For those arguments that die, and will be allocated
3005                         * in registers, clear the register set for that arg,
3006                         * to be filled in below.  For args that will be on
3007                         * the stack, reset to any available reg.
3008                         */
3009                        *la_temp_pref(ts)
3010                            = (i < nb_call_regs ? 0 :
3011                               tcg_target_available_regs[ts->type]);
3012                        ts->state &= ~TS_DEAD;
3013                    }
3014                }
3015
3016                /* For each input argument, add its input register to prefs.
3017                   If a temp is used once, this produces a single set bit.  */
3018                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3019                    ts = arg_temp(op->args[i + nb_oargs]);
3020                    if (ts) {
3021                        tcg_regset_set_reg(*la_temp_pref(ts),
3022                                           tcg_target_call_iarg_regs[i]);
3023                    }
3024                }
3025            }
3026            break;
3027        case INDEX_op_insn_start:
3028            break;
3029        case INDEX_op_discard:
3030            /* mark the temporary as dead */
3031            ts = arg_temp(op->args[0]);
3032            ts->state = TS_DEAD;
3033            la_reset_pref(ts);
3034            break;
3035
3036        case INDEX_op_add2_i32:
3037            opc_new = INDEX_op_add_i32;
3038            goto do_addsub2;
3039        case INDEX_op_sub2_i32:
3040            opc_new = INDEX_op_sub_i32;
3041            goto do_addsub2;
3042        case INDEX_op_add2_i64:
3043            opc_new = INDEX_op_add_i64;
3044            goto do_addsub2;
3045        case INDEX_op_sub2_i64:
3046            opc_new = INDEX_op_sub_i64;
3047        do_addsub2:
3048            nb_iargs = 4;
3049            nb_oargs = 2;
3050            /* Test if the high part of the operation is dead, but not
3051               the low part.  The result can be optimized to a simple
3052               add or sub.  This often happens for an x86_64 guest when
3053               the CPU is running in 32-bit mode.  */
3054            if (arg_temp(op->args[1])->state == TS_DEAD) {
3055                if (arg_temp(op->args[0])->state == TS_DEAD) {
3056                    goto do_remove;
3057                }
3058                /* Replace the opcode and adjust the args in place,
3059                   leaving 3 unused args at the end.  */
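                    /* E.g. add2_i32 rl,rh,al,ah,bl,bh with rh dead
                       becomes add_i32 rl,al,bl. */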
3060                op->opc = opc = opc_new;
3061                op->args[1] = op->args[2];
3062                op->args[2] = op->args[4];
3063                /* Fall through and mark the single-word operation live.  */
3064                nb_iargs = 2;
3065                nb_oargs = 1;
3066            }
3067            goto do_not_remove;
3068
3069        case INDEX_op_mulu2_i32:
3070            opc_new = INDEX_op_mul_i32;
3071            opc_new2 = INDEX_op_muluh_i32;
3072            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3073            goto do_mul2;
3074        case INDEX_op_muls2_i32:
3075            opc_new = INDEX_op_mul_i32;
3076            opc_new2 = INDEX_op_mulsh_i32;
3077            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3078            goto do_mul2;
3079        case INDEX_op_mulu2_i64:
3080            opc_new = INDEX_op_mul_i64;
3081            opc_new2 = INDEX_op_muluh_i64;
3082            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3083            goto do_mul2;
3084        case INDEX_op_muls2_i64:
3085            opc_new = INDEX_op_mul_i64;
3086            opc_new2 = INDEX_op_mulsh_i64;
3087            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3088            goto do_mul2;
3089        do_mul2:
3090            nb_iargs = 2;
3091            nb_oargs = 2;
3092            if (arg_temp(op->args[1])->state == TS_DEAD) {
3093                if (arg_temp(op->args[0])->state == TS_DEAD) {
3094                    /* Both parts of the operation are dead.  */
3095                    goto do_remove;
3096                }
3097                /* The high part of the operation is dead; generate the low. */
3098                op->opc = opc = opc_new;
3099                op->args[1] = op->args[2];
3100                op->args[2] = op->args[3];
3101            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3102                /* The low part of the operation is dead; generate the high. */
3103                op->opc = opc = opc_new2;
3104                op->args[0] = op->args[1];
3105                op->args[1] = op->args[2];
3106                op->args[2] = op->args[3];
3107            } else {
3108                goto do_not_remove;
3109            }
3110            /* Mark the single-word operation live.  */
3111            nb_oargs = 1;
3112            goto do_not_remove;
3113
3114        default:
3115            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3116            nb_iargs = def->nb_iargs;
3117            nb_oargs = def->nb_oargs;
3118
3119            /* Test if the operation can be removed because all
3120               its outputs are dead. We assume that nb_oargs == 0
3121               implies side effects.  */
3122            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3123                for (i = 0; i < nb_oargs; i++) {
3124                    if (arg_temp(op->args[i])->state != TS_DEAD) {
3125                        goto do_not_remove;
3126                    }
3127                }
3128                goto do_remove;
3129            }
3130            goto do_not_remove;
3131
3132        do_remove:
3133            tcg_op_remove(s, op);
3134            break;
3135
3136        do_not_remove:
3137            for (i = 0; i < nb_oargs; i++) {
3138                ts = arg_temp(op->args[i]);
3139
3140                /* Remember the preference of the uses that followed.  */
3141                op->output_pref[i] = *la_temp_pref(ts);
3142
3143                /* Output args are dead.  */
3144                if (ts->state & TS_DEAD) {
3145                    arg_life |= DEAD_ARG << i;
3146                }
3147                if (ts->state & TS_MEM) {
3148                    arg_life |= SYNC_ARG << i;
3149                }
3150                ts->state = TS_DEAD;
3151                la_reset_pref(ts);
3152            }
3153
3154            /* If end of basic block, update.  */
3155            if (def->flags & TCG_OPF_BB_EXIT) {
3156                la_func_end(s, nb_globals, nb_temps);
3157            } else if (def->flags & TCG_OPF_COND_BRANCH) {
3158                la_bb_sync(s, nb_globals, nb_temps);
3159            } else if (def->flags & TCG_OPF_BB_END) {
3160                la_bb_end(s, nb_globals, nb_temps);
3161            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3162                la_global_sync(s, nb_globals);
3163                if (def->flags & TCG_OPF_CALL_CLOBBER) {
3164                    la_cross_call(s, nb_temps);
3165                }
3166            }
3167
3168            /* Record arguments that die in this opcode.  */
3169            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3170                ts = arg_temp(op->args[i]);
3171                if (ts->state & TS_DEAD) {
3172                    arg_life |= DEAD_ARG << i;
3173                }
3174            }
3175
3176            /* Input arguments are live for preceding opcodes.  */
3177            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3178                ts = arg_temp(op->args[i]);
3179                if (ts->state & TS_DEAD) {
3180                    /* For operands that were dead, initially allow
3181                       all regs for the type.  */
3182                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3183                    ts->state &= ~TS_DEAD;
3184                }
3185            }
3186
3187            /* Incorporate constraints for this operand.  */
3188            switch (opc) {
3189            case INDEX_op_mov_i32:
3190            case INDEX_op_mov_i64:
3191                /* Note that these are TCG_OPF_NOT_PRESENT and do not
3192                   have proper constraints.  That said, special case
3193                   moves to propagate preferences backward.  */
3194                if (IS_DEAD_ARG(1)) {
3195                    *la_temp_pref(arg_temp(op->args[0]))
3196                        = *la_temp_pref(arg_temp(op->args[1]));
3197                }
3198                break;
3199
3200            default:
3201                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3202                    const TCGArgConstraint *ct = &def->args_ct[i];
3203                    TCGRegSet set, *pset;
3204
3205                    ts = arg_temp(op->args[i]);
3206                    pset = la_temp_pref(ts);
3207                    set = *pset;
3208
3209                    set &= ct->regs;
3210                    if (ct->ialias) {
3211                        set &= op->output_pref[ct->alias_index];
3212                    }
3213                    /* If the combination is not possible, restart.  */
3214                    if (set == 0) {
3215                        set = ct->regs;
3216                    }
3217                    *pset = set;
3218                }
3219                break;
3220            }
3221            break;
3222        }
3223        op->life = arg_life;
3224    }
3225}
3226
3227/* Liveness analysis: Convert indirect regs to direct temporaries.  */
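    /* Each indirect global gets a shadow direct temp; loads are inserted
       before uses and stores after definitions, so that later passes see
       only direct temps. */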
3228static bool liveness_pass_2(TCGContext *s)
3229{
3230    int nb_globals = s->nb_globals;
3231    int nb_temps, i;
3232    bool changes = false;
3233    TCGOp *op, *op_next;
3234
3235    /* Create a temporary for each indirect global.  */
3236    for (i = 0; i < nb_globals; ++i) {
3237        TCGTemp *its = &s->temps[i];
3238        if (its->indirect_reg) {
3239            TCGTemp *dts = tcg_temp_alloc(s);
3240            dts->type = its->type;
3241            dts->base_type = its->base_type;
3242            its->state_ptr = dts;
3243        } else {
3244            its->state_ptr = NULL;
3245        }
3246        /* All globals begin dead.  */
3247        its->state = TS_DEAD;
3248    }
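        /* Continue with i == nb_globals: reset all non-global temps. */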
3249    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3250        TCGTemp *its = &s->temps[i];
3251        its->state_ptr = NULL;
3252        its->state = TS_DEAD;
3253    }
3254
3255    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3256        TCGOpcode opc = op->opc;
3257        const TCGOpDef *def = &tcg_op_defs[opc];
3258        TCGLifeData arg_life = op->life;
3259        int nb_iargs, nb_oargs, call_flags;
3260        TCGTemp *arg_ts, *dir_ts;
3261
3262        if (opc == INDEX_op_call) {
3263            nb_oargs = TCGOP_CALLO(op);
3264            nb_iargs = TCGOP_CALLI(op);
3265            call_flags = op->args[nb_oargs + nb_iargs + 1];
3266        } else {
3267            nb_iargs = def->nb_iargs;
3268            nb_oargs = def->nb_oargs;
3269
3270            /* Set flags similar to those required for a call.  */
3271            if (def->flags & TCG_OPF_COND_BRANCH) {
3272                /* Like reading globals: sync_globals */
3273                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3274            } else if (def->flags & TCG_OPF_BB_END) {
3275                /* Like writing globals: save_globals */
3276                call_flags = 0;
3277            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3278                /* Like reading globals: sync_globals */
3279                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3280            } else {
3281                /* No effect on globals.  */
3282                call_flags = (TCG_CALL_NO_READ_GLOBALS |
3283                              TCG_CALL_NO_WRITE_GLOBALS);
3284            }
3285        }
3286
3287        /* Make sure that input arguments are available.  */
3288        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3289            arg_ts = arg_temp(op->args[i]);
3290            if (arg_ts) {
3291                dir_ts = arg_ts->state_ptr;
3292                if (dir_ts && arg_ts->state == TS_DEAD) {
3293                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3294                                      ? INDEX_op_ld_i32
3295                                      : INDEX_op_ld_i64);
3296                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3297
3298                    lop->args[0] = temp_arg(dir_ts);
3299                    lop->args[1] = temp_arg(arg_ts->mem_base);
3300                    lop->args[2] = arg_ts->mem_offset;
3301
3302                    /* Loaded, but synced with memory.  */
3303                    arg_ts->state = TS_MEM;
3304                }
3305            }
3306        }
3307
3308        /* Perform input replacement, and mark inputs that became dead.
3309           No action is required except keeping temp_state up to date
3310           so that we reload when needed.  */
3311        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3312            arg_ts = arg_temp(op->args[i]);
3313            if (arg_ts) {
3314                dir_ts = arg_ts->state_ptr;
3315                if (dir_ts) {
3316                    op->args[i] = temp_arg(dir_ts);
3317                    changes = true;
3318                    if (IS_DEAD_ARG(i)) {
3319                        arg_ts->state = TS_DEAD;
3320                    }
3321                }
3322            }
3323        }
3324
3325        /* Liveness analysis should ensure that the following are
3326           all correct, for call sites and basic block end points.  */
3327        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3328            /* Nothing to do */
3329        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3330            for (i = 0; i < nb_globals; ++i) {
3331                /* Liveness should see that globals are synced back,
3332                   that is, either TS_DEAD or TS_MEM.  */
3333                arg_ts = &s->temps[i];
3334                tcg_debug_assert(arg_ts->state_ptr == 0
3335                                 || arg_ts->state != 0);
3336            }
3337        } else {
3338            for (i = 0; i < nb_globals; ++i) {
3339                /* Liveness should see that globals are saved back,
3340                   that is, TS_DEAD, waiting to be reloaded.  */
3341                arg_ts = &s->temps[i];
3342                tcg_debug_assert(arg_ts->state_ptr == 0
3343                                 || arg_ts->state == TS_DEAD);
3344            }
3345        }
3346
3347        /* Outputs become available.  */
3348        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3349            arg_ts = arg_temp(op->args[0]);
3350            dir_ts = arg_ts->state_ptr;
3351            if (dir_ts) {
3352                op->args[0] = temp_arg(dir_ts);
3353                changes = true;
3354
3355                /* The output is now live and modified.  */
3356                arg_ts->state = 0;
3357
3358                if (NEED_SYNC_ARG(0)) {
3359                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3360                                      ? INDEX_op_st_i32
3361                                      : INDEX_op_st_i64);
3362                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3363                    TCGTemp *out_ts = dir_ts;
3364
3365                    if (IS_DEAD_ARG(0)) {
3366                        out_ts = arg_temp(op->args[1]);
3367                        arg_ts->state = TS_DEAD;
3368                        tcg_op_remove(s, op);
3369                    } else {
3370                        arg_ts->state = TS_MEM;
3371                    }
3372
3373                    sop->args[0] = temp_arg(out_ts);
3374                    sop->args[1] = temp_arg(arg_ts->mem_base);
3375                    sop->args[2] = arg_ts->mem_offset;
3376                } else {
3377                    tcg_debug_assert(!IS_DEAD_ARG(0));
3378                }
3379            }
3380        } else {
3381            for (i = 0; i < nb_oargs; i++) {
3382                arg_ts = arg_temp(op->args[i]);
3383                dir_ts = arg_ts->state_ptr;
3384                if (!dir_ts) {
3385                    continue;
3386                }
3387                op->args[i] = temp_arg(dir_ts);
3388                changes = true;
3389
3390                /* The output is now live and modified.  */
3391                arg_ts->state = 0;
3392
3393                /* Sync outputs upon their last write.  */
3394                if (NEED_SYNC_ARG(i)) {
3395                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3396                                      ? INDEX_op_st_i32
3397                                      : INDEX_op_st_i64);
3398                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3399
3400                    sop->args[0] = temp_arg(dir_ts);
3401                    sop->args[1] = temp_arg(arg_ts->mem_base);
3402                    sop->args[2] = arg_ts->mem_offset;
3403
3404                    arg_ts->state = TS_MEM;
3405                }
3406                /* Drop outputs that are dead.  */
3407                if (IS_DEAD_ARG(i)) {
3408                    arg_ts->state = TS_DEAD;
3409                }
3410            }
3411        }
3412    }
3413
3414    return changes;
3415}
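/*
 * An illustrative sketch of the lowering performed above (the opcode
 * spelling and temp names are made up for exposition).  Given an
 * indirect global G whose canonical home is a slot reached through its
 * mem_base register, an op that reads and writes it:
 *
 *     add_i32 G, G, t0
 *
 * becomes, after liveness_pass_2:
 *
 *     ld_i32  D, base, off(G)       -- reload, since G was TS_DEAD
 *     add_i32 D, D, t0
 *     st_i32  D, base, off(G)       -- only when NEED_SYNC_ARG is set
 *
 * where D is the direct temporary recorded in G's state_ptr.  If no
 * operand was rewritten, the pass returns false and the caller can
 * skip re-running liveness.
 */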
3416
3417#ifdef CONFIG_DEBUG_TCG
3418static void dump_regs(TCGContext *s)
3419{
3420    TCGTemp *ts;
3421    int i;
3422    char buf[64];
3423
3424    for(i = 0; i < s->nb_temps; i++) {
3425        ts = &s->temps[i];
3426        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3427        switch(ts->val_type) {
3428        case TEMP_VAL_REG:
3429            printf("%s", tcg_target_reg_names[ts->reg]);
3430            break;
3431        case TEMP_VAL_MEM:
3432            printf("%d(%s)", (int)ts->mem_offset,
3433                   tcg_target_reg_names[ts->mem_base->reg]);
3434            break;
3435        case TEMP_VAL_CONST:
3436            printf("$0x%" PRIx64, ts->val);
3437            break;
3438        case TEMP_VAL_DEAD:
3439            printf("D");
3440            break;
3441        default:
3442            printf("???");
3443            break;
3444        }
3445        printf("\n");
3446    }
3447
3448    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3449        if (s->reg_to_temp[i] != NULL) {
3450            printf("%s: %s\n", 
3451                   tcg_target_reg_names[i], 
3452                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3453        }
3454    }
3455}
3456
3457static void check_regs(TCGContext *s)
3458{
3459    int reg;
3460    int k;
3461    TCGTemp *ts;
3462    char buf[64];
3463
3464    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3465        ts = s->reg_to_temp[reg];
3466        if (ts != NULL) {
3467            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3468                printf("Inconsistency for register %s:\n", 
3469                       tcg_target_reg_names[reg]);
3470                goto fail;
3471            }
3472        }
3473    }
3474    for (k = 0; k < s->nb_temps; k++) {
3475        ts = &s->temps[k];
3476        if (ts->val_type == TEMP_VAL_REG
3477            && ts->kind != TEMP_FIXED
3478            && s->reg_to_temp[ts->reg] != ts) {
3479            printf("Inconsistency for temp %s:\n",
3480                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3481        fail:
3482            printf("reg state:\n");
3483            dump_regs(s);
3484            tcg_abort();
3485        }
3486    }
3487}
3488#endif
3489
3490static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3491{
3492    intptr_t off, size, align;
3493
3494    switch (ts->type) {
3495    case TCG_TYPE_I32:
3496        size = align = 4;
3497        break;
3498    case TCG_TYPE_I64:
3499    case TCG_TYPE_V64:
3500        size = align = 8;
3501        break;
3502    case TCG_TYPE_V128:
3503        size = align = 16;
3504        break;
3505    case TCG_TYPE_V256:
3506        /* Note that we do not require aligned storage for V256. */
3507        size = 32, align = 16;
3508        break;
3509    default:
3510        g_assert_not_reached();
3511    }
3512
3513    assert(align <= TCG_TARGET_STACK_ALIGN);
3514    off = ROUND_UP(s->current_frame_offset, align);
3515    assert(off + size <= s->frame_end);
3516    s->current_frame_offset = off + size;
3517
3518    ts->mem_offset = off;
3519#if defined(__sparc__)
3520    ts->mem_offset += TCG_TARGET_STACK_BIAS;
3521#endif
3522    ts->mem_base = s->frame_temp;
3523    ts->mem_allocated = 1;
3524}
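/*
 * Worked example of the slot arithmetic above, with assumed (not
 * target-specific) numbers: if current_frame_offset is 20 and the temp
 * is TCG_TYPE_V128, then align is 16, off = ROUND_UP(20, 16) = 32, and
 * current_frame_offset advances to 32 + 16 = 48.  For a power-of-two
 * alignment this rounding is the usual
 *
 *     off = (s->current_frame_offset + align - 1) & ~(align - 1);
 */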
3525
3526static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3527
3528/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3529   mark it free; otherwise mark it dead.  */
3530static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3531{
3532    TCGTempVal new_type;
3533
3534    switch (ts->kind) {
3535    case TEMP_FIXED:
3536        return;
3537    case TEMP_GLOBAL:
3538    case TEMP_LOCAL:
3539        new_type = TEMP_VAL_MEM;
3540        break;
3541    case TEMP_NORMAL:
3542        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3543        break;
3544    case TEMP_CONST:
3545        new_type = TEMP_VAL_CONST;
3546        break;
3547    default:
3548        g_assert_not_reached();
3549    }
3550    if (ts->val_type == TEMP_VAL_REG) {
3551        s->reg_to_temp[ts->reg] = NULL;
3552    }
3553    ts->val_type = new_type;
3554}
3555
3556/* Mark a temporary as dead.  */
3557static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3558{
3559    temp_free_or_dead(s, ts, 1);
3560}
3561
3562/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3563   register needs to be allocated to store a constant.  If 'free_or_dead'
3564   is non-zero, subsequently release the temporary; if it is positive, the
3565   temp is dead; if it is negative, the temp is free.  */
3566static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3567                      TCGRegSet preferred_regs, int free_or_dead)
3568{
3569    if (!temp_readonly(ts) && !ts->mem_coherent) {
3570        if (!ts->mem_allocated) {
3571            temp_allocate_frame(s, ts);
3572        }
3573        switch (ts->val_type) {
3574        case TEMP_VAL_CONST:
3575            /* If we're going to free the temp immediately, then we won't
3576               require it later in a register, so attempt to store the
3577               constant to memory directly.  */
3578            if (free_or_dead
3579                && tcg_out_sti(s, ts->type, ts->val,
3580                               ts->mem_base->reg, ts->mem_offset)) {
3581                break;
3582            }
3583            temp_load(s, ts, tcg_target_available_regs[ts->type],
3584                      allocated_regs, preferred_regs);
3585            /* fallthrough */
3586
3587        case TEMP_VAL_REG:
3588            tcg_out_st(s, ts->type, ts->reg,
3589                       ts->mem_base->reg, ts->mem_offset);
3590            break;
3591
3592        case TEMP_VAL_MEM:
3593            break;
3594
3595        case TEMP_VAL_DEAD:
3596        default:
3597            tcg_abort();
3598        }
3599        ts->mem_coherent = 1;
3600    }
3601    if (free_or_dead) {
3602        temp_free_or_dead(s, ts, free_or_dead);
3603    }
3604}
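/*
 * The free_or_dead encoding above, shown with callers from this file:
 * tcg_reg_free() passes -1, i.e. sync then mark the temp free
 * (TEMP_VAL_MEM for a TEMP_NORMAL temp); the output handling in
 * tcg_reg_alloc_op() passes IS_DEAD_ARG(i), i.e. sync and then kill
 * the temp only if this op was its last use.
 */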
3605
3606/* free register 'reg' by spilling the corresponding temporary if necessary */
3607static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3608{
3609    TCGTemp *ts = s->reg_to_temp[reg];
3610    if (ts != NULL) {
3611        temp_sync(s, ts, allocated_regs, 0, -1);
3612    }
3613}
3614
3615/**
3616 * tcg_reg_alloc:
3617 * @required_regs: Set of registers in which we must allocate.
3618 * @allocated_regs: Set of registers which must be avoided.
3619 * @preferred_regs: Set of registers we should prefer.
3620 * @rev: True if we search the registers in "indirect" order.
3621 *
3622 * The allocated register must be in @required_regs & ~@allocated_regs,
3623 * but if we can put it in @preferred_regs we may save a move later.
3624 */
3625static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3626                            TCGRegSet allocated_regs,
3627                            TCGRegSet preferred_regs, bool rev)
3628{
3629    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3630    TCGRegSet reg_ct[2];
3631    const int *order;
3632
3633    reg_ct[1] = required_regs & ~allocated_regs;
3634    tcg_debug_assert(reg_ct[1] != 0);
3635    reg_ct[0] = reg_ct[1] & preferred_regs;
3636
3637    /* Skip the preferred_regs option if it cannot be satisfied,
3638       or if the preference made no difference.  */
3639    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3640
3641    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3642
3643    /* Try free registers, preferences first.  */
3644    for (j = f; j < 2; j++) {
3645        TCGRegSet set = reg_ct[j];
3646
3647        if (tcg_regset_single(set)) {
3648            /* One register in the set.  */
3649            TCGReg reg = tcg_regset_first(set);
3650            if (s->reg_to_temp[reg] == NULL) {
3651                return reg;
3652            }
3653        } else {
3654            for (i = 0; i < n; i++) {
3655                TCGReg reg = order[i];
3656                if (s->reg_to_temp[reg] == NULL &&
3657                    tcg_regset_test_reg(set, reg)) {
3658                    return reg;
3659                }
3660            }
3661        }
3662    }
3663
3664    /* We must spill something.  */
3665    for (j = f; j < 2; j++) {
3666        TCGRegSet set = reg_ct[j];
3667
3668        if (tcg_regset_single(set)) {
3669            /* One register in the set.  */
3670            TCGReg reg = tcg_regset_first(set);
3671            tcg_reg_free(s, reg, allocated_regs);
3672            return reg;
3673        } else {
3674            for (i = 0; i < n; i++) {
3675                TCGReg reg = order[i];
3676                if (tcg_regset_test_reg(set, reg)) {
3677                    tcg_reg_free(s, reg, allocated_regs);
3678                    return reg;
3679                }
3680            }
3681        }
3682    }
3683
3684    tcg_abort();
3685}
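/*
 * A minimal standalone sketch of the two-pass policy above, assuming a
 * plain uint32_t bitmask in place of TCGRegSet (hypothetical helper,
 * not part of TCG):
 *
 *     static int pick_reg(uint32_t candidates, const int *order, int n,
 *                         void *const *reg_to_temp)
 *     {
 *         // Pass 1: prefer a register that is currently free.
 *         for (int i = 0; i < n; i++) {
 *             int r = order[i];
 *             if ((candidates >> r & 1) && reg_to_temp[r] == NULL) {
 *                 return r;
 *             }
 *         }
 *         // Pass 2: nothing free; return a candidate for the caller
 *         // to spill (via tcg_reg_free above).
 *         for (int i = 0; i < n; i++) {
 *             int r = order[i];
 *             if (candidates >> r & 1) {
 *                 return r;
 *             }
 *         }
 *         return -1;  // empty set; tcg_reg_alloc aborts instead
 *     }
 *
 * tcg_reg_alloc itself tries free registers in the preferred subset,
 * then free registers in the full constraint set, and only then spills,
 * again preferring the preferred subset.
 */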
3686
3687/* Make sure the temporary is in a register.  If needed, allocate the register
3688   from DESIRED while avoiding ALLOCATED.  */
3689static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3690                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3691{
3692    TCGReg reg;
3693
3694    switch (ts->val_type) {
3695    case TEMP_VAL_REG:
3696        return;
3697    case TEMP_VAL_CONST:
3698        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3699                            preferred_regs, ts->indirect_base);
3700        if (ts->type <= TCG_TYPE_I64) {
3701            tcg_out_movi(s, ts->type, reg, ts->val);
3702        } else {
3703            uint64_t val = ts->val;
3704            MemOp vece = MO_64;
3705
3706            /*
3707             * Find the minimal vector element that matches the constant.
3708             * The targets will, in general, have to do this search anyway,
3709             * so do it generically here.
3710             */
3711            if (val == dup_const(MO_8, val)) {
3712                vece = MO_8;
3713            } else if (val == dup_const(MO_16, val)) {
3714                vece = MO_16;
3715            } else if (val == dup_const(MO_32, val)) {
3716                vece = MO_32;
3717            }
3718
3719            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3720        }
3721        ts->mem_coherent = 0;
3722        break;
3723    case TEMP_VAL_MEM:
3724        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3725                            preferred_regs, ts->indirect_base);
3726        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3727        ts->mem_coherent = 1;
3728        break;
3729    case TEMP_VAL_DEAD:
3730    default:
3731        tcg_abort();
3732    }
3733    ts->reg = reg;
3734    ts->val_type = TEMP_VAL_REG;
3735    s->reg_to_temp[reg] = ts;
3736}
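/*
 * Examples for the minimal-VECE search above: 0x4242424242424242 equals
 * dup_const(MO_8, 0x42) and is dup'ed as bytes; 0x0001000100010001
 * fails the MO_8 test but matches MO_16; a value such as
 * 0x0123456789abcdef matches none of them and is emitted with MO_64.
 */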
3737
3738/* Save a temporary to memory. 'allocated_regs' is used in case a
3739   temporary register needs to be allocated to store a constant.  */
3740static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3741{
3742    /* The liveness analysis already ensures that globals are back
3743       in memory. Keep a tcg_debug_assert for safety. */
3744    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3745}
3746
3747/* save globals to their canonical location and assume they can be
3748   modified by the following code. 'allocated_regs' is used in case a
3749   temporary register needs to be allocated to store a constant. */
3750static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3751{
3752    int i, n;
3753
3754    for (i = 0, n = s->nb_globals; i < n; i++) {
3755        temp_save(s, &s->temps[i], allocated_regs);
3756    }
3757}
3758
3759/* sync globals to their canonical location and assume they can be
3760   read by the following code. 'allocated_regs' is used in case a
3761   temporary register needs to be allocated to store a constant. */
3762static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3763{
3764    int i, n;
3765
3766    for (i = 0, n = s->nb_globals; i < n; i++) {
3767        TCGTemp *ts = &s->temps[i];
3768        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3769                         || ts->kind == TEMP_FIXED
3770                         || ts->mem_coherent);
3771    }
3772}
3773
3774/* at the end of a basic block, we assume all temporaries are dead and
3775   all globals are stored at their canonical location. */
3776static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3777{
3778    int i;
3779
3780    for (i = s->nb_globals; i < s->nb_temps; i++) {
3781        TCGTemp *ts = &s->temps[i];
3782
3783        switch (ts->kind) {
3784        case TEMP_LOCAL:
3785            temp_save(s, ts, allocated_regs);
3786            break;
3787        case TEMP_NORMAL:
3788            /* The liveness analysis already ensures that temps are dead.
3789               Keep a tcg_debug_assert for safety. */
3790            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3791            break;
3792        case TEMP_CONST:
3793            /* Similarly, we should have freed any allocated register. */
3794            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3795            break;
3796        default:
3797            g_assert_not_reached();
3798        }
3799    }
3800
3801    save_globals(s, allocated_regs);
3802}
3803
3804/*
3805 * At a conditional branch, we assume all temporaries are dead and
3806 * all globals and local temps are synced to their location.
3807 */
3808static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3809{
3810    sync_globals(s, allocated_regs);
3811
3812    for (int i = s->nb_globals; i < s->nb_temps; i++) {
3813        TCGTemp *ts = &s->temps[i];
3814        /*
3815         * The liveness analysis already ensures that temps are dead.
3816         * Keep tcg_debug_asserts for safety.
3817         */
3818        switch (ts->kind) {
3819        case TEMP_LOCAL:
3820            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3821            break;
3822        case TEMP_NORMAL:
3823            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3824            break;
3825        case TEMP_CONST:
3826            break;
3827        default:
3828            g_assert_not_reached();
3829        }
3830    }
3831}
3832
3833/*
3834 * Specialized code generation for INDEX_op_mov_* with a constant.
3835 */
3836static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3837                                  tcg_target_ulong val, TCGLifeData arg_life,
3838                                  TCGRegSet preferred_regs)
3839{
3840    /* ENV should not be modified.  */
3841    tcg_debug_assert(!temp_readonly(ots));
3842
3843    /* The movi is not explicitly generated here.  */
3844    if (ots->val_type == TEMP_VAL_REG) {
3845        s->reg_to_temp[ots->reg] = NULL;
3846    }
3847    ots->val_type = TEMP_VAL_CONST;
3848    ots->val = val;
3849    ots->mem_coherent = 0;
3850    if (NEED_SYNC_ARG(0)) {
3851        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3852    } else if (IS_DEAD_ARG(0)) {
3853        temp_dead(s, ots);
3854    }
3855}
3856
3857/*
3858 * Specialized code generation for INDEX_op_mov_*.
3859 */
3860static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3861{
3862    const TCGLifeData arg_life = op->life;
3863    TCGRegSet allocated_regs, preferred_regs;
3864    TCGTemp *ts, *ots;
3865    TCGType otype, itype;
3866
3867    allocated_regs = s->reserved_regs;
3868    preferred_regs = op->output_pref[0];
3869    ots = arg_temp(op->args[0]);
3870    ts = arg_temp(op->args[1]);
3871
3872    /* ENV should not be modified.  */
3873    tcg_debug_assert(!temp_readonly(ots));
3874
3875    /* Note that otype != itype for no-op truncation.  */
3876    otype = ots->type;
3877    itype = ts->type;
3878
3879    if (ts->val_type == TEMP_VAL_CONST) {
3880        /* propagate constant or generate sti */
3881        tcg_target_ulong val = ts->val;
3882        if (IS_DEAD_ARG(1)) {
3883            temp_dead(s, ts);
3884        }
3885        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3886        return;
3887    }
3888
3889    /* If the source value is in memory we're going to be forced
3890       to have it in a register in order to perform the copy.  Copy
3891       the SOURCE value into its own register first, that way we
3892       don't have to reload SOURCE the next time it is used. */
3893    if (ts->val_type == TEMP_VAL_MEM) {
3894        temp_load(s, ts, tcg_target_available_regs[itype],
3895                  allocated_regs, preferred_regs);
3896    }
3897
3898    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3899    if (IS_DEAD_ARG(0)) {
3900        /* mov to a non-saved dead register makes no sense (even with
3901           liveness analysis disabled). */
3902        tcg_debug_assert(NEED_SYNC_ARG(0));
3903        if (!ots->mem_allocated) {
3904            temp_allocate_frame(s, ots);
3905        }
3906        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3907        if (IS_DEAD_ARG(1)) {
3908            temp_dead(s, ts);
3909        }
3910        temp_dead(s, ots);
3911    } else {
3912        if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3913            /* the mov can be suppressed */
3914            if (ots->val_type == TEMP_VAL_REG) {
3915                s->reg_to_temp[ots->reg] = NULL;
3916            }
3917            ots->reg = ts->reg;
3918            temp_dead(s, ts);
3919        } else {
3920            if (ots->val_type != TEMP_VAL_REG) {
3921                /* When allocating a new register, make sure to not spill the
3922                   input one. */
3923                tcg_regset_set_reg(allocated_regs, ts->reg);
3924                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3925                                         allocated_regs, preferred_regs,
3926                                         ots->indirect_base);
3927            }
3928            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3929                /*
3930                 * Cross register class move not supported.
3931                 * Store the source register into the destination slot
3932                 * and leave the destination temp as TEMP_VAL_MEM.
3933                 */
3934                assert(!temp_readonly(ots));
3935                if (!ots->mem_allocated) {
3936                    temp_allocate_frame(s, ots);
3937                }
3938                tcg_out_st(s, ts->type, ts->reg,
3939                           ots->mem_base->reg, ots->mem_offset);
3940                ots->mem_coherent = 1;
3941                temp_free_or_dead(s, ots, -1);
3942                return;
3943            }
3944        }
3945        ots->val_type = TEMP_VAL_REG;
3946        ots->mem_coherent = 0;
3947        s->reg_to_temp[ots->reg] = ots;
3948        if (NEED_SYNC_ARG(0)) {
3949            temp_sync(s, ots, allocated_regs, 0, 0);
3950        }
3951    }
3952}
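/*
 * Illustration of the "mov can be suppressed" path above: for
 * mov_i32 t1, t0 where t0 is in host register R, dies at this op, and
 * is not TEMP_FIXED, no host instruction is emitted at all; t1 simply
 * takes over R and reg_to_temp[R] is repointed from t0 to t1.
 */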
3953
3954/*
3955 * Specialized code generation for INDEX_op_dup_vec.
3956 */
3957static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3958{
3959    const TCGLifeData arg_life = op->life;
3960    TCGRegSet dup_out_regs, dup_in_regs;
3961    TCGTemp *its, *ots;
3962    TCGType itype, vtype;
3963    intptr_t endian_fixup;
3964    unsigned vece;
3965    bool ok;
3966
3967    ots = arg_temp(op->args[0]);
3968    its = arg_temp(op->args[1]);
3969
3970    /* ENV should not be modified.  */
3971    tcg_debug_assert(!temp_readonly(ots));
3972
3973    itype = its->type;
3974    vece = TCGOP_VECE(op);
3975    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3976
3977    if (its->val_type == TEMP_VAL_CONST) {
3978        /* Propagate constant via movi -> dupi.  */
3979        tcg_target_ulong val = its->val;
3980        if (IS_DEAD_ARG(1)) {
3981            temp_dead(s, its);
3982        }
3983        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3984        return;
3985    }
3986
3987    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3988    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3989
3990    /* Allocate the output register now.  */
3991    if (ots->val_type != TEMP_VAL_REG) {
3992        TCGRegSet allocated_regs = s->reserved_regs;
3993
3994        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3995            /* Make sure to not spill the input register. */
3996            tcg_regset_set_reg(allocated_regs, its->reg);
3997        }
3998        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3999                                 op->output_pref[0], ots->indirect_base);
4000        ots->val_type = TEMP_VAL_REG;
4001        ots->mem_coherent = 0;
4002        s->reg_to_temp[ots->reg] = ots;
4003    }
4004
4005    switch (its->val_type) {
4006    case TEMP_VAL_REG:
4007        /*
4008         * The dup constraints must be broad, covering all possible VECE.
4009         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4010         * to fail, indicating that extra moves are required for that case.
4011         */
4012        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4013            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4014                goto done;
4015            }
4016            /* Try again from memory or a vector input register.  */
4017        }
4018        if (!its->mem_coherent) {
4019            /*
4020             * The input register is not synced, and so an extra store
4021             * would be required to use memory.  Attempt an integer-vector
4022             * register move first.  We do not have a TCGRegSet for this.
4023             */
4024            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4025                break;
4026            }
4027            /* Sync the temp back to its slot and load from there.  */
4028            temp_sync(s, its, s->reserved_regs, 0, 0);
4029        }
4030        /* fall through */
4031
4032    case TEMP_VAL_MEM:
4033#ifdef HOST_WORDS_BIGENDIAN
4034        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
4035        endian_fixup -= 1 << vece;
4036#else
4037        endian_fixup = 0;
4038#endif
4039        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4040                             its->mem_offset + endian_fixup)) {
4041            goto done;
4042        }
4043        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4044        break;
4045
4046    default:
4047        g_assert_not_reached();
4048    }
4049
4050    /* We now have a vector input register, so dup must succeed. */
4051    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4052    tcg_debug_assert(ok);
4053
4054 done:
4055    if (IS_DEAD_ARG(1)) {
4056        temp_dead(s, its);
4057    }
4058    if (NEED_SYNC_ARG(0)) {
4059        temp_sync(s, ots, s->reserved_regs, 0, 0);
4060    }
4061    if (IS_DEAD_ARG(0)) {
4062        temp_dead(s, ots);
4063    }
4064}
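/*
 * Example of the endian_fixup arithmetic above: on a big-endian host,
 * duplicating an MO_8 element from a TCG_TYPE_I64 slot must load the
 * last byte of the stored value, so the offset is adjusted by
 * 8 - (1 << MO_8) = 7.  On a little-endian host the low-order element
 * always starts at offset 0 and no fixup is needed.
 */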
4065
4066static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4067{
4068    const TCGLifeData arg_life = op->life;
4069    const TCGOpDef * const def = &tcg_op_defs[op->opc];
4070    TCGRegSet i_allocated_regs;
4071    TCGRegSet o_allocated_regs;
4072    int i, k, nb_iargs, nb_oargs;
4073    TCGReg reg;
4074    TCGArg arg;
4075    const TCGArgConstraint *arg_ct;
4076    TCGTemp *ts;
4077    TCGArg new_args[TCG_MAX_OP_ARGS];
4078    int const_args[TCG_MAX_OP_ARGS];
4079
4080    nb_oargs = def->nb_oargs;
4081    nb_iargs = def->nb_iargs;
4082
4083    /* copy constants */
4084    memcpy(new_args + nb_oargs + nb_iargs, 
4085           op->args + nb_oargs + nb_iargs,
4086           sizeof(TCGArg) * def->nb_cargs);
4087
4088    i_allocated_regs = s->reserved_regs;
4089    o_allocated_regs = s->reserved_regs;
4090
4091    /* satisfy input constraints */ 
4092    for (k = 0; k < nb_iargs; k++) {
4093        TCGRegSet i_preferred_regs, o_preferred_regs;
4094
4095        i = def->args_ct[nb_oargs + k].sort_index;
4096        arg = op->args[i];
4097        arg_ct = &def->args_ct[i];
4098        ts = arg_temp(arg);
4099
4100        if (ts->val_type == TEMP_VAL_CONST
4101            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
4102            /* constant is OK for instruction */
4103            const_args[i] = 1;
4104            new_args[i] = ts->val;
4105            continue;
4106        }
4107
4108        i_preferred_regs = o_preferred_regs = 0;
4109        if (arg_ct->ialias) {
4110            o_preferred_regs = op->output_pref[arg_ct->alias_index];
4111
4112            /*
4113             * If the input is readonly, then it cannot also be an
4114             * output and aliased to itself.  If the input is not
4115             * dead after the instruction, we must allocate a new
4116             * register and move it.
4117             */
4118            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4119                goto allocate_in_reg;
4120            }
4121
4122            /*
4123             * Check if the current register has already been allocated
4124             * for another input aliased to an output.
4125             */
4126            if (ts->val_type == TEMP_VAL_REG) {
4127                reg = ts->reg;
4128                for (int k2 = 0; k2 < k; k2++) {
4129                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
4130                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4131                        goto allocate_in_reg;
4132                    }
4133                }
4134            }
4135            i_preferred_regs = o_preferred_regs;
4136        }
4137
4138        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4139        reg = ts->reg;
4140
4141        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4142 allocate_in_reg:
4143            /*
4144             * Allocate a new register matching the constraint
4145             * and move the temporary register into it.
4146             */
4147            temp_load(s, ts, tcg_target_available_regs[ts->type],
4148                      i_allocated_regs, 0);
4149            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4150                                o_preferred_regs, ts->indirect_base);
4151            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4152                /*
4153                 * Cross register class move not supported.  Sync the
4154                 * temp back to its slot and load from there.
4155                 */
4156                temp_sync(s, ts, i_allocated_regs, 0, 0);
4157                tcg_out_ld(s, ts->type, reg,
4158                           ts->mem_base->reg, ts->mem_offset);
4159            }
4160        }
4161        new_args[i] = reg;
4162        const_args[i] = 0;
4163        tcg_regset_set_reg(i_allocated_regs, reg);
4164    }
4165    
4166    /* mark dead temporaries and free the associated registers */
4167    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4168        if (IS_DEAD_ARG(i)) {
4169            temp_dead(s, arg_temp(op->args[i]));
4170        }
4171    }
4172
4173    if (def->flags & TCG_OPF_COND_BRANCH) {
4174        tcg_reg_alloc_cbranch(s, i_allocated_regs);
4175    } else if (def->flags & TCG_OPF_BB_END) {
4176        tcg_reg_alloc_bb_end(s, i_allocated_regs);
4177    } else {
4178        if (def->flags & TCG_OPF_CALL_CLOBBER) {
4179            /* XXX: permit generic clobber register list ? */ 
4180            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4181                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4182                    tcg_reg_free(s, i, i_allocated_regs);
4183                }
4184            }
4185        }
4186        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4187            /* sync globals if the op has side effects and might trigger
4188               an exception. */
4189            sync_globals(s, i_allocated_regs);
4190        }
4191        
4192        /* satisfy the output constraints */
4193        for(k = 0; k < nb_oargs; k++) {
4194            i = def->args_ct[k].sort_index;
4195            arg = op->args[i];
4196            arg_ct = &def->args_ct[i];
4197            ts = arg_temp(arg);
4198
4199            /* ENV should not be modified.  */
4200            tcg_debug_assert(!temp_readonly(ts));
4201
4202            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4203                reg = new_args[arg_ct->alias_index];
4204            } else if (arg_ct->newreg) {
4205                reg = tcg_reg_alloc(s, arg_ct->regs,
4206                                    i_allocated_regs | o_allocated_regs,
4207                                    op->output_pref[k], ts->indirect_base);
4208            } else {
4209                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4210                                    op->output_pref[k], ts->indirect_base);
4211            }
4212            tcg_regset_set_reg(o_allocated_regs, reg);
4213            if (ts->val_type == TEMP_VAL_REG) {
4214                s->reg_to_temp[ts->reg] = NULL;
4215            }
4216            ts->val_type = TEMP_VAL_REG;
4217            ts->reg = reg;
4218            /*
4219             * Temp value is modified, so the value kept in memory is
4220             * potentially not the same.
4221             */
4222            ts->mem_coherent = 0;
4223            s->reg_to_temp[reg] = ts;
4224            new_args[i] = reg;
4225        }
4226    }
4227
4228    /* emit instruction */
4229    if (def->flags & TCG_OPF_VECTOR) {
4230        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4231                       new_args, const_args);
4232    } else {
4233        tcg_out_op(s, op->opc, new_args, const_args);
4234    }
4235
4236    /* move the outputs in the correct register if needed */
4237    for(i = 0; i < nb_oargs; i++) {
4238        ts = arg_temp(op->args[i]);
4239
4240        /* ENV should not be modified.  */
4241        tcg_debug_assert(!temp_readonly(ts));
4242
4243        if (NEED_SYNC_ARG(i)) {
4244            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4245        } else if (IS_DEAD_ARG(i)) {
4246            temp_dead(s, ts);
4247        }
4248    }
4249}
4250
4251static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4252{
4253    const TCGLifeData arg_life = op->life;
4254    TCGTemp *ots, *itsl, *itsh;
4255    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4256
4257    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4258    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4259    tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4260
4261    ots = arg_temp(op->args[0]);
4262    itsl = arg_temp(op->args[1]);
4263    itsh = arg_temp(op->args[2]);
4264
4265    /* ENV should not be modified.  */
4266    tcg_debug_assert(!temp_readonly(ots));
4267
4268    /* Allocate the output register now.  */
4269    if (ots->val_type != TEMP_VAL_REG) {
4270        TCGRegSet allocated_regs = s->reserved_regs;
4271        TCGRegSet dup_out_regs =
4272            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4273
4274        /* Make sure to not spill the input registers. */
4275        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4276            tcg_regset_set_reg(allocated_regs, itsl->reg);
4277        }
4278        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4279            tcg_regset_set_reg(allocated_regs, itsh->reg);
4280        }
4281
4282        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4283                                 op->output_pref[0], ots->indirect_base);
4284        ots->val_type = TEMP_VAL_REG;
4285        ots->mem_coherent = 0;
4286        s->reg_to_temp[ots->reg] = ots;
4287    }
4288
4289    /* Promote dup2 of immediates to dupi_vec. */
4290    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4291        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4292        MemOp vece = MO_64;
4293
4294        if (val == dup_const(MO_8, val)) {
4295            vece = MO_8;
4296        } else if (val == dup_const(MO_16, val)) {
4297            vece = MO_16;
4298        } else if (val == dup_const(MO_32, val)) {
4299            vece = MO_32;
4300        }
4301
4302        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4303        goto done;
4304    }
4305
4306    /* If the two inputs form one 64-bit value, try dupm_vec. */
4307    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4308        if (!itsl->mem_coherent) {
4309            temp_sync(s, itsl, s->reserved_regs, 0, 0);
4310        }
4311        if (!itsh->mem_coherent) {
4312            temp_sync(s, itsh, s->reserved_regs, 0, 0);
4313        }
4314#ifdef HOST_WORDS_BIGENDIAN
4315        TCGTemp *its = itsh;
4316#else
4317        TCGTemp *its = itsl;
4318#endif
4319        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4320                             its->mem_base->reg, its->mem_offset)) {
4321            goto done;
4322        }
4323    }
4324
4325    /* Fall back to generic expansion. */
4326    return false;
4327
4328 done:
4329    if (IS_DEAD_ARG(1)) {
4330        temp_dead(s, itsl);
4331    }
4332    if (IS_DEAD_ARG(2)) {
4333        temp_dead(s, itsh);
4334    }
4335    if (NEED_SYNC_ARG(0)) {
4336        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4337    } else if (IS_DEAD_ARG(0)) {
4338        temp_dead(s, ots);
4339    }
4340    return true;
4341}
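/*
 * Example of the immediate promotion above: with itsl->val = 0x42 and
 * itsh->val = 0x42, deposit64 produces 0x0000004200000042, which fails
 * the MO_8 and MO_16 tests but equals dup_const(MO_32, 0x42), so a
 * single dupi with vece = MO_32 materializes both halves.
 */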
4342
4343#ifdef TCG_TARGET_STACK_GROWSUP
4344#define STACK_DIR(x) (-(x))
4345#else
4346#define STACK_DIR(x) (x)
4347#endif
4348
4349static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4350{
4351    const int nb_oargs = TCGOP_CALLO(op);
4352    const int nb_iargs = TCGOP_CALLI(op);
4353    const TCGLifeData arg_life = op->life;
4354    int flags, nb_regs, i;
4355    TCGReg reg;
4356    TCGArg arg;
4357    TCGTemp *ts;
4358    intptr_t stack_offset;
4359    size_t call_stack_size;
4360    tcg_insn_unit *func_addr;
4361    int allocate_args;
4362    TCGRegSet allocated_regs;
4363
4364    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4365    flags = op->args[nb_oargs + nb_iargs + 1];
4366
4367    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4368    if (nb_regs > nb_iargs) {
4369        nb_regs = nb_iargs;
4370    }
4371
4372    /* assign stack slots first */
4373    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4374    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
4375        ~(TCG_TARGET_STACK_ALIGN - 1);
4376    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4377    if (allocate_args) {
4378        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4379           preallocate call stack */
4380        tcg_abort();
4381    }
4382
4383    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4384    for (i = nb_regs; i < nb_iargs; i++) {
4385        arg = op->args[nb_oargs + i];
4386#ifdef TCG_TARGET_STACK_GROWSUP
4387        stack_offset -= sizeof(tcg_target_long);
4388#endif
4389        if (arg != TCG_CALL_DUMMY_ARG) {
4390            ts = arg_temp(arg);
4391            temp_load(s, ts, tcg_target_available_regs[ts->type],
4392                      s->reserved_regs, 0);
4393            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4394        }
4395#ifndef TCG_TARGET_STACK_GROWSUP
4396        stack_offset += sizeof(tcg_target_long);
4397#endif
4398    }
4399    
4400    /* assign input registers */
4401    allocated_regs = s->reserved_regs;
4402    for (i = 0; i < nb_regs; i++) {
4403        arg = op->args[nb_oargs + i];
4404        if (arg != TCG_CALL_DUMMY_ARG) {
4405            ts = arg_temp(arg);
4406            reg = tcg_target_call_iarg_regs[i];
4407
4408            if (ts->val_type == TEMP_VAL_REG) {
4409                if (ts->reg != reg) {
4410                    tcg_reg_free(s, reg, allocated_regs);
4411                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4412                        /*
4413                         * Cross register class move not supported.  Sync the
4414                         * temp back to its slot and load from there.
4415                         */
4416                        temp_sync(s, ts, allocated_regs, 0, 0);
4417                        tcg_out_ld(s, ts->type, reg,
4418                                   ts->mem_base->reg, ts->mem_offset);
4419                    }
4420                }
4421            } else {
4422                TCGRegSet arg_set = 0;
4423
4424                tcg_reg_free(s, reg, allocated_regs);
4425                tcg_regset_set_reg(arg_set, reg);
4426                temp_load(s, ts, arg_set, allocated_regs, 0);
4427            }
4428
4429            tcg_regset_set_reg(allocated_regs, reg);
4430        }
4431    }
4432    
4433    /* mark dead temporaries and free the associated registers */
4434    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4435        if (IS_DEAD_ARG(i)) {
4436            temp_dead(s, arg_temp(op->args[i]));
4437        }
4438    }
4439    
4440    /* clobber call registers */
4441    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4442        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4443            tcg_reg_free(s, i, allocated_regs);
4444        }
4445    }
4446
4447    /* Save globals if they might be written by the helper, sync them if
4448       they might be read. */
4449    if (flags & TCG_CALL_NO_READ_GLOBALS) {
4450        /* Nothing to do */
4451    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4452        sync_globals(s, allocated_regs);
4453    } else {
4454        save_globals(s, allocated_regs);
4455    }
4456
4457    tcg_out_call(s, func_addr);
4458
4459    /* assign output registers and emit moves if needed */
4460    for(i = 0; i < nb_oargs; i++) {
4461        arg = op->args[i];
4462        ts = arg_temp(arg);
4463
4464        /* ENV should not be modified.  */
4465        tcg_debug_assert(!temp_readonly(ts));
4466
4467        reg = tcg_target_call_oarg_regs[i];
4468        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4469        if (ts->val_type == TEMP_VAL_REG) {
4470            s->reg_to_temp[ts->reg] = NULL;
4471        }
4472        ts->val_type = TEMP_VAL_REG;
4473        ts->reg = reg;
4474        ts->mem_coherent = 0;
4475        s->reg_to_temp[reg] = ts;
4476        if (NEED_SYNC_ARG(i)) {
4477            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4478        } else if (IS_DEAD_ARG(i)) {
4479            temp_dead(s, ts);
4480        }
4481    }
4482}
4483
4484#ifdef CONFIG_PROFILER
4485
4486/* avoid copy/paste errors */
4487#define PROF_ADD(to, from, field)                       \
4488    do {                                                \
4489        (to)->field += qatomic_read(&((from)->field));  \
4490    } while (0)
4491
4492#define PROF_MAX(to, from, field)                                       \
4493    do {                                                                \
4494        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4495        if (val__ > (to)->field) {                                      \
4496            (to)->field = val__;                                        \
4497        }                                                               \
4498    } while (0)
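/*
 * Usage sketch for the two macros above: given a zeroed aggregate and a
 * per-context profile, counters accumulate and high-water marks keep
 * the maximum:
 *
 *     PROF_ADD(&total, &per_ctx, op_count);      // total.op_count += ...
 *     PROF_MAX(&total, &per_ctx, op_count_max);  // keep the largest seen
 */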
4499
4500/* Pass in a zero'ed @prof */
4501static inline
4502void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4503{
4504    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4505    unsigned int i;
4506
4507    for (i = 0; i < n_ctxs; i++) {
4508        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4509        const TCGProfile *orig = &s->prof;
4510
4511        if (counters) {
4512            PROF_ADD(prof, orig, cpu_exec_time);
4513            PROF_ADD(prof, orig, tb_count1);
4514            PROF_ADD(prof, orig, tb_count);
4515            PROF_ADD(prof, orig, op_count);
4516            PROF_MAX(prof, orig, op_count_max);
4517            PROF_ADD(prof, orig, temp_count);
4518            PROF_MAX(prof, orig, temp_count_max);
4519            PROF_ADD(prof, orig, del_op_count);
4520            PROF_ADD(prof, orig, code_in_len);
4521            PROF_ADD(prof, orig, code_out_len);
4522            PROF_ADD(prof, orig, search_out_len);
4523            PROF_ADD(prof, orig, interm_time);
4524            PROF_ADD(prof, orig, code_time);
4525            PROF_ADD(prof, orig, la_time);
4526            PROF_ADD(prof, orig, opt_time);
4527            PROF_ADD(prof, orig, restore_count);
4528            PROF_ADD(prof, orig, restore_time);
4529        }
4530        if (table) {
4531            int i;
4532
4533            for (i = 0; i < NB_OPS; i++) {
4534                PROF_ADD(prof, orig, table_op_count[i]);
4535            }
4536        }
4537    }
4538}
4539
4540#undef PROF_ADD
4541#undef PROF_MAX
4542
4543static void tcg_profile_snapshot_counters(TCGProfile *prof)
4544{
4545    tcg_profile_snapshot(prof, true, false);
4546}
4547
4548static void tcg_profile_snapshot_table(TCGProfile *prof)
4549{
4550    tcg_profile_snapshot(prof, false, true);
4551}
4552
4553void tcg_dump_op_count(void)
4554{
4555    TCGProfile prof = {};
4556    int i;
4557
4558    tcg_profile_snapshot_table(&prof);
4559    for (i = 0; i < NB_OPS; i++) {
4560        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4561                    prof.table_op_count[i]);
4562    }
4563}
4564
4565int64_t tcg_cpu_exec_time(void)
4566{
4567    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4568    unsigned int i;
4569    int64_t ret = 0;
4570
4571    for (i = 0; i < n_ctxs; i++) {
4572        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4573        const TCGProfile *prof = &s->prof;
4574
4575        ret += qatomic_read(&prof->cpu_exec_time);
4576    }
4577    return ret;
4578}
4579#else
4580void tcg_dump_op_count(void)
4581{
4582    qemu_printf("[TCG profiler not compiled]\n");
4583}
4584
4585int64_t tcg_cpu_exec_time(void)
4586{
4587    error_report("%s: TCG profiler not compiled", __func__);
4588    exit(EXIT_FAILURE);
4589}
4590#endif
4591
4592
4593int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4594{
4595#ifdef CONFIG_PROFILER
4596    TCGProfile *prof = &s->prof;
4597#endif
4598    int i, num_insns;
4599    TCGOp *op;
4600
4601#ifdef CONFIG_PROFILER
4602    {
4603        int n = 0;
4604
4605        QTAILQ_FOREACH(op, &s->ops, link) {
4606            n++;
4607        }
4608        qatomic_set(&prof->op_count, prof->op_count + n);
4609        if (n > prof->op_count_max) {
4610            qatomic_set(&prof->op_count_max, n);
4611        }
4612
4613        n = s->nb_temps;
4614        qatomic_set(&prof->temp_count, prof->temp_count + n);
4615        if (n > prof->temp_count_max) {
4616            qatomic_set(&prof->temp_count_max, n);
4617        }
4618    }
4619#endif
4620
4621#ifdef DEBUG_DISAS
4622    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4623                 && qemu_log_in_addr_range(tb->pc))) {
4624        FILE *logfile = qemu_log_lock();
4625        qemu_log("OP:\n");
4626        tcg_dump_ops(s, false);
4627        qemu_log("\n");
4628        qemu_log_unlock(logfile);
4629    }
4630#endif
4631
4632#ifdef CONFIG_DEBUG_TCG
4633    /* Ensure all labels referenced have been emitted.  */
4634    {
4635        TCGLabel *l;
4636        bool error = false;
4637
4638        QSIMPLEQ_FOREACH(l, &s->labels, next) {
4639            if (unlikely(!l->present) && l->refs) {
4640                qemu_log_mask(CPU_LOG_TB_OP,
4641                              "$L%d referenced but not present.\n", l->id);
4642                error = true;
4643            }
4644        }
4645        assert(!error);
4646    }
4647#endif
4648
4649#ifdef CONFIG_PROFILER
4650    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4651#endif
4652
4653#ifdef USE_TCG_OPTIMIZATIONS
4654    tcg_optimize(s);
4655#endif
4656
4657#ifdef CONFIG_PROFILER
4658    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4659    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4660#endif
4661
4662    reachable_code_pass(s);
4663    liveness_pass_1(s);
4664
4665    if (s->nb_indirects > 0) {
4666#ifdef DEBUG_DISAS
4667        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4668                     && qemu_log_in_addr_range(tb->pc))) {
4669            FILE *logfile = qemu_log_lock();
4670            qemu_log("OP before indirect lowering:\n");
4671            tcg_dump_ops(s, false);
4672            qemu_log("\n");
4673            qemu_log_unlock(logfile);
4674        }
4675#endif
4676        /* Replace indirect temps with direct temps.  */
4677        if (liveness_pass_2(s)) {
4678            /* If changes were made, re-run liveness.  */
4679            liveness_pass_1(s);
4680        }
4681    }
4682
4683#ifdef CONFIG_PROFILER
4684    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4685#endif
4686
4687#ifdef DEBUG_DISAS
4688    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4689                 && qemu_log_in_addr_range(tb->pc))) {
4690        FILE *logfile = qemu_log_lock();
4691        qemu_log("OP after optimization and liveness analysis:\n");
4692        tcg_dump_ops(s, true);
4693        qemu_log("\n");
4694        qemu_log_unlock(logfile);
4695    }
4696#endif
4697
4698    tcg_reg_alloc_start(s);
4699
4700    /*
4701     * Reset the buffer pointers when restarting after overflow.
4702     * TODO: Move this into translate-all.c with the rest of the
4703     * buffer management.  Having only this done here is confusing.
4704     */
4705    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4706    s->code_ptr = s->code_buf;
4707
4708#ifdef TCG_TARGET_NEED_LDST_LABELS
4709    QSIMPLEQ_INIT(&s->ldst_labels);
4710#endif
4711#ifdef TCG_TARGET_NEED_POOL_LABELS
4712    s->pool_labels = NULL;
4713#endif
4714
4715    num_insns = -1;
4716    QTAILQ_FOREACH(op, &s->ops, link) {
4717        TCGOpcode opc = op->opc;
4718
4719#ifdef CONFIG_PROFILER
4720        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4721#endif
4722
4723        switch (opc) {
4724        case INDEX_op_mov_i32:
4725        case INDEX_op_mov_i64:
4726        case INDEX_op_mov_vec:
4727            tcg_reg_alloc_mov(s, op);
4728            break;
4729        case INDEX_op_dup_vec:
4730            tcg_reg_alloc_dup(s, op);
4731            break;
4732        case INDEX_op_insn_start:
4733            if (num_insns >= 0) {
4734                size_t off = tcg_current_code_size(s);
4735                s->gen_insn_end_off[num_insns] = off;
4736                /* Assert that we do not overflow our stored offset.  */
4737                assert(s->gen_insn_end_off[num_insns] == off);
4738            }
4739            num_insns++;
4740            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4741                target_ulong a;
4742#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4743                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4744#else
4745                a = op->args[i];
4746#endif
4747                s->gen_insn_data[num_insns][i] = a;
4748            }
4749            break;
4750        case INDEX_op_discard:
4751            temp_dead(s, arg_temp(op->args[0]));
4752            break;
4753        case INDEX_op_set_label:
4754            tcg_reg_alloc_bb_end(s, s->reserved_regs);
4755            tcg_out_label(s, arg_label(op->args[0]));
4756            break;
4757        case INDEX_op_call:
4758            tcg_reg_alloc_call(s, op);
4759            break;
4760        case INDEX_op_dup2_vec:
4761            if (tcg_reg_alloc_dup2(s, op)) {
4762                break;
4763            }
4764            /* fall through */
4765        default:
4766            /* Sanity check that we've not introduced any unhandled opcodes. */
4767            tcg_debug_assert(tcg_op_supported(opc));
4768            /* Note: it would be much faster to have specialized
4769               register allocator functions for some common argument
4770               patterns. */
4771            tcg_reg_alloc_op(s, op);
4772            break;
4773        }
4774#ifdef CONFIG_DEBUG_TCG
4775        check_regs(s);
4776#endif
4777        /* Test for (pending) buffer overflow.  The assumption is that any
4778           one operation beginning below the high water mark cannot overrun
4779           the buffer completely.  Thus we can test for overflow after
4780           generating code without having to check during generation.  */
4781        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4782            return -1;
4783        }
4784        /* Test for TB overflow, as seen by gen_insn_end_off.  */
4785        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4786            return -2;
4787        }
4788    }
4789    tcg_debug_assert(num_insns >= 0);
4790    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4791
4792    /* Generate TB finalization at the end of block */
4793#ifdef TCG_TARGET_NEED_LDST_LABELS
4794    i = tcg_out_ldst_finalize(s);
4795    if (i < 0) {
4796        return i;
4797    }
4798#endif
4799#ifdef TCG_TARGET_NEED_POOL_LABELS
4800    i = tcg_out_pool_finalize(s);
4801    if (i < 0) {
4802        return i;
4803    }
4804#endif
4805    if (!tcg_resolve_relocs(s)) {
4806        return -2;
4807    }
4808
4809#ifndef CONFIG_TCG_INTERPRETER
4810    /* flush instruction cache */
4811    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4812                        (uintptr_t)s->code_buf,
4813                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4814#endif
4815
4816    return tcg_current_code_size(s);
4817}
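/*
 * Return-value contract of tcg_gen_code, as established above: -1 means
 * the host code buffer high-water mark was crossed, -2 that the
 * searchable insn data or a relocation overflowed; otherwise the result
 * is the number of host code bytes emitted.  A hypothetical caller:
 *
 *     int n = tcg_gen_code(tcg_ctx, tb);
 *     if (n < 0) {
 *         // discard the partial TB and retry, e.g. in a fresh buffer
 *         // or with fewer guest instructions per TB
 *     }
 */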
4818
4819#ifdef CONFIG_PROFILER
4820void tcg_dump_info(void)
4821{
4822    TCGProfile prof = {};
4823    const TCGProfile *s;
4824    int64_t tb_count;
4825    int64_t tb_div_count;
4826    int64_t tot;
4827
4828    tcg_profile_snapshot_counters(&prof);
4829    s = &prof;
4830    tb_count = s->tb_count;
4831    tb_div_count = tb_count ? tb_count : 1;
4832    tot = s->interm_time + s->code_time;
4833
4834    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4835                tot, tot / 2.4e9);
4836    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4837                " %0.1f%%)\n",
4838                tb_count, s->tb_count1 - tb_count,
4839                (double)(s->tb_count1 - s->tb_count)
4840                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4841    qemu_printf("avg ops/TB          %0.1f max=%d\n",
4842                (double)s->op_count / tb_div_count, s->op_count_max);
4843    qemu_printf("deleted ops/TB      %0.2f\n",
4844                (double)s->del_op_count / tb_div_count);
4845    qemu_printf("avg temps/TB        %0.2f max=%d\n",
4846                (double)s->temp_count / tb_div_count, s->temp_count_max);
4847    qemu_printf("avg host code/TB    %0.1f\n",
4848                (double)s->code_out_len / tb_div_count);
4849    qemu_printf("avg search data/TB  %0.1f\n",
4850                (double)s->search_out_len / tb_div_count);
4851    
4852    qemu_printf("cycles/op           %0.1f\n",
4853                s->op_count ? (double)tot / s->op_count : 0);
4854    qemu_printf("cycles/in byte      %0.1f\n",
4855                s->code_in_len ? (double)tot / s->code_in_len : 0);
4856    qemu_printf("cycles/out byte     %0.1f\n",
4857                s->code_out_len ? (double)tot / s->code_out_len : 0);
4858    qemu_printf("cycles/search byte  %0.1f\n",
4859                s->search_out_len ? (double)tot / s->search_out_len : 0);
4860    if (tot == 0) {
4861        tot = 1;
4862    }
4863    qemu_printf("  gen_interm time   %0.1f%%\n",
4864                (double)s->interm_time / tot * 100.0);
4865    qemu_printf("  gen_code time     %0.1f%%\n",
4866                (double)s->code_time / tot * 100.0);
4867    qemu_printf("optim./code time    %0.1f%%\n",
4868                (double)s->opt_time / (s->code_time ? s->code_time : 1)
4869                * 100.0);
4870    qemu_printf("liveness/code time  %0.1f%%\n",
4871                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4872    qemu_printf("cpu_restore count   %" PRId64 "\n",
4873                s->restore_count);
4874    qemu_printf("  avg cycles        %0.1f\n",
4875                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4876}
4877#else
4878void tcg_dump_info(void)
4879{
4880    qemu_printf("[TCG profiler not compiled]\n");
4881}
4882#endif
4883
4884#ifdef ELF_HOST_MACHINE
4885/* In order to use this feature, the backend needs to do three things:
4886
   (1) Define ELF_HOST_MACHINE, which both supplies the value for the
       ELF header's e_machine field and indicates that the backend
       supports this feature.
4890   (2) Define tcg_register_jit.  This should create a buffer containing
4891       the contents of a .debug_frame section that describes the post-
4892       prologue unwind info for the tcg machine.
4893
4894   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4895*/
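
/* As an illustration of steps (2) and (3), a backend's tcg_register_jit
   is typically shaped like the sketch below.  This is a hedged example
   only: the frame layout, the register numbers, and the sleb128
   constants are host-specific and live in each tcg-target.c.inc, not
   here.  */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];         /* DW_CFA_def_cfa and operands */
    uint8_t fde_reg_ofs[4];         /* DW_CFA_offset per saved register */
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,                         /* marks this as a CIE */
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8; host-specific */
    .h.cie.return_column = 16,              /* host-specific */
    /* The FDE length does not include the .len member itself.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
    .fde_def_cfa = { 0 /* DW_CFA_def_cfa <sp>, <frame size> */ },
    .fde_reg_ofs = { 0 /* DW_CFA_offset <reg>, <offset>, ... */ },
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* func_start/func_len in the FDE are patched by
       tcg_register_jit_int from buf and buf_size.  */
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif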
4896
4897/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4898typedef enum {
4899    JIT_NOACTION = 0,
4900    JIT_REGISTER_FN,
4901    JIT_UNREGISTER_FN
4902} jit_actions_t;
4903
4904struct jit_code_entry {
4905    struct jit_code_entry *next_entry;
4906    struct jit_code_entry *prev_entry;
4907    const void *symfile_addr;
4908    uint64_t symfile_size;
4909};
4910
4911struct jit_descriptor {
4912    uint32_t version;
4913    uint32_t action_flag;
4914    struct jit_code_entry *relevant_entry;
4915    struct jit_code_entry *first_entry;
4916};
4917
4918void __jit_debug_register_code(void) __attribute__((noinline));
4919void __jit_debug_register_code(void)
4920{
4921    asm("");
4922}
4923
4924/* Must statically initialize the version, because GDB may check
4925   the version before we can set it.  */
4926struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4927
4928/* End GDB interface.  */
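
/* A JIT-aware debugger places a breakpoint on __jit_debug_register_code
   and, each time it fires, re-reads __jit_debug_descriptor: action_flag
   says what changed and relevant_entry points at the affected entry.
   Conceptually the consumer side (which lives in the debugger, not in
   QEMU) walks the list as sketched below -- illustration only:  */
#if 0
    struct jit_code_entry *e;
    for (e = __jit_debug_descriptor.first_entry; e; e = e->next_entry) {
        /* Each entry describes one in-memory ELF image carrying the
           symbols and unwind info for a chunk of JIT-ed code.  */
        consume_symfile(e->symfile_addr, e->symfile_size);
    }
#endif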
4929
4930static int find_string(const char *strtab, const char *str)
4931{
4932    const char *p = strtab + 1;
4933
4934    while (1) {
4935        if (strcmp(p, str) == 0) {
4936            return p - strtab;
4937        }
4938        p += strlen(p) + 1;
4939    }
4940}
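
/* Note that find_string assumes STR is present: it scans the
   NUL-separated table without a length check and never fails.  With
   img->str as defined below, e.g. find_string(img->str, ".text")
   yields 1, the offset just past the leading NUL.  */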
4941
4942static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4943                                 const void *debug_frame,
4944                                 size_t debug_frame_size)
4945{
4946    struct __attribute__((packed)) DebugInfo {
4947        uint32_t  len;
4948        uint16_t  version;
4949        uint32_t  abbrev;
4950        uint8_t   ptr_size;
4951        uint8_t   cu_die;
4952        uint16_t  cu_lang;
4953        uintptr_t cu_low_pc;
4954        uintptr_t cu_high_pc;
4955        uint8_t   fn_die;
4956        char      fn_name[16];
4957        uintptr_t fn_low_pc;
4958        uintptr_t fn_high_pc;
4959        uint8_t   cu_eoc;
4960    };
4961
4962    struct ElfImage {
4963        ElfW(Ehdr) ehdr;
4964        ElfW(Phdr) phdr;
4965        ElfW(Shdr) shdr[7];
4966        ElfW(Sym)  sym[2];
4967        struct DebugInfo di;
4968        uint8_t    da[24];
4969        char       str[80];
4970    };
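    /* The fake ELF image and the variable-sized .debug_frame share one
       allocation: the frame data is appended directly after the
       ElfImage (see img_size below), which is why .debug_frame's
       sh_offset is sizeof(struct ElfImage).  */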
4971
4972    struct ElfImage *img;
4973
4974    static const struct ElfImage img_template = {
4975        .ehdr = {
4976            .e_ident[EI_MAG0] = ELFMAG0,
4977            .e_ident[EI_MAG1] = ELFMAG1,
4978            .e_ident[EI_MAG2] = ELFMAG2,
4979            .e_ident[EI_MAG3] = ELFMAG3,
4980            .e_ident[EI_CLASS] = ELF_CLASS,
4981            .e_ident[EI_DATA] = ELF_DATA,
4982            .e_ident[EI_VERSION] = EV_CURRENT,
4983            .e_type = ET_EXEC,
4984            .e_machine = ELF_HOST_MACHINE,
4985            .e_version = EV_CURRENT,
4986            .e_phoff = offsetof(struct ElfImage, phdr),
4987            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
4989            .e_phentsize = sizeof(ElfW(Phdr)),
4990            .e_phnum = 1,
4991            .e_shentsize = sizeof(ElfW(Shdr)),
4992            .e_shnum = ARRAY_SIZE(img->shdr),
4993            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4994#ifdef ELF_HOST_FLAGS
4995            .e_flags = ELF_HOST_FLAGS,
4996#endif
4997#ifdef ELF_OSABI
4998            .e_ident[EI_OSABI] = ELF_OSABI,
4999#endif
5000        },
5001        .phdr = {
5002            .p_type = PT_LOAD,
5003            .p_flags = PF_X,
5004        },
5005        .shdr = {
5006            [0] = { .sh_type = SHT_NULL },
5007            /* Trick: The contents of code_gen_buffer are not present in
5008               this fake ELF file; that got allocated elsewhere.  Therefore
5009               we mark .text as SHT_NOBITS (similar to .bss) so that readers
5010               will not look for contents.  We can record any address.  */
5011            [1] = { /* .text */
5012                .sh_type = SHT_NOBITS,
5013                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
5014            },
5015            [2] = { /* .debug_info */
5016                .sh_type = SHT_PROGBITS,
5017                .sh_offset = offsetof(struct ElfImage, di),
5018                .sh_size = sizeof(struct DebugInfo),
5019            },
5020            [3] = { /* .debug_abbrev */
5021                .sh_type = SHT_PROGBITS,
5022                .sh_offset = offsetof(struct ElfImage, da),
5023                .sh_size = sizeof(img->da),
5024            },
5025            [4] = { /* .debug_frame */
5026                .sh_type = SHT_PROGBITS,
5027                .sh_offset = sizeof(struct ElfImage),
5028            },
5029            [5] = { /* .symtab */
5030                .sh_type = SHT_SYMTAB,
5031                .sh_offset = offsetof(struct ElfImage, sym),
5032                .sh_size = sizeof(img->sym),
5033                .sh_info = 1,
5034                .sh_link = ARRAY_SIZE(img->shdr) - 1,
5035                .sh_entsize = sizeof(ElfW(Sym)),
5036            },
5037            [6] = { /* .strtab */
5038                .sh_type = SHT_STRTAB,
5039                .sh_offset = offsetof(struct ElfImage, str),
5040                .sh_size = sizeof(img->str),
5041            }
5042        },
5043        .sym = {
5044            [1] = { /* code_gen_buffer */
5045                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5046                .st_shndx = 1,
5047            }
5048        },
5049        .di = {
5050            .len = sizeof(struct DebugInfo) - 4,
5051            .version = 2,
5052            .ptr_size = sizeof(void *),
5053            .cu_die = 1,
5054            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5055            .fn_die = 2,
5056            .fn_name = "code_gen_buffer"
5057        },
5058        .da = {
5059            1,          /* abbrev number (the cu) */
5060            0x11, 1,    /* DW_TAG_compile_unit, has children */
5061            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5062            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5063            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5064            0, 0,       /* end of abbrev */
5065            2,          /* abbrev number (the fn) */
5066            0x2e, 0,    /* DW_TAG_subprogram, no children */
5067            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5068            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5069            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5070            0, 0,       /* end of abbrev */
5071            0           /* no more abbrev */
5072        },
5073        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5074               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5075    };
5076
5077    /* We only need a single jit entry; statically allocate it.  */
5078    static struct jit_code_entry one_entry;
5079
5080    uintptr_t buf = (uintptr_t)buf_ptr;
5081    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5082    DebugFrameHeader *dfh;
5083
5084    img = g_malloc(img_size);
5085    *img = img_template;
5086
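    /* Patch in the per-buffer values the static template cannot know:
       the load address, the sizes, and the symbol/DWARF PC ranges all
       derive from buf and buf_size.  */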
5087    img->phdr.p_vaddr = buf;
5088    img->phdr.p_paddr = buf;
5089    img->phdr.p_memsz = buf_size;
5090
5091    img->shdr[1].sh_name = find_string(img->str, ".text");
5092    img->shdr[1].sh_addr = buf;
5093    img->shdr[1].sh_size = buf_size;
5094
5095    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5096    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5097
5098    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5099    img->shdr[4].sh_size = debug_frame_size;
5100
5101    img->shdr[5].sh_name = find_string(img->str, ".symtab");
5102    img->shdr[6].sh_name = find_string(img->str, ".strtab");
5103
5104    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5105    img->sym[1].st_value = buf;
5106    img->sym[1].st_size = buf_size;
5107
5108    img->di.cu_low_pc = buf;
5109    img->di.cu_high_pc = buf + buf_size;
5110    img->di.fn_low_pc = buf;
5111    img->di.fn_high_pc = buf + buf_size;
5112
5113    dfh = (DebugFrameHeader *)(img + 1);
5114    memcpy(dfh, debug_frame, debug_frame_size);
5115    dfh->fde.func_start = buf;
5116    dfh->fde.func_len = buf_size;
5117
5118#ifdef DEBUG_JIT
    /* Enable this block to debug the ELF image created above.  The
       resulting file can be inspected with readelf, objdump, or other
       tools.  */
5121    {
5122        FILE *f = fopen("/tmp/qemu.jit", "w+b");
5123        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* With nmemb == 1, fwrite returns 1 on success; the
                   check mainly consumes the warn_unused_result value.  */
            }
5127            fclose(f);
5128        }
5129    }
5130#endif
5131
5132    one_entry.symfile_addr = img;
5133    one_entry.symfile_size = img_size;
5134
5135    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5136    __jit_debug_descriptor.relevant_entry = &one_entry;
5137    __jit_debug_descriptor.first_entry = &one_entry;
5138    __jit_debug_register_code();
5139}
5140#else
5141/* No support for the feature.  Provide the entry point expected by exec.c,
5142   and implement the internal function we declared earlier.  */
5143
5144static void tcg_register_jit_int(const void *buf, size_t size,
5145                                 const void *debug_frame,
5146                                 size_t debug_frame_size)
5147{
5148}
5149
5150void tcg_register_jit(const void *buf, size_t buf_size)
5151{
5152}
5153#endif /* ELF_HOST_MACHINE */
5154
5155#if !TCG_TARGET_MAYBE_vec
5156void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5157{
5158    g_assert_not_reached();
5159}
5160#endif
5161