qemu/tcg/tcg.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
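
/*
 * A sketch of the relocation flow above (illustrative, not part of the
 * build; R_SOME_TYPE stands in for a backend-specific relocation type):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_SOME_TYPE, l, 0);   -- forward branch
 *     ... emit more code ...
 *     tcg_out_label(s, l);      -- target reached; l->u.value_ptr now set
 *     tcg_resolve_relocs(s);    -- applies patch_reloc() to every site
 */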

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
}
#endif

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
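
/*
 * For example (illustrative, with placeholder register names R0/R1):
 * tcg_out_movext(s, TCG_TYPE_I64, R0, TCG_TYPE_I32, MO_SW, R1) emits a
 * 16-to-64-bit sign extension via tcg_out_ext16s(), while MO_UQ with a
 * 64-bit destination type reduces to a plain tcg_out_mov().
 */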

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
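
/*
 * Worked trace of the "clockwise" xchg path (illustrative; R0..R2 are
 * placeholder registers): with i1: R0->R1, i2: R1->R2, i3: R2->R0 and
 * initial contents (x, y, z), xchg(R1, R2) gives (x, z, y) and
 * xchg(R0, R1) gives (z, x, y).  Every destination now holds its
 * original source value, so each extension can be performed in place.
 */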

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
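
/*
 * Illustrative expansion (assuming the backend's tcg-target-con-set.h
 * contains the line "C_O1_I2(r, r, ri)"): the first inclusion above
 * produces the enumerator
 *     c_o1_i2_r_r_ri,
 * and the second produces the matching array entry
 *     { .args_ct_str = { "r", "r", "ri" } },
 * so each enumerator indexes its own constraint set in constraint_sets[].
 */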


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
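
/*
 * Usage sketch (a simplified paraphrase of the fast path in tcg.h, not
 * a verbatim copy; the real inline also rounds the size up for minimum
 * alignment): tcg_malloc() bumps pool_cur and only calls the slow path
 * above when the current chunk is exhausted; all pool memory is then
 * reclaimed wholesale by tcg_pool_reset() at the start of the next
 * translation (see tcg_func_start() below).
 *
 *     void *tcg_malloc(int size)
 *     {
 *         uint8_t *ptr = tcg_ctx->pool_cur;
 *         if (ptr + size > tcg_ctx->pool_end) {
 *             return tcg_malloc_internal(tcg_ctx, size);
 *         }
 *         tcg_ctx->pool_cur = ptr + size;
 *         return ptr;
 *     }
 */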

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
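    /*
     * Worked example (illustrative): for a helper taking three
     * arguments, the last (non-zero) typecode occupies bits [6, 9) of
     * typemask >> 3, so "32 - clz32" yields a value between 7 and 9,
     * and DIV_ROUND_UP(..., 3) recovers nargs == 3.
     */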
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
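
/*
 * Illustrative mapping (assuming a host with six integer argument
 * registers, e.g. x86-64 SysV): arg_slots 0..5 satisfy arg_slot_reg_p()
 * and resolve to registers, while arg_slot 6 is the first stack slot
 * and maps to TCG_TARGET_CALL_STACK_OFFSET + 0 * sizeof(tcg_target_long).
 */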

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
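
/*
 * Example layout (illustrative, assuming a 64-bit host where
 * TCG_TARGET_CALL_ARG_I32/I64 are TCG_CALL_ARG_NORMAL and at least four
 * argument registers exist): info_helper_ld32_mmu above, i.e.
 * (env, uint64_t addr, uint32_t oi, uintptr_t ra), yields nr_out == 1
 * and four TCG_CALL_ARG_NORMAL inputs in arg_slots 0..3, all of which
 * satisfy arg_slot_reg_p().
 */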

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
1341     * reasoning behind this.
1342     * In softmmu we will have at most max_cpus TCG threads.
1343     */
1344#ifdef CONFIG_USER_ONLY
1345    tcg_ctxs = &tcg_ctx;
1346    tcg_cur_ctxs = 1;
1347    tcg_max_ctxs = 1;
1348#else
1349    tcg_max_ctxs = max_cpus;
1350    tcg_ctxs = g_new0(TCGContext *, max_cpus);
1351#endif
1352
1353    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1354    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1355    cpu_env = temp_tcgv_ptr(ts);
1356}
1357
1358void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
1359{
1360    tcg_context_init(max_cpus);
1361    tcg_region_init(tb_size, splitwx, max_cpus);
1362}
1363
1364/*
1365 * Allocate TBs right before their corresponding translated code, making
1366 * sure that TBs and code are on different cache lines.
1367 */
1368TranslationBlock *tcg_tb_alloc(TCGContext *s)
1369{
1370    uintptr_t align = qemu_icache_linesize;
1371    TranslationBlock *tb;
1372    void *next;
1373
1374 retry:
1375    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1376    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1377
1378    if (unlikely(next > s->code_gen_highwater)) {
1379        if (tcg_region_alloc(s)) {
1380            return NULL;
1381        }
1382        goto retry;
1383    }
1384    qatomic_set(&s->code_gen_ptr, next);
1385    s->data_gen_ptr = NULL;
1386    return tb;
1387}
1388
1389void tcg_prologue_init(TCGContext *s)
1390{
1391    size_t prologue_size;
1392
1393    s->code_ptr = s->code_gen_ptr;
1394    s->code_buf = s->code_gen_ptr;
1395    s->data_gen_ptr = NULL;
1396
1397#ifndef CONFIG_TCG_INTERPRETER
1398    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1399#endif
1400
1401#ifdef TCG_TARGET_NEED_POOL_LABELS
1402    s->pool_labels = NULL;
1403#endif
1404
1405    qemu_thread_jit_write();
1406    /* Generate the prologue.  */
1407    tcg_target_qemu_prologue(s);
1408
1409#ifdef TCG_TARGET_NEED_POOL_LABELS
1410    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1411    {
1412        int result = tcg_out_pool_finalize(s);
1413        tcg_debug_assert(result == 0);
1414    }
1415#endif
1416
1417    prologue_size = tcg_current_code_size(s);
1418    perf_report_prologue(s->code_gen_ptr, prologue_size);
1419
1420#ifndef CONFIG_TCG_INTERPRETER
1421    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1422                        (uintptr_t)s->code_buf, prologue_size);
1423#endif
1424
1425    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1426        FILE *logfile = qemu_log_trylock();
1427        if (logfile) {
1428            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1429            if (s->data_gen_ptr) {
1430                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1431                size_t data_size = prologue_size - code_size;
1432                size_t i;
1433
1434                disas(logfile, s->code_gen_ptr, code_size);
1435
1436                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1437                    if (sizeof(tcg_target_ulong) == 8) {
1438                        fprintf(logfile,
1439                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1440                                (uintptr_t)s->data_gen_ptr + i,
1441                                *(uint64_t *)(s->data_gen_ptr + i));
1442                    } else {
1443                        fprintf(logfile,
1444                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
1445                                (uintptr_t)s->data_gen_ptr + i,
1446                                *(uint32_t *)(s->data_gen_ptr + i));
1447                    }
1448                }
1449            } else {
1450                disas(logfile, s->code_gen_ptr, prologue_size);
1451            }
1452            fprintf(logfile, "\n");
1453            qemu_log_unlock(logfile);
1454        }
1455    }
1456
1457#ifndef CONFIG_TCG_INTERPRETER
1458    /*
1459     * Assert that goto_ptr is implemented completely, setting an epilogue.
1460     * For tci, we use NULL as the signal to return from the interpreter,
1461     * so skip this check.
1462     */
1463    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1464#endif
1465
1466    tcg_region_prologue_set(s);
1467}
1468
1469void tcg_func_start(TCGContext *s)
1470{
1471    tcg_pool_reset(s);
1472    s->nb_temps = s->nb_globals;
1473
1474    /* No temps have been previously allocated for size or locality.  */
1475    memset(s->free_temps, 0, sizeof(s->free_temps));
1476
1477    /* No constant temps have been previously allocated. */
1478    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1479        if (s->const_table[i]) {
1480            g_hash_table_remove_all(s->const_table[i]);
1481        }
1482    }
1483
1484    s->nb_ops = 0;
1485    s->nb_labels = 0;
1486    s->current_frame_offset = s->frame_start;
1487
1488#ifdef CONFIG_DEBUG_TCG
1489    s->goto_tb_issue_mask = 0;
1490#endif
1491
1492    QTAILQ_INIT(&s->ops);
1493    QTAILQ_INIT(&s->free_ops);
1494    QSIMPLEQ_INIT(&s->labels);
1495
1496    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1497                     s->addr_type == TCG_TYPE_I64);
1498
1499#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
1500    tcg_debug_assert(s->tlb_fast_offset < 0);
1501    tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
1502#endif
1503
1504    tcg_debug_assert(s->insn_start_words > 0);
1505}
1506
1507static TCGTemp *tcg_temp_alloc(TCGContext *s)
1508{
1509    int n = s->nb_temps++;
1510
1511    if (n >= TCG_MAX_TEMPS) {
1512        tcg_raise_tb_overflow(s);
1513    }
1514    return memset(&s->temps[n], 0, sizeof(TCGTemp));
1515}
1516
1517static TCGTemp *tcg_global_alloc(TCGContext *s)
1518{
1519    TCGTemp *ts;
1520
1521    tcg_debug_assert(s->nb_globals == s->nb_temps);
1522    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1523    s->nb_globals++;
1524    ts = tcg_temp_alloc(s);
1525    ts->kind = TEMP_GLOBAL;
1526
1527    return ts;
1528}
1529
1530static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1531                                            TCGReg reg, const char *name)
1532{
1533    TCGTemp *ts;
1534
1535    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1536
1537    ts = tcg_global_alloc(s);
1538    ts->base_type = type;
1539    ts->type = type;
1540    ts->kind = TEMP_FIXED;
1541    ts->reg = reg;
1542    ts->name = name;
1543    tcg_regset_set_reg(s->reserved_regs, reg);
1544
1545    return ts;
1546}
1547
1548void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1549{
1550    s->frame_start = start;
1551    s->frame_end = start + size;
1552    s->frame_temp
1553        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1554}
1555
1556TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1557                                     intptr_t offset, const char *name)
1558{
1559    TCGContext *s = tcg_ctx;
1560    TCGTemp *base_ts = tcgv_ptr_temp(base);
1561    TCGTemp *ts = tcg_global_alloc(s);
1562    int indirect_reg = 0;
1563
1564    switch (base_ts->kind) {
1565    case TEMP_FIXED:
1566        break;
1567    case TEMP_GLOBAL:
1568        /* We do not support double-indirect registers.  */
1569        tcg_debug_assert(!base_ts->indirect_reg);
1570        base_ts->indirect_base = 1;
1571        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1572                            ? 2 : 1);
1573        indirect_reg = 1;
1574        break;
1575    default:
1576        g_assert_not_reached();
1577    }
1578
1579    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1580        TCGTemp *ts2 = tcg_global_alloc(s);
1581        char buf[64];
1582
1583        ts->base_type = TCG_TYPE_I64;
1584        ts->type = TCG_TYPE_I32;
1585        ts->indirect_reg = indirect_reg;
1586        ts->mem_allocated = 1;
1587        ts->mem_base = base_ts;
1588        ts->mem_offset = offset;
1589        pstrcpy(buf, sizeof(buf), name);
1590        pstrcat(buf, sizeof(buf), "_0");
1591        ts->name = strdup(buf);
1592
1593        tcg_debug_assert(ts2 == ts + 1);
1594        ts2->base_type = TCG_TYPE_I64;
1595        ts2->type = TCG_TYPE_I32;
1596        ts2->indirect_reg = indirect_reg;
1597        ts2->mem_allocated = 1;
1598        ts2->mem_base = base_ts;
1599        ts2->mem_offset = offset + 4;
1600        ts2->temp_subindex = 1;
1601        pstrcpy(buf, sizeof(buf), name);
1602        pstrcat(buf, sizeof(buf), "_1");
1603        ts2->name = strdup(buf);
1604    } else {
1605        ts->base_type = type;
1606        ts->type = type;
1607        ts->indirect_reg = indirect_reg;
1608        ts->mem_allocated = 1;
1609        ts->mem_base = base_ts;
1610        ts->mem_offset = offset;
1611        ts->name = name;
1612    }
1613    return ts;
1614}
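
    /*
     * Usage sketch (illustrative; CPUFooState and its field are
     * hypothetical): front ends create globals backed by the CPU state
     * structure via the typed wrappers, e.g.
     *
     *     cpu_pc = tcg_global_mem_new_i64(tcg_env,
     *                                     offsetof(CPUFooState, pc), "pc");
     *
     * where tcg_env is the env pointer global (older trees call it
     * cpu_env).  On a 32-bit host the I64 case materializes as two I32
     * halves, "pc_0" and "pc_1", covering offset and offset + 4 as above.
     */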
1615
1616TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1617{
1618    TCGContext *s = tcg_ctx;
1619    TCGTemp *ts;
1620    int n;
1621
1622    if (kind == TEMP_EBB) {
1623        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1624
1625        if (idx < TCG_MAX_TEMPS) {
1626            /* There is already an available temp with the right type.  */
1627            clear_bit(idx, s->free_temps[type].l);
1628
1629            ts = &s->temps[idx];
1630            ts->temp_allocated = 1;
1631            tcg_debug_assert(ts->base_type == type);
1632            tcg_debug_assert(ts->kind == kind);
1633            return ts;
1634        }
1635    } else {
1636        tcg_debug_assert(kind == TEMP_TB);
1637    }
1638
1639    switch (type) {
1640    case TCG_TYPE_I32:
1641    case TCG_TYPE_V64:
1642    case TCG_TYPE_V128:
1643    case TCG_TYPE_V256:
1644        n = 1;
1645        break;
1646    case TCG_TYPE_I64:
1647        n = 64 / TCG_TARGET_REG_BITS;
1648        break;
1649    case TCG_TYPE_I128:
1650        n = 128 / TCG_TARGET_REG_BITS;
1651        break;
1652    default:
1653        g_assert_not_reached();
1654    }
1655
1656    ts = tcg_temp_alloc(s);
1657    ts->base_type = type;
1658    ts->temp_allocated = 1;
1659    ts->kind = kind;
1660
1661    if (n == 1) {
1662        ts->type = type;
1663    } else {
1664        ts->type = TCG_TYPE_REG;
1665
1666        for (int i = 1; i < n; ++i) {
1667            TCGTemp *ts2 = tcg_temp_alloc(s);
1668
1669            tcg_debug_assert(ts2 == ts + i);
1670            ts2->base_type = type;
1671            ts2->type = TCG_TYPE_REG;
1672            ts2->temp_allocated = 1;
1673            ts2->temp_subindex = i;
1674            ts2->kind = kind;
1675        }
1676    }
1677    return ts;
1678}
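
    /*
     * Layout sketch: base_type records the logical type while each piece
     * is host-register-sized.  On a 64-bit host, a TCG_TYPE_I128 temp
     * occupies two consecutive TCGTemps:
     *
     *     ts[0]: base_type = I128, type = I64, temp_subindex = 0
     *     ts[1]: base_type = I128, type = I64, temp_subindex = 1
     *
     * so callers can address the pieces as ts + subindex.
     */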
1679
1680TCGv_vec tcg_temp_new_vec(TCGType type)
1681{
1682    TCGTemp *t;
1683
1684#ifdef CONFIG_DEBUG_TCG
1685    switch (type) {
1686    case TCG_TYPE_V64:
1687        assert(TCG_TARGET_HAS_v64);
1688        break;
1689    case TCG_TYPE_V128:
1690        assert(TCG_TARGET_HAS_v128);
1691        break;
1692    case TCG_TYPE_V256:
1693        assert(TCG_TARGET_HAS_v256);
1694        break;
1695    default:
1696        g_assert_not_reached();
1697    }
1698#endif
1699
1700    t = tcg_temp_new_internal(type, TEMP_EBB);
1701    return temp_tcgv_vec(t);
1702}
1703
1704/* Create a new temp of the same type as an existing temp.  */
1705TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1706{
1707    TCGTemp *t = tcgv_vec_temp(match);
1708
1709    tcg_debug_assert(t->temp_allocated != 0);
1710
1711    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1712    return temp_tcgv_vec(t);
1713}
1714
1715void tcg_temp_free_internal(TCGTemp *ts)
1716{
1717    TCGContext *s = tcg_ctx;
1718
1719    switch (ts->kind) {
1720    case TEMP_CONST:
1721    case TEMP_TB:
1722        /* Silently ignore free. */
1723        break;
1724    case TEMP_EBB:
1725        tcg_debug_assert(ts->temp_allocated != 0);
1726        ts->temp_allocated = 0;
1727        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1728        break;
1729    default:
1730        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1731        g_assert_not_reached();
1732    }
1733}
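
    /*
     * Note: freeing a TEMP_EBB temp only marks its index in free_temps;
     * the TCGTemp itself is recycled by the find_first_bit() fast path
     * in tcg_temp_new_internal() above.
     */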
1734
1735TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1736{
1737    TCGContext *s = tcg_ctx;
1738    GHashTable *h = s->const_table[type];
1739    TCGTemp *ts;
1740
1741    if (h == NULL) {
1742        h = g_hash_table_new(g_int64_hash, g_int64_equal);
1743        s->const_table[type] = h;
1744    }
1745
1746    ts = g_hash_table_lookup(h, &val);
1747    if (ts == NULL) {
1748        int64_t *val_ptr;
1749
1750        ts = tcg_temp_alloc(s);
1751
1752        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1753            TCGTemp *ts2 = tcg_temp_alloc(s);
1754
1755            tcg_debug_assert(ts2 == ts + 1);
1756
1757            ts->base_type = TCG_TYPE_I64;
1758            ts->type = TCG_TYPE_I32;
1759            ts->kind = TEMP_CONST;
1760            ts->temp_allocated = 1;
1761
1762            ts2->base_type = TCG_TYPE_I64;
1763            ts2->type = TCG_TYPE_I32;
1764            ts2->kind = TEMP_CONST;
1765            ts2->temp_allocated = 1;
1766            ts2->temp_subindex = 1;
1767
1768            /*
1769             * Retain the full value of the 64-bit constant in the low
1770             * part, so that the hash table works.  Actual uses will
1771             * truncate the value to the low part.
1772             */
1773            ts[HOST_BIG_ENDIAN].val = val;
1774            ts[!HOST_BIG_ENDIAN].val = val >> 32;
1775            val_ptr = &ts[HOST_BIG_ENDIAN].val;
1776        } else {
1777            ts->base_type = type;
1778            ts->type = type;
1779            ts->kind = TEMP_CONST;
1780            ts->temp_allocated = 1;
1781            ts->val = val;
1782            val_ptr = &ts->val;
1783        }
1784        g_hash_table_insert(h, val_ptr, ts);
1785    }
1786
1787    return ts;
1788}
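
    /*
     * Usage sketch (illustrative): constants are interned per (type,
     * value), so repeated requests yield the same temp, and TEMP_CONST
     * temps are never freed:
     *
     *     TCGv_i32 four = tcg_constant_i32(4);    /- same temp every time -/
     *
     * On a 32-bit host an I64 constant occupies a pair of temps, with the
     * full 64-bit value retained in one half so the hash lookup works.
     */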
1789
1790TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1791{
1792    val = dup_const(vece, val);
1793    return temp_tcgv_vec(tcg_constant_internal(type, val));
1794}
1795
1796TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1797{
1798    TCGTemp *t = tcgv_vec_temp(match);
1799
1800    tcg_debug_assert(t->temp_allocated != 0);
1801    return tcg_constant_vec(t->base_type, vece, val);
1802}
1803
1804#ifdef CONFIG_DEBUG_TCG
1805size_t temp_idx(TCGTemp *ts)
1806{
1807    ptrdiff_t n = ts - tcg_ctx->temps;
1808    assert(n >= 0 && n < tcg_ctx->nb_temps);
1809    return n;
1810}
1811
1812TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1813{
1814    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1815
1816    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1817    assert(o % sizeof(TCGTemp) == 0);
1818
1819    return (void *)tcg_ctx + (uintptr_t)v;
1820}
1821#endif /* CONFIG_DEBUG_TCG */
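
    /*
     * Worked example of the encoding checked above: a TCGv_i32 is not a
     * real pointer but the byte offset of its TCGTemp from tcg_ctx, i.e.
     * temp N is encoded as offsetof(TCGContext, temps) + N * sizeof(TCGTemp),
     * so tcgv_i32_temp() recovers the TCGTemp with a single addition.
     */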
1822
1823/* Return true if OP may appear in the opcode stream.
1824   Test the runtime variable that controls each opcode.  */
1825bool tcg_op_supported(TCGOpcode op)
1826{
1827    const bool have_vec
1828        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1829
1830    switch (op) {
1831    case INDEX_op_discard:
1832    case INDEX_op_set_label:
1833    case INDEX_op_call:
1834    case INDEX_op_br:
1835    case INDEX_op_mb:
1836    case INDEX_op_insn_start:
1837    case INDEX_op_exit_tb:
1838    case INDEX_op_goto_tb:
1839    case INDEX_op_goto_ptr:
1840    case INDEX_op_qemu_ld_a32_i32:
1841    case INDEX_op_qemu_ld_a64_i32:
1842    case INDEX_op_qemu_st_a32_i32:
1843    case INDEX_op_qemu_st_a64_i32:
1844    case INDEX_op_qemu_ld_a32_i64:
1845    case INDEX_op_qemu_ld_a64_i64:
1846    case INDEX_op_qemu_st_a32_i64:
1847    case INDEX_op_qemu_st_a64_i64:
1848        return true;
1849
1850    case INDEX_op_qemu_st8_a32_i32:
1851    case INDEX_op_qemu_st8_a64_i32:
1852        return TCG_TARGET_HAS_qemu_st8_i32;
1853
1854    case INDEX_op_qemu_ld_a32_i128:
1855    case INDEX_op_qemu_ld_a64_i128:
1856    case INDEX_op_qemu_st_a32_i128:
1857    case INDEX_op_qemu_st_a64_i128:
1858        return TCG_TARGET_HAS_qemu_ldst_i128;
1859
1860    case INDEX_op_mov_i32:
1861    case INDEX_op_setcond_i32:
1862    case INDEX_op_brcond_i32:
1863    case INDEX_op_ld8u_i32:
1864    case INDEX_op_ld8s_i32:
1865    case INDEX_op_ld16u_i32:
1866    case INDEX_op_ld16s_i32:
1867    case INDEX_op_ld_i32:
1868    case INDEX_op_st8_i32:
1869    case INDEX_op_st16_i32:
1870    case INDEX_op_st_i32:
1871    case INDEX_op_add_i32:
1872    case INDEX_op_sub_i32:
1873    case INDEX_op_mul_i32:
1874    case INDEX_op_and_i32:
1875    case INDEX_op_or_i32:
1876    case INDEX_op_xor_i32:
1877    case INDEX_op_shl_i32:
1878    case INDEX_op_shr_i32:
1879    case INDEX_op_sar_i32:
1880        return true;
1881
1882    case INDEX_op_movcond_i32:
1883        return TCG_TARGET_HAS_movcond_i32;
1884    case INDEX_op_div_i32:
1885    case INDEX_op_divu_i32:
1886        return TCG_TARGET_HAS_div_i32;
1887    case INDEX_op_rem_i32:
1888    case INDEX_op_remu_i32:
1889        return TCG_TARGET_HAS_rem_i32;
1890    case INDEX_op_div2_i32:
1891    case INDEX_op_divu2_i32:
1892        return TCG_TARGET_HAS_div2_i32;
1893    case INDEX_op_rotl_i32:
1894    case INDEX_op_rotr_i32:
1895        return TCG_TARGET_HAS_rot_i32;
1896    case INDEX_op_deposit_i32:
1897        return TCG_TARGET_HAS_deposit_i32;
1898    case INDEX_op_extract_i32:
1899        return TCG_TARGET_HAS_extract_i32;
1900    case INDEX_op_sextract_i32:
1901        return TCG_TARGET_HAS_sextract_i32;
1902    case INDEX_op_extract2_i32:
1903        return TCG_TARGET_HAS_extract2_i32;
1904    case INDEX_op_add2_i32:
1905        return TCG_TARGET_HAS_add2_i32;
1906    case INDEX_op_sub2_i32:
1907        return TCG_TARGET_HAS_sub2_i32;
1908    case INDEX_op_mulu2_i32:
1909        return TCG_TARGET_HAS_mulu2_i32;
1910    case INDEX_op_muls2_i32:
1911        return TCG_TARGET_HAS_muls2_i32;
1912    case INDEX_op_muluh_i32:
1913        return TCG_TARGET_HAS_muluh_i32;
1914    case INDEX_op_mulsh_i32:
1915        return TCG_TARGET_HAS_mulsh_i32;
1916    case INDEX_op_ext8s_i32:
1917        return TCG_TARGET_HAS_ext8s_i32;
1918    case INDEX_op_ext16s_i32:
1919        return TCG_TARGET_HAS_ext16s_i32;
1920    case INDEX_op_ext8u_i32:
1921        return TCG_TARGET_HAS_ext8u_i32;
1922    case INDEX_op_ext16u_i32:
1923        return TCG_TARGET_HAS_ext16u_i32;
1924    case INDEX_op_bswap16_i32:
1925        return TCG_TARGET_HAS_bswap16_i32;
1926    case INDEX_op_bswap32_i32:
1927        return TCG_TARGET_HAS_bswap32_i32;
1928    case INDEX_op_not_i32:
1929        return TCG_TARGET_HAS_not_i32;
1930    case INDEX_op_neg_i32:
1931        return TCG_TARGET_HAS_neg_i32;
1932    case INDEX_op_andc_i32:
1933        return TCG_TARGET_HAS_andc_i32;
1934    case INDEX_op_orc_i32:
1935        return TCG_TARGET_HAS_orc_i32;
1936    case INDEX_op_eqv_i32:
1937        return TCG_TARGET_HAS_eqv_i32;
1938    case INDEX_op_nand_i32:
1939        return TCG_TARGET_HAS_nand_i32;
1940    case INDEX_op_nor_i32:
1941        return TCG_TARGET_HAS_nor_i32;
1942    case INDEX_op_clz_i32:
1943        return TCG_TARGET_HAS_clz_i32;
1944    case INDEX_op_ctz_i32:
1945        return TCG_TARGET_HAS_ctz_i32;
1946    case INDEX_op_ctpop_i32:
1947        return TCG_TARGET_HAS_ctpop_i32;
1948
1949    case INDEX_op_brcond2_i32:
1950    case INDEX_op_setcond2_i32:
1951        return TCG_TARGET_REG_BITS == 32;
1952
1953    case INDEX_op_mov_i64:
1954    case INDEX_op_setcond_i64:
1955    case INDEX_op_brcond_i64:
1956    case INDEX_op_ld8u_i64:
1957    case INDEX_op_ld8s_i64:
1958    case INDEX_op_ld16u_i64:
1959    case INDEX_op_ld16s_i64:
1960    case INDEX_op_ld32u_i64:
1961    case INDEX_op_ld32s_i64:
1962    case INDEX_op_ld_i64:
1963    case INDEX_op_st8_i64:
1964    case INDEX_op_st16_i64:
1965    case INDEX_op_st32_i64:
1966    case INDEX_op_st_i64:
1967    case INDEX_op_add_i64:
1968    case INDEX_op_sub_i64:
1969    case INDEX_op_mul_i64:
1970    case INDEX_op_and_i64:
1971    case INDEX_op_or_i64:
1972    case INDEX_op_xor_i64:
1973    case INDEX_op_shl_i64:
1974    case INDEX_op_shr_i64:
1975    case INDEX_op_sar_i64:
1976    case INDEX_op_ext_i32_i64:
1977    case INDEX_op_extu_i32_i64:
1978        return TCG_TARGET_REG_BITS == 64;
1979
1980    case INDEX_op_movcond_i64:
1981        return TCG_TARGET_HAS_movcond_i64;
1982    case INDEX_op_div_i64:
1983    case INDEX_op_divu_i64:
1984        return TCG_TARGET_HAS_div_i64;
1985    case INDEX_op_rem_i64:
1986    case INDEX_op_remu_i64:
1987        return TCG_TARGET_HAS_rem_i64;
1988    case INDEX_op_div2_i64:
1989    case INDEX_op_divu2_i64:
1990        return TCG_TARGET_HAS_div2_i64;
1991    case INDEX_op_rotl_i64:
1992    case INDEX_op_rotr_i64:
1993        return TCG_TARGET_HAS_rot_i64;
1994    case INDEX_op_deposit_i64:
1995        return TCG_TARGET_HAS_deposit_i64;
1996    case INDEX_op_extract_i64:
1997        return TCG_TARGET_HAS_extract_i64;
1998    case INDEX_op_sextract_i64:
1999        return TCG_TARGET_HAS_sextract_i64;
2000    case INDEX_op_extract2_i64:
2001        return TCG_TARGET_HAS_extract2_i64;
2002    case INDEX_op_extrl_i64_i32:
2003        return TCG_TARGET_HAS_extrl_i64_i32;
2004    case INDEX_op_extrh_i64_i32:
2005        return TCG_TARGET_HAS_extrh_i64_i32;
2006    case INDEX_op_ext8s_i64:
2007        return TCG_TARGET_HAS_ext8s_i64;
2008    case INDEX_op_ext16s_i64:
2009        return TCG_TARGET_HAS_ext16s_i64;
2010    case INDEX_op_ext32s_i64:
2011        return TCG_TARGET_HAS_ext32s_i64;
2012    case INDEX_op_ext8u_i64:
2013        return TCG_TARGET_HAS_ext8u_i64;
2014    case INDEX_op_ext16u_i64:
2015        return TCG_TARGET_HAS_ext16u_i64;
2016    case INDEX_op_ext32u_i64:
2017        return TCG_TARGET_HAS_ext32u_i64;
2018    case INDEX_op_bswap16_i64:
2019        return TCG_TARGET_HAS_bswap16_i64;
2020    case INDEX_op_bswap32_i64:
2021        return TCG_TARGET_HAS_bswap32_i64;
2022    case INDEX_op_bswap64_i64:
2023        return TCG_TARGET_HAS_bswap64_i64;
2024    case INDEX_op_not_i64:
2025        return TCG_TARGET_HAS_not_i64;
2026    case INDEX_op_neg_i64:
2027        return TCG_TARGET_HAS_neg_i64;
2028    case INDEX_op_andc_i64:
2029        return TCG_TARGET_HAS_andc_i64;
2030    case INDEX_op_orc_i64:
2031        return TCG_TARGET_HAS_orc_i64;
2032    case INDEX_op_eqv_i64:
2033        return TCG_TARGET_HAS_eqv_i64;
2034    case INDEX_op_nand_i64:
2035        return TCG_TARGET_HAS_nand_i64;
2036    case INDEX_op_nor_i64:
2037        return TCG_TARGET_HAS_nor_i64;
2038    case INDEX_op_clz_i64:
2039        return TCG_TARGET_HAS_clz_i64;
2040    case INDEX_op_ctz_i64:
2041        return TCG_TARGET_HAS_ctz_i64;
2042    case INDEX_op_ctpop_i64:
2043        return TCG_TARGET_HAS_ctpop_i64;
2044    case INDEX_op_add2_i64:
2045        return TCG_TARGET_HAS_add2_i64;
2046    case INDEX_op_sub2_i64:
2047        return TCG_TARGET_HAS_sub2_i64;
2048    case INDEX_op_mulu2_i64:
2049        return TCG_TARGET_HAS_mulu2_i64;
2050    case INDEX_op_muls2_i64:
2051        return TCG_TARGET_HAS_muls2_i64;
2052    case INDEX_op_muluh_i64:
2053        return TCG_TARGET_HAS_muluh_i64;
2054    case INDEX_op_mulsh_i64:
2055        return TCG_TARGET_HAS_mulsh_i64;
2056
2057    case INDEX_op_mov_vec:
2058    case INDEX_op_dup_vec:
2059    case INDEX_op_dupm_vec:
2060    case INDEX_op_ld_vec:
2061    case INDEX_op_st_vec:
2062    case INDEX_op_add_vec:
2063    case INDEX_op_sub_vec:
2064    case INDEX_op_and_vec:
2065    case INDEX_op_or_vec:
2066    case INDEX_op_xor_vec:
2067    case INDEX_op_cmp_vec:
2068        return have_vec;
2069    case INDEX_op_dup2_vec:
2070        return have_vec && TCG_TARGET_REG_BITS == 32;
2071    case INDEX_op_not_vec:
2072        return have_vec && TCG_TARGET_HAS_not_vec;
2073    case INDEX_op_neg_vec:
2074        return have_vec && TCG_TARGET_HAS_neg_vec;
2075    case INDEX_op_abs_vec:
2076        return have_vec && TCG_TARGET_HAS_abs_vec;
2077    case INDEX_op_andc_vec:
2078        return have_vec && TCG_TARGET_HAS_andc_vec;
2079    case INDEX_op_orc_vec:
2080        return have_vec && TCG_TARGET_HAS_orc_vec;
2081    case INDEX_op_nand_vec:
2082        return have_vec && TCG_TARGET_HAS_nand_vec;
2083    case INDEX_op_nor_vec:
2084        return have_vec && TCG_TARGET_HAS_nor_vec;
2085    case INDEX_op_eqv_vec:
2086        return have_vec && TCG_TARGET_HAS_eqv_vec;
2087    case INDEX_op_mul_vec:
2088        return have_vec && TCG_TARGET_HAS_mul_vec;
2089    case INDEX_op_shli_vec:
2090    case INDEX_op_shri_vec:
2091    case INDEX_op_sari_vec:
2092        return have_vec && TCG_TARGET_HAS_shi_vec;
2093    case INDEX_op_shls_vec:
2094    case INDEX_op_shrs_vec:
2095    case INDEX_op_sars_vec:
2096        return have_vec && TCG_TARGET_HAS_shs_vec;
2097    case INDEX_op_shlv_vec:
2098    case INDEX_op_shrv_vec:
2099    case INDEX_op_sarv_vec:
2100        return have_vec && TCG_TARGET_HAS_shv_vec;
2101    case INDEX_op_rotli_vec:
2102        return have_vec && TCG_TARGET_HAS_roti_vec;
2103    case INDEX_op_rotls_vec:
2104        return have_vec && TCG_TARGET_HAS_rots_vec;
2105    case INDEX_op_rotlv_vec:
2106    case INDEX_op_rotrv_vec:
2107        return have_vec && TCG_TARGET_HAS_rotv_vec;
2108    case INDEX_op_ssadd_vec:
2109    case INDEX_op_usadd_vec:
2110    case INDEX_op_sssub_vec:
2111    case INDEX_op_ussub_vec:
2112        return have_vec && TCG_TARGET_HAS_sat_vec;
2113    case INDEX_op_smin_vec:
2114    case INDEX_op_umin_vec:
2115    case INDEX_op_smax_vec:
2116    case INDEX_op_umax_vec:
2117        return have_vec && TCG_TARGET_HAS_minmax_vec;
2118    case INDEX_op_bitsel_vec:
2119        return have_vec && TCG_TARGET_HAS_bitsel_vec;
2120    case INDEX_op_cmpsel_vec:
2121        return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2122
2123    default:
2124        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2125        return true;
2126    }
2127}
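
    /*
     * Illustrative use (sketch, not actual expander code): op expanders
     * consult this, or the TCG_TARGET_HAS_* macros directly, and fall
     * back to an equivalent sequence when an opcode is absent:
     *
     *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
     *         ... emit ctpop_i32 directly ...
     *     } else {
     *         ... expand population count from shifts, ands and adds ...
     *     }
     */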
2128
2129static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2130
2131static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2132{
2133    TCGv_i64 extend_free[MAX_CALL_IARGS];
2134    int n_extend = 0;
2135    TCGOp *op;
2136    int i, n, pi = 0, total_args;
2137
2138    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2139        init_call_layout(info);
2140        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2141    }
2142
2143    total_args = info->nr_out + info->nr_in + 2;
2144    op = tcg_op_alloc(INDEX_op_call, total_args);
2145
2146#ifdef CONFIG_PLUGIN
2147    /* Flag helpers that may affect guest state */
2148    if (tcg_ctx->plugin_insn &&
2149        !(info->flags & TCG_CALL_PLUGIN) &&
2150        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2151        tcg_ctx->plugin_insn->calls_helpers = true;
2152    }
2153#endif
2154
2155    TCGOP_CALLO(op) = n = info->nr_out;
2156    switch (n) {
2157    case 0:
2158        tcg_debug_assert(ret == NULL);
2159        break;
2160    case 1:
2161        tcg_debug_assert(ret != NULL);
2162        op->args[pi++] = temp_arg(ret);
2163        break;
2164    case 2:
2165    case 4:
2166        tcg_debug_assert(ret != NULL);
2167        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2168        tcg_debug_assert(ret->temp_subindex == 0);
2169        for (i = 0; i < n; ++i) {
2170            op->args[pi++] = temp_arg(ret + i);
2171        }
2172        break;
2173    default:
2174        g_assert_not_reached();
2175    }
2176
2177    TCGOP_CALLI(op) = n = info->nr_in;
2178    for (i = 0; i < n; i++) {
2179        const TCGCallArgumentLoc *loc = &info->in[i];
2180        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2181
2182        switch (loc->kind) {
2183        case TCG_CALL_ARG_NORMAL:
2184        case TCG_CALL_ARG_BY_REF:
2185        case TCG_CALL_ARG_BY_REF_N:
2186            op->args[pi++] = temp_arg(ts);
2187            break;
2188
2189        case TCG_CALL_ARG_EXTEND_U:
2190        case TCG_CALL_ARG_EXTEND_S:
2191            {
2192                TCGv_i64 temp = tcg_temp_ebb_new_i64();
2193                TCGv_i32 orig = temp_tcgv_i32(ts);
2194
2195                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2196                    tcg_gen_ext_i32_i64(temp, orig);
2197                } else {
2198                    tcg_gen_extu_i32_i64(temp, orig);
2199                }
2200                op->args[pi++] = tcgv_i64_arg(temp);
2201                extend_free[n_extend++] = temp;
2202            }
2203            break;
2204
2205        default:
2206            g_assert_not_reached();
2207        }
2208    }
2209    op->args[pi++] = (uintptr_t)info->func;
2210    op->args[pi++] = (uintptr_t)info;
2211    tcg_debug_assert(pi == total_args);
2212
2213    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2214
2215    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2216    for (i = 0; i < n_extend; ++i) {
2217        tcg_temp_free_i64(extend_free[i]);
2218    }
2219}
2220
2221void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2222{
2223    tcg_gen_callN(info, ret, NULL);
2224}
2225
2226void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2227{
2228    tcg_gen_callN(info, ret, &t1);
2229}
2230
2231void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2232{
2233    TCGTemp *args[2] = { t1, t2 };
2234    tcg_gen_callN(info, ret, args);
2235}
2236
2237void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2238                   TCGTemp *t2, TCGTemp *t3)
2239{
2240    TCGTemp *args[3] = { t1, t2, t3 };
2241    tcg_gen_callN(info, ret, args);
2242}
2243
2244void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2245                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2246{
2247    TCGTemp *args[4] = { t1, t2, t3, t4 };
2248    tcg_gen_callN(info, ret, args);
2249}
2250
2251void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2252                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2253{
2254    TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2255    tcg_gen_callN(info, ret, args);
2256}
2257
2258void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2259                   TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2260{
2261    TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2262    tcg_gen_callN(info, ret, args);
2263}
2264
2265void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2266                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2267                   TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2268{
2269    TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2270    tcg_gen_callN(info, ret, args);
2271}
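
    /*
     * These fixed-arity wrappers are what the generated helper headers
     * invoke.  Sketch for a hypothetical DEF_HELPER_2(foo, i32, env, i32):
     * the generated gen_helper_foo() amounts to
     *
     *     tcg_gen_call2(&helper_info_foo, tcgv_i32_temp(ret),
     *                   tcgv_ptr_temp(tcg_env), tcgv_i32_temp(arg));
     *
     * with tcg_gen_callN() above flattening the arguments per call layout.
     */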
2272
2273static void tcg_reg_alloc_start(TCGContext *s)
2274{
2275    int i, n;
2276
2277    for (i = 0, n = s->nb_temps; i < n; i++) {
2278        TCGTemp *ts = &s->temps[i];
2279        TCGTempVal val = TEMP_VAL_MEM;
2280
2281        switch (ts->kind) {
2282        case TEMP_CONST:
2283            val = TEMP_VAL_CONST;
2284            break;
2285        case TEMP_FIXED:
2286            val = TEMP_VAL_REG;
2287            break;
2288        case TEMP_GLOBAL:
2289            break;
2290        case TEMP_EBB:
2291            val = TEMP_VAL_DEAD;
2292            /* fall through */
2293        case TEMP_TB:
2294            ts->mem_allocated = 0;
2295            break;
2296        default:
2297            g_assert_not_reached();
2298        }
2299        ts->val_type = val;
2300    }
2301
2302    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2303}
2304
2305static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2306                                 TCGTemp *ts)
2307{
2308    int idx = temp_idx(ts);
2309
2310    switch (ts->kind) {
2311    case TEMP_FIXED:
2312    case TEMP_GLOBAL:
2313        pstrcpy(buf, buf_size, ts->name);
2314        break;
2315    case TEMP_TB:
2316        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2317        break;
2318    case TEMP_EBB:
2319        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2320        break;
2321    case TEMP_CONST:
2322        switch (ts->type) {
2323        case TCG_TYPE_I32:
2324            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2325            break;
2326#if TCG_TARGET_REG_BITS > 32
2327        case TCG_TYPE_I64:
2328            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2329            break;
2330#endif
2331        case TCG_TYPE_V64:
2332        case TCG_TYPE_V128:
2333        case TCG_TYPE_V256:
2334            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2335                     64 << (ts->type - TCG_TYPE_V64), ts->val);
2336            break;
2337        default:
2338            g_assert_not_reached();
2339        }
2340        break;
2341    }
2342    return buf;
2343}
2344
2345static char *tcg_get_arg_str(TCGContext *s, char *buf,
2346                             int buf_size, TCGArg arg)
2347{
2348    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2349}
2350
2351static const char * const cond_name[] =
2352{
2353    [TCG_COND_NEVER] = "never",
2354    [TCG_COND_ALWAYS] = "always",
2355    [TCG_COND_EQ] = "eq",
2356    [TCG_COND_NE] = "ne",
2357    [TCG_COND_LT] = "lt",
2358    [TCG_COND_GE] = "ge",
2359    [TCG_COND_LE] = "le",
2360    [TCG_COND_GT] = "gt",
2361    [TCG_COND_LTU] = "ltu",
2362    [TCG_COND_GEU] = "geu",
2363    [TCG_COND_LEU] = "leu",
2364    [TCG_COND_GTU] = "gtu"
2365};
2366
2367static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2368{
2369    [MO_UB]   = "ub",
2370    [MO_SB]   = "sb",
2371    [MO_LEUW] = "leuw",
2372    [MO_LESW] = "lesw",
2373    [MO_LEUL] = "leul",
2374    [MO_LESL] = "lesl",
2375    [MO_LEUQ] = "leq",
2376    [MO_BEUW] = "beuw",
2377    [MO_BESW] = "besw",
2378    [MO_BEUL] = "beul",
2379    [MO_BESL] = "besl",
2380    [MO_BEUQ] = "beq",
2381    [MO_128 + MO_BE] = "beo",
2382    [MO_128 + MO_LE] = "leo",
2383};
2384
2385static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2386    [MO_UNALN >> MO_ASHIFT]    = "un+",
2387    [MO_ALIGN >> MO_ASHIFT]    = "al+",
2388    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2389    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2390    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2391    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2392    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2393    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2394};
2395
2396static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2397    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2398    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2399    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2400    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2401    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2402    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2403};
2404
2405static const char bswap_flag_name[][6] = {
2406    [TCG_BSWAP_IZ] = "iz",
2407    [TCG_BSWAP_OZ] = "oz",
2408    [TCG_BSWAP_OS] = "os",
2409    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2410    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2411};
2412
2413static inline bool tcg_regset_single(TCGRegSet d)
2414{
2415    return (d & (d - 1)) == 0;
2416}
2417
2418static inline TCGReg tcg_regset_first(TCGRegSet d)
2419{
2420    if (TCG_TARGET_NB_REGS <= 32) {
2421        return ctz32(d);
2422    } else {
2423        return ctz64(d);
2424    }
2425}
2426
2427/* Return only the number of characters output -- no error return. */
2428#define ne_fprintf(...) \
2429    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2430
2431static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2432{
2433    char buf[128];
2434    TCGOp *op;
2435
2436    QTAILQ_FOREACH(op, &s->ops, link) {
2437        int i, k, nb_oargs, nb_iargs, nb_cargs;
2438        const TCGOpDef *def;
2439        TCGOpcode c;
2440        int col = 0;
2441
2442        c = op->opc;
2443        def = &tcg_op_defs[c];
2444
2445        if (c == INDEX_op_insn_start) {
2446            nb_oargs = 0;
2447            col += ne_fprintf(f, "\n ----");
2448
2449            for (i = 0, k = s->insn_start_words; i < k; ++i) {
2450                col += ne_fprintf(f, " %016" PRIx64,
2451                                  tcg_get_insn_start_param(op, i));
2452            }
2453        } else if (c == INDEX_op_call) {
2454            const TCGHelperInfo *info = tcg_call_info(op);
2455            void *func = tcg_call_func(op);
2456
2457            /* variable number of arguments */
2458            nb_oargs = TCGOP_CALLO(op);
2459            nb_iargs = TCGOP_CALLI(op);
2460            nb_cargs = def->nb_cargs;
2461
2462            col += ne_fprintf(f, " %s ", def->name);
2463
2464            /*
2465             * Print the function name from TCGHelperInfo, if available.
2466             * Note that plugins have a template function for the info,
2467             * but the actual function pointer comes from the plugin.
2468             */
2469            if (func == info->func) {
2470                col += ne_fprintf(f, "%s", info->name);
2471            } else {
2472                col += ne_fprintf(f, "plugin(%p)", func);
2473            }
2474
2475            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2476            for (i = 0; i < nb_oargs; i++) {
2477                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2478                                                            op->args[i]));
2479            }
2480            for (i = 0; i < nb_iargs; i++) {
2481                TCGArg arg = op->args[nb_oargs + i];
2482                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2483                col += ne_fprintf(f, ",%s", t);
2484            }
2485        } else {
2486            col += ne_fprintf(f, " %s ", def->name);
2487
2488            nb_oargs = def->nb_oargs;
2489            nb_iargs = def->nb_iargs;
2490            nb_cargs = def->nb_cargs;
2491
2492            if (def->flags & TCG_OPF_VECTOR) {
2493                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2494                                  8 << TCGOP_VECE(op));
2495            }
2496
2497            k = 0;
2498            for (i = 0; i < nb_oargs; i++) {
2499                const char *sep = k ? "," : "";
2500                col += ne_fprintf(f, "%s%s", sep,
2501                                  tcg_get_arg_str(s, buf, sizeof(buf),
2502                                                  op->args[k++]));
2503            }
2504            for (i = 0; i < nb_iargs; i++) {
2505                const char *sep = k ? "," : "";
2506                col += ne_fprintf(f, "%s%s", sep,
2507                                  tcg_get_arg_str(s, buf, sizeof(buf),
2508                                                  op->args[k++]));
2509            }
2510            switch (c) {
2511            case INDEX_op_brcond_i32:
2512            case INDEX_op_setcond_i32:
2513            case INDEX_op_movcond_i32:
2514            case INDEX_op_brcond2_i32:
2515            case INDEX_op_setcond2_i32:
2516            case INDEX_op_brcond_i64:
2517            case INDEX_op_setcond_i64:
2518            case INDEX_op_movcond_i64:
2519            case INDEX_op_cmp_vec:
2520            case INDEX_op_cmpsel_vec:
2521                if (op->args[k] < ARRAY_SIZE(cond_name)
2522                    && cond_name[op->args[k]]) {
2523                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2524                } else {
2525                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2526                }
2527                i = 1;
2528                break;
2529            case INDEX_op_qemu_ld_a32_i32:
2530            case INDEX_op_qemu_ld_a64_i32:
2531            case INDEX_op_qemu_st_a32_i32:
2532            case INDEX_op_qemu_st_a64_i32:
2533            case INDEX_op_qemu_st8_a32_i32:
2534            case INDEX_op_qemu_st8_a64_i32:
2535            case INDEX_op_qemu_ld_a32_i64:
2536            case INDEX_op_qemu_ld_a64_i64:
2537            case INDEX_op_qemu_st_a32_i64:
2538            case INDEX_op_qemu_st_a64_i64:
2539            case INDEX_op_qemu_ld_a32_i128:
2540            case INDEX_op_qemu_ld_a64_i128:
2541            case INDEX_op_qemu_st_a32_i128:
2542            case INDEX_op_qemu_st_a64_i128:
2543                {
2544                    const char *s_al, *s_op, *s_at;
2545                    MemOpIdx oi = op->args[k++];
2546                    MemOp mop = get_memop(oi);
2547                    unsigned ix = get_mmuidx(oi);
2548
2549                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2550                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2551                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2552                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2553
2554                    /* If all fields are accounted for, print symbolically. */
2555                    if (!mop && s_al && s_op && s_at) {
2556                        col += ne_fprintf(f, ",%s%s%s,%u",
2557                                          s_at, s_al, s_op, ix);
2558                    } else {
2559                        mop = get_memop(oi);
2560                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2561                    }
2562                    i = 1;
2563                }
2564                break;
2565            case INDEX_op_bswap16_i32:
2566            case INDEX_op_bswap16_i64:
2567            case INDEX_op_bswap32_i32:
2568            case INDEX_op_bswap32_i64:
2569            case INDEX_op_bswap64_i64:
2570                {
2571                    TCGArg flags = op->args[k];
2572                    const char *name = NULL;
2573
2574                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
2575                        name = bswap_flag_name[flags];
2576                    }
2577                    if (name) {
2578                        col += ne_fprintf(f, ",%s", name);
2579                    } else {
2580                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2581                    }
2582                    i = k = 1;
2583                }
2584                break;
2585            default:
2586                i = 0;
2587                break;
2588            }
2589            switch (c) {
2590            case INDEX_op_set_label:
2591            case INDEX_op_br:
2592            case INDEX_op_brcond_i32:
2593            case INDEX_op_brcond_i64:
2594            case INDEX_op_brcond2_i32:
2595                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2596                                  arg_label(op->args[k])->id);
2597                i++, k++;
2598                break;
2599            case INDEX_op_mb:
2600                {
2601                    TCGBar membar = op->args[k];
2602                    const char *b_op, *m_op;
2603
2604                    switch (membar & TCG_BAR_SC) {
2605                    case 0:
2606                        b_op = "none";
2607                        break;
2608                    case TCG_BAR_LDAQ:
2609                        b_op = "acq";
2610                        break;
2611                    case TCG_BAR_STRL:
2612                        b_op = "rel";
2613                        break;
2614                    case TCG_BAR_SC:
2615                        b_op = "seq";
2616                        break;
2617                    default:
2618                        g_assert_not_reached();
2619                    }
2620
2621                    switch (membar & TCG_MO_ALL) {
2622                    case 0:
2623                        m_op = "none";
2624                        break;
2625                    case TCG_MO_LD_LD:
2626                        m_op = "rr";
2627                        break;
2628                    case TCG_MO_LD_ST:
2629                        m_op = "rw";
2630                        break;
2631                    case TCG_MO_ST_LD:
2632                        m_op = "wr";
2633                        break;
2634                    case TCG_MO_ST_ST:
2635                        m_op = "ww";
2636                        break;
2637                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
2638                        m_op = "rr+rw";
2639                        break;
2640                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
2641                        m_op = "rr+wr";
2642                        break;
2643                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
2644                        m_op = "rr+ww";
2645                        break;
2646                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
2647                        m_op = "rw+wr";
2648                        break;
2649                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
2650                        m_op = "rw+ww";
2651                        break;
2652                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
2653                        m_op = "wr+ww";
2654                        break;
2655                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2656                        m_op = "rr+rw+wr";
2657                        break;
2658                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2659                        m_op = "rr+rw+ww";
2660                        break;
2661                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2662                        m_op = "rr+wr+ww";
2663                        break;
2664                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2665                        m_op = "rw+wr+ww";
2666                        break;
2667                    case TCG_MO_ALL:
2668                        m_op = "all";
2669                        break;
2670                    default:
2671                        g_assert_not_reached();
2672                    }
2673
2674                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2675                    i++, k++;
2676                }
2677                break;
2678            default:
2679                break;
2680            }
2681            for (; i < nb_cargs; i++, k++) {
2682                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2683                                  op->args[k]);
2684            }
2685        }
2686
2687        if (have_prefs || op->life) {
2688            for (; col < 40; ++col) {
2689                putc(' ', f);
2690            }
2691        }
2692
2693        if (op->life) {
2694            unsigned life = op->life;
2695
2696            if (life & (SYNC_ARG * 3)) {
2697                ne_fprintf(f, "  sync:");
2698                for (i = 0; i < 2; ++i) {
2699                    if (life & (SYNC_ARG << i)) {
2700                        ne_fprintf(f, " %d", i);
2701                    }
2702                }
2703            }
2704            life /= DEAD_ARG;
2705            if (life) {
2706                ne_fprintf(f, "  dead:");
2707                for (i = 0; life; ++i, life >>= 1) {
2708                    if (life & 1) {
2709                        ne_fprintf(f, " %d", i);
2710                    }
2711                }
2712            }
2713        }
2714
2715        if (have_prefs) {
2716            for (i = 0; i < nb_oargs; ++i) {
2717                TCGRegSet set = output_pref(op, i);
2718
2719                if (i == 0) {
2720                    ne_fprintf(f, "  pref=");
2721                } else {
2722                    ne_fprintf(f, ",");
2723                }
2724                if (set == 0) {
2725                    ne_fprintf(f, "none");
2726                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2727                    ne_fprintf(f, "all");
2728#ifdef CONFIG_DEBUG_TCG
2729                } else if (tcg_regset_single(set)) {
2730                    TCGReg reg = tcg_regset_first(set);
2731                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2732#endif
2733                } else if (TCG_TARGET_NB_REGS <= 32) {
2734                    ne_fprintf(f, "0x%x", (uint32_t)set);
2735                } else {
2736                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2737                }
2738            }
2739        }
2740
2741        putc('\n', f);
2742    }
2743}
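
    /*
     * Output shape produced above (schematic): one op per line,
     *
     *     opname out0,...,in0,...,$carg0,...    sync: n  dead: n  pref=...
     *
     * with " ----" lines marking insn_start boundaries; the sync/dead/pref
     * columns appear only when liveness or preference data is present.
     */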
2744
2745/* we give more priority to constraints with fewer registers */
2746static int get_constraint_priority(const TCGOpDef *def, int k)
2747{
2748    const TCGArgConstraint *arg_ct = &def->args_ct[k];
2749    int n = ctpop64(arg_ct->regs);
2750
2751    /*
2752     * Sort constraints of a single register first, which includes output
2753     * aliases (which must exactly match the input already allocated).
2754     */
2755    if (n == 1 || arg_ct->oalias) {
2756        return INT_MAX;
2757    }
2758
2759    /*
2760     * Sort register pairs next, first then second immediately after.
2761     * Arbitrarily sort multiple pairs by the index of the first reg;
2762     * there shouldn't be many pairs.
2763     */
2764    switch (arg_ct->pair) {
2765    case 1:
2766    case 3:
2767        return (k + 1) * 2;
2768    case 2:
2769        return (arg_ct->pair_index + 1) * 2 - 1;
2770    }
2771
2772    /* Finally, sort by decreasing register count. */
2773    assert(n > 1);
2774    return -n;
2775}
2776
2777/* sort from highest priority to lowest */
2778static void sort_constraints(TCGOpDef *def, int start, int n)
2779{
2780    int i, j;
2781    TCGArgConstraint *a = def->args_ct;
2782
2783    for (i = 0; i < n; i++) {
2784        a[start + i].sort_index = start + i;
2785    }
2786    if (n <= 1) {
2787        return;
2788    }
2789    for (i = 0; i < n - 1; i++) {
2790        for (j = i + 1; j < n; j++) {
2791            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2792            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2793            if (p1 < p2) {
2794                int tmp = a[start + i].sort_index;
2795                a[start + i].sort_index = a[start + j].sort_index;
2796                a[start + j].sort_index = tmp;
2797            }
2798        }
2799    }
2800}
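
    /*
     * Worked example (illustrative): given input constraints { "r", "c" },
     * where "c" denotes a single fixed register such as x86's %ecx, the
     * single-register constraint sorts to priority INT_MAX and is thus
     * allocated first, so the plain "r" operand cannot steal that register.
     */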
2801
2802static void process_op_defs(TCGContext *s)
2803{
2804    TCGOpcode op;
2805
2806    for (op = 0; op < NB_OPS; op++) {
2807        TCGOpDef *def = &tcg_op_defs[op];
2808        const TCGTargetOpDef *tdefs;
2809        bool saw_alias_pair = false;
2810        int i, o, i2, o2, nb_args;
2811
2812        if (def->flags & TCG_OPF_NOT_PRESENT) {
2813            continue;
2814        }
2815
2816        nb_args = def->nb_iargs + def->nb_oargs;
2817        if (nb_args == 0) {
2818            continue;
2819        }
2820
2821        /*
2822         * Macro magic should make it impossible, but double-check that
2823         * the array index is in range.  Since the signedness of an enum
2824         * is implementation defined, force the result to unsigned.
2825         */
2826        unsigned con_set = tcg_target_op_def(op);
2827        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2828        tdefs = &constraint_sets[con_set];
2829
2830        for (i = 0; i < nb_args; i++) {
2831            const char *ct_str = tdefs->args_ct_str[i];
2832            bool input_p = i >= def->nb_oargs;
2833
2834            /* Incomplete TCGTargetOpDef entry. */
2835            tcg_debug_assert(ct_str != NULL);
2836
2837            switch (*ct_str) {
2838            case '0' ... '9':
2839                o = *ct_str - '0';
2840                tcg_debug_assert(input_p);
2841                tcg_debug_assert(o < def->nb_oargs);
2842                tcg_debug_assert(def->args_ct[o].regs != 0);
2843                tcg_debug_assert(!def->args_ct[o].oalias);
2844                def->args_ct[i] = def->args_ct[o];
2845                /* The output sets oalias.  */
2846                def->args_ct[o].oalias = 1;
2847                def->args_ct[o].alias_index = i;
2848                /* The input sets ialias. */
2849                def->args_ct[i].ialias = 1;
2850                def->args_ct[i].alias_index = o;
2851                if (def->args_ct[i].pair) {
2852                    saw_alias_pair = true;
2853                }
2854                tcg_debug_assert(ct_str[1] == '\0');
2855                continue;
2856
2857            case '&':
2858                tcg_debug_assert(!input_p);
2859                def->args_ct[i].newreg = true;
2860                ct_str++;
2861                break;
2862
2863            case 'p': /* plus */
2864                /* Allocate to the register after the previous. */
2865                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2866                o = i - 1;
2867                tcg_debug_assert(!def->args_ct[o].pair);
2868                tcg_debug_assert(!def->args_ct[o].ct);
2869                def->args_ct[i] = (TCGArgConstraint){
2870                    .pair = 2,
2871                    .pair_index = o,
2872                    .regs = def->args_ct[o].regs << 1,
2873                };
2874                def->args_ct[o].pair = 1;
2875                def->args_ct[o].pair_index = i;
2876                tcg_debug_assert(ct_str[1] == '\0');
2877                continue;
2878
2879            case 'm': /* minus */
2880                /* Allocate to the register before the previous. */
2881                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2882                o = i - 1;
2883                tcg_debug_assert(!def->args_ct[o].pair);
2884                tcg_debug_assert(!def->args_ct[o].ct);
2885                def->args_ct[i] = (TCGArgConstraint){
2886                    .pair = 1,
2887                    .pair_index = o,
2888                    .regs = def->args_ct[o].regs >> 1,
2889                };
2890                def->args_ct[o].pair = 2;
2891                def->args_ct[o].pair_index = i;
2892                tcg_debug_assert(ct_str[1] == '\0');
2893                continue;
2894            }
2895
2896            do {
2897                switch (*ct_str) {
2898                case 'i':
2899                    def->args_ct[i].ct |= TCG_CT_CONST;
2900                    break;
2901
2902                /* Include all of the target-specific constraints. */
2903
2904#undef CONST
2905#define CONST(CASE, MASK) \
2906    case CASE: def->args_ct[i].ct |= MASK; break;
2907#define REGS(CASE, MASK) \
2908    case CASE: def->args_ct[i].regs |= MASK; break;
2909
2910#include "tcg-target-con-str.h"
2911
2912#undef REGS
2913#undef CONST
2914                default:
2915                case '0' ... '9':
2916                case '&':
2917                case 'p':
2918                case 'm':
2919                    /* Typo in TCGTargetOpDef constraint. */
2920                    g_assert_not_reached();
2921                }
2922            } while (*++ct_str != '\0');
2923        }
2924
2925        /* TCGTargetOpDef entry with too much information? */
2926        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2927
2928        /*
2929         * Fix up output pairs that are aliased with inputs.
2930         * When we created the alias, we copied pair from the output.
2931         * There are three cases:
2932         *    (1a) Pairs of inputs alias pairs of outputs.
2933         *    (1b) One input aliases the first of a pair of outputs.
2934         *    (2)  One input aliases the second of a pair of outputs.
2935         *
2936         * Case 1a is handled by making sure that the pair_index'es are
2937         * properly updated so that they appear the same as a pair of inputs.
2938         *
2939         * Case 1b is handled by setting the pair_index of the input to
2940         * itself, simply so it doesn't point to an unrelated argument.
2941         * Since we don't encounter the "second" during the input allocation
2942         * phase, nothing happens with the second half of the input pair.
2943         *
2944         * Case 2 is handled by setting the second input to pair=3, the
2945         * first output to pair=3, and the pair_index'es to match.
2946         */
2947        if (saw_alias_pair) {
2948            for (i = def->nb_oargs; i < nb_args; i++) {
2949                /*
2950                 * Since [0-9pm] must be alone in the constraint string,
2951                 * the only way they can both be set is if the pair comes
2952                 * from the output alias.
2953                 */
2954                if (!def->args_ct[i].ialias) {
2955                    continue;
2956                }
2957                switch (def->args_ct[i].pair) {
2958                case 0:
2959                    break;
2960                case 1:
2961                    o = def->args_ct[i].alias_index;
2962                    o2 = def->args_ct[o].pair_index;
2963                    tcg_debug_assert(def->args_ct[o].pair == 1);
2964                    tcg_debug_assert(def->args_ct[o2].pair == 2);
2965                    if (def->args_ct[o2].oalias) {
2966                        /* Case 1a */
2967                        i2 = def->args_ct[o2].alias_index;
2968                        tcg_debug_assert(def->args_ct[i2].pair == 2);
2969                        def->args_ct[i2].pair_index = i;
2970                        def->args_ct[i].pair_index = i2;
2971                    } else {
2972                        /* Case 1b */
2973                        def->args_ct[i].pair_index = i;
2974                    }
2975                    break;
2976                case 2:
2977                    o = def->args_ct[i].alias_index;
2978                    o2 = def->args_ct[o].pair_index;
2979                    tcg_debug_assert(def->args_ct[o].pair == 2);
2980                    tcg_debug_assert(def->args_ct[o2].pair == 1);
2981                    if (def->args_ct[o2].oalias) {
2982                        /* Case 1a */
2983                        i2 = def->args_ct[o2].alias_index;
2984                        tcg_debug_assert(def->args_ct[i2].pair == 1);
2985                        def->args_ct[i2].pair_index = i;
2986                        def->args_ct[i].pair_index = i2;
2987                    } else {
2988                        /* Case 2 */
2989                        def->args_ct[i].pair = 3;
2990                        def->args_ct[o2].pair = 3;
2991                        def->args_ct[i].pair_index = o2;
2992                        def->args_ct[o2].pair_index = i;
2993                    }
2994                    break;
2995                default:
2996                    g_assert_not_reached();
2997                }
2998            }
2999        }
3000
3001        /* sort the constraints (XXX: this is just a heuristic) */
3002        sort_constraints(def, 0, def->nb_oargs);
3003        sort_constraints(def, def->nb_oargs, def->nb_iargs);
3004    }
3005}
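
    /*
     * Constraint-string recap (mirrors the parser above; the values are
     * illustrative): an entry such as { "r", "0", "ri" } for a two-address
     * op means output 0 in any register, first input aliased to output 0,
     * and second input register-or-immediate.  "&" requests a fresh output
     * register that does not overlap the inputs, while "p"/"m" tie an
     * argument to the register after/before the previous one to form an
     * allocated pair.
     */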
3006
3007static void remove_label_use(TCGOp *op, int idx)
3008{
3009    TCGLabel *label = arg_label(op->args[idx]);
3010    TCGLabelUse *use;
3011
3012    QSIMPLEQ_FOREACH(use, &label->branches, next) {
3013        if (use->op == op) {
3014            QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3015            return;
3016        }
3017    }
3018    g_assert_not_reached();
3019}
3020
3021void tcg_op_remove(TCGContext *s, TCGOp *op)
3022{
3023    switch (op->opc) {
3024    case INDEX_op_br:
3025        remove_label_use(op, 0);
3026        break;
3027    case INDEX_op_brcond_i32:
3028    case INDEX_op_brcond_i64:
3029        remove_label_use(op, 3);
3030        break;
3031    case INDEX_op_brcond2_i32:
3032        remove_label_use(op, 5);
3033        break;
3034    default:
3035        break;
3036    }
3037
3038    QTAILQ_REMOVE(&s->ops, op, link);
3039    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3040    s->nb_ops--;
3041}
3042
3043void tcg_remove_ops_after(TCGOp *op)
3044{
3045    TCGContext *s = tcg_ctx;
3046
3047    while (true) {
3048        TCGOp *last = tcg_last_op();
3049        if (last == op) {
3050            return;
3051        }
3052        tcg_op_remove(s, last);
3053    }
3054}
3055
3056static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3057{
3058    TCGContext *s = tcg_ctx;
3059    TCGOp *op = NULL;
3060
3061    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3062        QTAILQ_FOREACH(op, &s->free_ops, link) {
3063            if (nargs <= op->nargs) {
3064                QTAILQ_REMOVE(&s->free_ops, op, link);
3065                nargs = op->nargs;
3066                goto found;
3067            }
3068        }
3069    }
3070
3071    /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3072    nargs = MAX(4, nargs);
3073    op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3074
3075 found:
3076    memset(op, 0, offsetof(TCGOp, link));
3077    op->opc = opc;
3078    op->nargs = nargs;
3079
3080    /* Check for bitfield overflow. */
3081    tcg_debug_assert(op->nargs == nargs);
3082
3083    s->nb_ops++;
3084    return op;
3085}
3086
3087TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3088{
3089    TCGOp *op = tcg_op_alloc(opc, nargs);
3090    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3091    return op;
3092}
3093
3094TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3095                            TCGOpcode opc, unsigned nargs)
3096{
3097    TCGOp *new_op = tcg_op_alloc(opc, nargs);
3098    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3099    return new_op;
3100}
3101
3102TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3103                           TCGOpcode opc, unsigned nargs)
3104{
3105    TCGOp *new_op = tcg_op_alloc(opc, nargs);
3106    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3107    return new_op;
3108}
3109
3110static void move_label_uses(TCGLabel *to, TCGLabel *from)
3111{
3112    TCGLabelUse *u;
3113
3114    QSIMPLEQ_FOREACH(u, &from->branches, next) {
3115        TCGOp *op = u->op;
3116        switch (op->opc) {
3117        case INDEX_op_br:
3118            op->args[0] = label_arg(to);
3119            break;
3120        case INDEX_op_brcond_i32:
3121        case INDEX_op_brcond_i64:
3122            op->args[3] = label_arg(to);
3123            break;
3124        case INDEX_op_brcond2_i32:
3125            op->args[5] = label_arg(to);
3126            break;
3127        default:
3128            g_assert_not_reached();
3129        }
3130    }
3131
3132    QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3133}
3134
3135/* Reachability analysis: remove unreachable code.  */
3136static void __attribute__((noinline))
3137reachable_code_pass(TCGContext *s)
3138{
3139    TCGOp *op, *op_next, *op_prev;
3140    bool dead = false;
3141
3142    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3143        bool remove = dead;
3144        TCGLabel *label;
3145
3146        switch (op->opc) {
3147        case INDEX_op_set_label:
3148            label = arg_label(op->args[0]);
3149
3150            /*
3151             * Note that the first op in the TB is always a load,
3152             * so there is always something before a label.
3153             */
3154            op_prev = QTAILQ_PREV(op, link);
3155
3156            /*
3157             * If we find two sequential labels, move all branches to
3158             * reference the second label and remove the first label.
3159             * Do this before branch to next optimization, so that the
3160             * middle label is out of the way.
3161             */
3162            if (op_prev->opc == INDEX_op_set_label) {
3163                move_label_uses(label, arg_label(op_prev->args[0]));
3164                tcg_op_remove(s, op_prev);
3165                op_prev = QTAILQ_PREV(op, link);
3166            }
3167
3168            /*
3169             * Optimization can fold conditional branches to unconditional.
3170             * If we find a label which is preceded by an unconditional
3171             * branch to next, remove the branch.  We couldn't do this when
3172             * processing the branch because any dead code between the branch
3173             * and label had not yet been removed.
3174             */
3175            if (op_prev->opc == INDEX_op_br &&
3176                label == arg_label(op_prev->args[0])) {
3177                tcg_op_remove(s, op_prev);
3178                /* Fall through means insns become live again.  */
3179                dead = false;
3180            }
3181
3182            if (QSIMPLEQ_EMPTY(&label->branches)) {
3183                /*
3184                 * While there is an occasional backward branch, virtually
3185                 * all branches generated by the translators are forward.
3186                 * This means that generally we will have already removed
3187                 * all references to this label, and there is little to
3188                 * be gained by iterating.
3189                 */
3190                remove = true;
3191            } else {
3192                /* Once we see a label, insns become live again.  */
3193                dead = false;
3194                remove = false;
3195            }
3196            break;
3197
3198        case INDEX_op_br:
3199        case INDEX_op_exit_tb:
3200        case INDEX_op_goto_ptr:
3201            /* Unconditional branches; everything following is dead.  */
3202            dead = true;
3203            break;
3204
3205        case INDEX_op_call:
3206            /* Notice noreturn helper calls, raising exceptions.  */
3207            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3208                dead = true;
3209            }
3210            break;
3211
3212        case INDEX_op_insn_start:
3213            /* Never remove -- we need to keep these for unwind.  */
3214            remove = false;
3215            break;
3216
3217        default:
3218            break;
3219        }
3220
3221        if (remove) {
3222            tcg_op_remove(s, op);
3223        }
3224    }
3225}
3226
3227#define TS_DEAD  1
3228#define TS_MEM   2
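/*
 * State bits for the liveness passes, combined in ts->state.
 * TS_DEAD means no later use of the value remains; TS_MEM means the
 * value must also be present in its canonical memory slot.  E.g. at
 * the end of the TB (la_func_end) every global is TS_DEAD | TS_MEM:
 * no register copy is needed, but the memory slot must be current.
 */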
3229
3230#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3231#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3232
3233/* For liveness_pass_1, the register preferences for a given temp.  */
3234static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3235{
3236    return ts->state_ptr;
3237}
3238
3239/* For liveness_pass_1, reset the preferences for a given temp to the
3240 * maximal regset for its type.
3241 */
3242static inline void la_reset_pref(TCGTemp *ts)
3243{
3244    *la_temp_pref(ts)
3245        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3246}
3247
3248/* liveness analysis: end of function: all temps are dead, and globals
3249   should be in memory. */
3250static void la_func_end(TCGContext *s, int ng, int nt)
3251{
3252    int i;
3253
3254    for (i = 0; i < ng; ++i) {
3255        s->temps[i].state = TS_DEAD | TS_MEM;
3256        la_reset_pref(&s->temps[i]);
3257    }
3258    for (i = ng; i < nt; ++i) {
3259        s->temps[i].state = TS_DEAD;
3260        la_reset_pref(&s->temps[i]);
3261    }
3262}
3263
3264/* liveness analysis: end of basic block: all temps are dead, globals
3265   and local temps should be in memory. */
3266static void la_bb_end(TCGContext *s, int ng, int nt)
3267{
3268    int i;
3269
3270    for (i = 0; i < nt; ++i) {
3271        TCGTemp *ts = &s->temps[i];
3272        int state;
3273
3274        switch (ts->kind) {
3275        case TEMP_FIXED:
3276        case TEMP_GLOBAL:
3277        case TEMP_TB:
3278            state = TS_DEAD | TS_MEM;
3279            break;
3280        case TEMP_EBB:
3281        case TEMP_CONST:
3282            state = TS_DEAD;
3283            break;
3284        default:
3285            g_assert_not_reached();
3286        }
3287        ts->state = state;
3288        la_reset_pref(ts);
3289    }
3290}
3291
3292/* liveness analysis: sync globals back to memory.  */
3293static void la_global_sync(TCGContext *s, int ng)
3294{
3295    int i;
3296
3297    for (i = 0; i < ng; ++i) {
3298        int state = s->temps[i].state;
3299        s->temps[i].state = state | TS_MEM;
3300        if (state == TS_DEAD) {
3301            /* If the global was previously dead, reset prefs.  */
3302            la_reset_pref(&s->temps[i]);
3303        }
3304    }
3305}
3306
3307/*
3308 * liveness analysis: conditional branch: all temps are dead unless
3309 * explicitly live-across-conditional-branch, globals and local temps
3310 * should be synced.
3311 */
3312static void la_bb_sync(TCGContext *s, int ng, int nt)
3313{
3314    la_global_sync(s, ng);
3315
3316    for (int i = ng; i < nt; ++i) {
3317        TCGTemp *ts = &s->temps[i];
3318        int state;
3319
3320        switch (ts->kind) {
3321        case TEMP_TB:
3322            state = ts->state;
3323            ts->state = state | TS_MEM;
3324            if (state != TS_DEAD) {
3325                continue;
3326            }
3327            break;
3328        case TEMP_EBB:
3329        case TEMP_CONST:
3330            continue;
3331        default:
3332            g_assert_not_reached();
3333        }
3334        la_reset_pref(&s->temps[i]);
3335    }
3336}
3337
3338/* liveness analysis: sync globals back to memory and kill.  */
3339static void la_global_kill(TCGContext *s, int ng)
3340{
3341    int i;
3342
3343    for (i = 0; i < ng; i++) {
3344        s->temps[i].state = TS_DEAD | TS_MEM;
3345        la_reset_pref(&s->temps[i]);
3346    }
3347}
3348
3349/* liveness analysis: note live globals crossing calls.  */
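/*
 * E.g. (illustrative; register names are made up): on a host where
 * r0-r5 are call-clobbered, a live temp preferring {r3, r8} keeps
 * only {r8} across the call, while a temp preferring only {r3} is
 * reset to all call-saved registers available for its type.
 */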
3350static void la_cross_call(TCGContext *s, int nt)
3351{
3352    TCGRegSet mask = ~tcg_target_call_clobber_regs;
3353    int i;
3354
3355    for (i = 0; i < nt; i++) {
3356        TCGTemp *ts = &s->temps[i];
3357        if (!(ts->state & TS_DEAD)) {
3358            TCGRegSet *pset = la_temp_pref(ts);
3359            TCGRegSet set = *pset;
3360
3361            set &= mask;
3362            /* If the combination is not possible, restart.  */
3363            if (set == 0) {
3364                set = tcg_target_available_regs[ts->type] & mask;
3365            }
3366            *pset = set;
3367        }
3368    }
3369}
3370
3371/*
3372 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3373 * to TEMP_EBB, if possible.
3374 */
3375static void __attribute__((noinline))
3376liveness_pass_0(TCGContext *s)
3377{
3378    void * const multiple_ebb = (void *)(uintptr_t)-1;
3379    int nb_temps = s->nb_temps;
3380    TCGOp *op, *ebb;
3381
3382    for (int i = s->nb_globals; i < nb_temps; ++i) {
3383        s->temps[i].state_ptr = NULL;
3384    }
3385
3386    /*
3387     * Represent each EBB by the op at which it begins.  In the case of
3388     * the first EBB, this is the first op, otherwise it is a label.
3389     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3390     * within a single EBB, else MULTIPLE_EBB.
3391     */
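    /*
     * E.g. a TEMP_TB written and read only between one label and the
     * next records the same EBB for every use, and is demoted to
     * TEMP_EBB by the loop at the end of this function.
     */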
3392    ebb = QTAILQ_FIRST(&s->ops);
3393    QTAILQ_FOREACH(op, &s->ops, link) {
3394        const TCGOpDef *def;
3395        int nb_oargs, nb_iargs;
3396
3397        switch (op->opc) {
3398        case INDEX_op_set_label:
3399            ebb = op;
3400            continue;
3401        case INDEX_op_discard:
3402            continue;
3403        case INDEX_op_call:
3404            nb_oargs = TCGOP_CALLO(op);
3405            nb_iargs = TCGOP_CALLI(op);
3406            break;
3407        default:
3408            def = &tcg_op_defs[op->opc];
3409            nb_oargs = def->nb_oargs;
3410            nb_iargs = def->nb_iargs;
3411            break;
3412        }
3413
3414        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3415            TCGTemp *ts = arg_temp(op->args[i]);
3416
3417            if (ts->kind != TEMP_TB) {
3418                continue;
3419            }
3420            if (ts->state_ptr == NULL) {
3421                ts->state_ptr = ebb;
3422            } else if (ts->state_ptr != ebb) {
3423                ts->state_ptr = multiple_ebb;
3424            }
3425        }
3426    }
3427
3428    /*
3429     * For TEMP_TB that turned out not to be used beyond one EBB,
3430     * reduce the liveness to TEMP_EBB.
3431     */
3432    for (int i = s->nb_globals; i < nb_temps; ++i) {
3433        TCGTemp *ts = &s->temps[i];
3434        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3435            ts->kind = TEMP_EBB;
3436        }
3437    }
3438}
3439
3440/* Liveness analysis: update each op's life mask to tell whether a
3441   given input argument is dead. Instructions updating dead
3442   temporaries are removed. */
3443static void __attribute__((noinline))
3444liveness_pass_1(TCGContext *s)
3445{
3446    int nb_globals = s->nb_globals;
3447    int nb_temps = s->nb_temps;
3448    TCGOp *op, *op_prev;
3449    TCGRegSet *prefs;
3450    int i;
3451
3452    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3453    for (i = 0; i < nb_temps; ++i) {
3454        s->temps[i].state_ptr = prefs + i;
3455    }
3456
3457    /* ??? Should be redundant with the exit_tb that ends the TB.  */
3458    la_func_end(s, nb_globals, nb_temps);
3459
3460    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3461        int nb_iargs, nb_oargs;
3462        TCGOpcode opc_new, opc_new2;
3463        bool have_opc_new2;
3464        TCGLifeData arg_life = 0;
3465        TCGTemp *ts;
3466        TCGOpcode opc = op->opc;
3467        const TCGOpDef *def = &tcg_op_defs[opc];
3468
3469        switch (opc) {
3470        case INDEX_op_call:
3471            {
3472                const TCGHelperInfo *info = tcg_call_info(op);
3473                int call_flags = tcg_call_flags(op);
3474
3475                nb_oargs = TCGOP_CALLO(op);
3476                nb_iargs = TCGOP_CALLI(op);
3477
3478                /* pure functions can be removed if their result is unused */
3479                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3480                    for (i = 0; i < nb_oargs; i++) {
3481                        ts = arg_temp(op->args[i]);
3482                        if (ts->state != TS_DEAD) {
3483                            goto do_not_remove_call;
3484                        }
3485                    }
3486                    goto do_remove;
3487                }
3488            do_not_remove_call:
3489
3490                /* Output args are dead.  */
3491                for (i = 0; i < nb_oargs; i++) {
3492                    ts = arg_temp(op->args[i]);
3493                    if (ts->state & TS_DEAD) {
3494                        arg_life |= DEAD_ARG << i;
3495                    }
3496                    if (ts->state & TS_MEM) {
3497                        arg_life |= SYNC_ARG << i;
3498                    }
3499                    ts->state = TS_DEAD;
3500                    la_reset_pref(ts);
3501                }
3502
3503                /* Not used -- it will be tcg_target_call_oarg_reg().  */
3504                memset(op->output_pref, 0, sizeof(op->output_pref));
3505
3506                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3507                                    TCG_CALL_NO_READ_GLOBALS))) {
3508                    la_global_kill(s, nb_globals);
3509                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3510                    la_global_sync(s, nb_globals);
3511                }
3512
3513                /* Record arguments that die in this helper.  */
3514                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3515                    ts = arg_temp(op->args[i]);
3516                    if (ts->state & TS_DEAD) {
3517                        arg_life |= DEAD_ARG << i;
3518                    }
3519                }
3520
3521                /* For all live registers, remove call-clobbered prefs.  */
3522                la_cross_call(s, nb_temps);
3523
3524                /*
3525                 * Input arguments are live for preceding opcodes.
3526                 *
3527                 * For those arguments that die, and will be allocated in
3528                 * registers, clear the register set for that arg, to be
3529                 * filled in below.  For args that will be on the stack,
3530                 * reset to any available reg.  Process arguments in reverse
3531                 * order so that if a temp is used more than once, the stack
3532                 * reset to max happens before the register reset to 0.
3533                 */
3534                for (i = nb_iargs - 1; i >= 0; i--) {
3535                    const TCGCallArgumentLoc *loc = &info->in[i];
3536                    ts = arg_temp(op->args[nb_oargs + i]);
3537
3538                    if (ts->state & TS_DEAD) {
3539                        switch (loc->kind) {
3540                        case TCG_CALL_ARG_NORMAL:
3541                        case TCG_CALL_ARG_EXTEND_U:
3542                        case TCG_CALL_ARG_EXTEND_S:
3543                            if (arg_slot_reg_p(loc->arg_slot)) {
3544                                *la_temp_pref(ts) = 0;
3545                                break;
3546                            }
3547                            /* fall through */
3548                        default:
3549                            *la_temp_pref(ts) =
3550                                tcg_target_available_regs[ts->type];
3551                            break;
3552                        }
3553                        ts->state &= ~TS_DEAD;
3554                    }
3555                }
3556
3557                /*
3558                 * For each input argument, add its input register to prefs.
3559                 * If a temp is used once, this produces a single set bit;
3560                 * if a temp is used multiple times, this produces a set.
3561                 */
3562                for (i = 0; i < nb_iargs; i++) {
3563                    const TCGCallArgumentLoc *loc = &info->in[i];
3564                    ts = arg_temp(op->args[nb_oargs + i]);
3565
3566                    switch (loc->kind) {
3567                    case TCG_CALL_ARG_NORMAL:
3568                    case TCG_CALL_ARG_EXTEND_U:
3569                    case TCG_CALL_ARG_EXTEND_S:
3570                        if (arg_slot_reg_p(loc->arg_slot)) {
3571                            tcg_regset_set_reg(*la_temp_pref(ts),
3572                                tcg_target_call_iarg_regs[loc->arg_slot]);
3573                        }
3574                        break;
3575                    default:
3576                        break;
3577                    }
3578                }
3579            }
3580            break;
3581        case INDEX_op_insn_start:
3582            break;
3583        case INDEX_op_discard:
3584            /* mark the temporary as dead */
3585            ts = arg_temp(op->args[0]);
3586            ts->state = TS_DEAD;
3587            la_reset_pref(ts);
3588            break;
3589
3590        case INDEX_op_add2_i32:
3591            opc_new = INDEX_op_add_i32;
3592            goto do_addsub2;
3593        case INDEX_op_sub2_i32:
3594            opc_new = INDEX_op_sub_i32;
3595            goto do_addsub2;
3596        case INDEX_op_add2_i64:
3597            opc_new = INDEX_op_add_i64;
3598            goto do_addsub2;
3599        case INDEX_op_sub2_i64:
3600            opc_new = INDEX_op_sub_i64;
3601        do_addsub2:
3602            nb_iargs = 4;
3603            nb_oargs = 2;
3604            /* Test if the high part of the operation is dead, but not
3605               the low part.  The result can be optimized to a simple
3606               add or sub.  This happens often for an x86_64 guest when
3607               the cpu mode is set to 32-bit.  */
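            /* E.g. (illustrative operand names) when only the high
               half is dead:
                   add2_i32 lo, hi, al, ah, bl, bh
               is rewritten in place as
                   add_i32 lo, al, bl  */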
3608            if (arg_temp(op->args[1])->state == TS_DEAD) {
3609                if (arg_temp(op->args[0])->state == TS_DEAD) {
3610                    goto do_remove;
3611                }
3612                /* Replace the opcode and adjust the args in place,
3613                   leaving 3 unused args at the end.  */
3614                op->opc = opc = opc_new;
3615                op->args[1] = op->args[2];
3616                op->args[2] = op->args[4];
3617                /* Fall through and mark the single-word operation live.  */
3618                nb_iargs = 2;
3619                nb_oargs = 1;
3620            }
3621            goto do_not_remove;
3622
3623        case INDEX_op_mulu2_i32:
3624            opc_new = INDEX_op_mul_i32;
3625            opc_new2 = INDEX_op_muluh_i32;
3626            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3627            goto do_mul2;
3628        case INDEX_op_muls2_i32:
3629            opc_new = INDEX_op_mul_i32;
3630            opc_new2 = INDEX_op_mulsh_i32;
3631            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3632            goto do_mul2;
3633        case INDEX_op_mulu2_i64:
3634            opc_new = INDEX_op_mul_i64;
3635            opc_new2 = INDEX_op_muluh_i64;
3636            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3637            goto do_mul2;
3638        case INDEX_op_muls2_i64:
3639            opc_new = INDEX_op_mul_i64;
3640            opc_new2 = INDEX_op_mulsh_i64;
3641            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3642            goto do_mul2;
3643        do_mul2:
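            /* E.g. (illustrative operand names) for the rewrites below:
                   mulu2_i32 lo, hi, a, b
               becomes mul_i32 lo, a, b when only the low half is live,
               or muluh_i32 hi, a, b when only the high half is live and
               the host provides muluh.  */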
3644            nb_iargs = 2;
3645            nb_oargs = 2;
3646            if (arg_temp(op->args[1])->state == TS_DEAD) {
3647                if (arg_temp(op->args[0])->state == TS_DEAD) {
3648                    /* Both parts of the operation are dead.  */
3649                    goto do_remove;
3650                }
3651                /* The high part of the operation is dead; generate the low. */
3652                op->opc = opc = opc_new;
3653                op->args[1] = op->args[2];
3654                op->args[2] = op->args[3];
3655            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3656                /* The low part of the operation is dead; generate the high. */
3657                op->opc = opc = opc_new2;
3658                op->args[0] = op->args[1];
3659                op->args[1] = op->args[2];
3660                op->args[2] = op->args[3];
3661            } else {
3662                goto do_not_remove;
3663            }
3664            /* Mark the single-word operation live.  */
3665            nb_oargs = 1;
3666            goto do_not_remove;
3667
3668        default:
3669            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3670            nb_iargs = def->nb_iargs;
3671            nb_oargs = def->nb_oargs;
3672
3673            /* Test if the operation can be removed because all
3674               its outputs are dead. We assume that nb_oargs == 0
3675               implies side effects */
3676            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3677                for (i = 0; i < nb_oargs; i++) {
3678                    if (arg_temp(op->args[i])->state != TS_DEAD) {
3679                        goto do_not_remove;
3680                    }
3681                }
3682                goto do_remove;
3683            }
3684            goto do_not_remove;
3685
3686        do_remove:
3687            tcg_op_remove(s, op);
3688            break;
3689
3690        do_not_remove:
3691            for (i = 0; i < nb_oargs; i++) {
3692                ts = arg_temp(op->args[i]);
3693
3694                /* Remember the preference of the uses that followed.  */
3695                if (i < ARRAY_SIZE(op->output_pref)) {
3696                    op->output_pref[i] = *la_temp_pref(ts);
3697                }
3698
3699                /* Output args are dead.  */
3700                if (ts->state & TS_DEAD) {
3701                    arg_life |= DEAD_ARG << i;
3702                }
3703                if (ts->state & TS_MEM) {
3704                    arg_life |= SYNC_ARG << i;
3705                }
3706                ts->state = TS_DEAD;
3707                la_reset_pref(ts);
3708            }
3709
3710            /* If end of basic block, update.  */
3711            if (def->flags & TCG_OPF_BB_EXIT) {
3712                la_func_end(s, nb_globals, nb_temps);
3713            } else if (def->flags & TCG_OPF_COND_BRANCH) {
3714                la_bb_sync(s, nb_globals, nb_temps);
3715            } else if (def->flags & TCG_OPF_BB_END) {
3716                la_bb_end(s, nb_globals, nb_temps);
3717            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3718                la_global_sync(s, nb_globals);
3719                if (def->flags & TCG_OPF_CALL_CLOBBER) {
3720                    la_cross_call(s, nb_temps);
3721                }
3722            }
3723
3724            /* Record arguments that die in this opcode.  */
3725            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3726                ts = arg_temp(op->args[i]);
3727                if (ts->state & TS_DEAD) {
3728                    arg_life |= DEAD_ARG << i;
3729                }
3730            }
3731
3732            /* Input arguments are live for preceding opcodes.  */
3733            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3734                ts = arg_temp(op->args[i]);
3735                if (ts->state & TS_DEAD) {
3736                    /* For operands that were dead, initially allow
3737                       all regs for the type.  */
3738                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3739                    ts->state &= ~TS_DEAD;
3740                }
3741            }
3742
3743            /* Incorporate constraints for this operand.  */
3744            switch (opc) {
3745            case INDEX_op_mov_i32:
3746            case INDEX_op_mov_i64:
3747                /* Note that these are TCG_OPF_NOT_PRESENT and do not
3748                   have proper constraints.  That said, special case
3749                   moves to propagate preferences backward.  */
3750                if (IS_DEAD_ARG(1)) {
3751                    *la_temp_pref(arg_temp(op->args[0]))
3752                        = *la_temp_pref(arg_temp(op->args[1]));
3753                }
3754                break;
3755
3756            default:
3757                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3758                    const TCGArgConstraint *ct = &def->args_ct[i];
3759                    TCGRegSet set, *pset;
3760
3761                    ts = arg_temp(op->args[i]);
3762                    pset = la_temp_pref(ts);
3763                    set = *pset;
3764
3765                    set &= ct->regs;
3766                    if (ct->ialias) {
3767                        set &= output_pref(op, ct->alias_index);
3768                    }
3769                    /* If the combination is not possible, restart.  */
3770                    if (set == 0) {
3771                        set = ct->regs;
3772                    }
3773                    *pset = set;
3774                }
3775                break;
3776            }
3777            break;
3778        }
3779        op->life = arg_life;
3780    }
3781}
3782
3783/* Liveness analysis: Convert indirect regs to direct temporaries.  */
3784static bool __attribute__((noinline))
3785liveness_pass_2(TCGContext *s)
3786{
3787    int nb_globals = s->nb_globals;
3788    int nb_temps, i;
3789    bool changes = false;
3790    TCGOp *op, *op_next;
3791
3792    /* Create a temporary for each indirect global.  */
3793    for (i = 0; i < nb_globals; ++i) {
3794        TCGTemp *its = &s->temps[i];
3795        if (its->indirect_reg) {
3796            TCGTemp *dts = tcg_temp_alloc(s);
3797            dts->type = its->type;
3798            dts->base_type = its->base_type;
3799            dts->temp_subindex = its->temp_subindex;
3800            dts->kind = TEMP_EBB;
3801            its->state_ptr = dts;
3802        } else {
3803            its->state_ptr = NULL;
3804        }
3805        /* All globals begin dead.  */
3806        its->state = TS_DEAD;
3807    }
3808    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3809        TCGTemp *its = &s->temps[i];
3810        its->state_ptr = NULL;
3811        its->state = TS_DEAD;
3812    }
3813
3814    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3815        TCGOpcode opc = op->opc;
3816        const TCGOpDef *def = &tcg_op_defs[opc];
3817        TCGLifeData arg_life = op->life;
3818        int nb_iargs, nb_oargs, call_flags;
3819        TCGTemp *arg_ts, *dir_ts;
3820
3821        if (opc == INDEX_op_call) {
3822            nb_oargs = TCGOP_CALLO(op);
3823            nb_iargs = TCGOP_CALLI(op);
3824            call_flags = tcg_call_flags(op);
3825        } else {
3826            nb_iargs = def->nb_iargs;
3827            nb_oargs = def->nb_oargs;
3828
3829            /* Set flags similar to those that calls require.  */
3830            if (def->flags & TCG_OPF_COND_BRANCH) {
3831                /* Like reading globals: sync_globals */
3832                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3833            } else if (def->flags & TCG_OPF_BB_END) {
3834                /* Like writing globals: save_globals */
3835                call_flags = 0;
3836            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3837                /* Like reading globals: sync_globals */
3838                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3839            } else {
3840                /* No effect on globals.  */
3841                call_flags = (TCG_CALL_NO_READ_GLOBALS |
3842                              TCG_CALL_NO_WRITE_GLOBALS);
3843            }
3844        }
3845
3846        /* Make sure that input arguments are available.  */
3847        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3848            arg_ts = arg_temp(op->args[i]);
3849            dir_ts = arg_ts->state_ptr;
3850            if (dir_ts && arg_ts->state == TS_DEAD) {
3851                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3852                                  ? INDEX_op_ld_i32
3853                                  : INDEX_op_ld_i64);
3854                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3855
3856                lop->args[0] = temp_arg(dir_ts);
3857                lop->args[1] = temp_arg(arg_ts->mem_base);
3858                lop->args[2] = arg_ts->mem_offset;
3859
3860                /* Loaded, but synced with memory.  */
3861                arg_ts->state = TS_MEM;
3862            }
3863        }
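        /*
         * E.g. (illustrative; the name and offset are made up): if an
         * indirect global backing guest "eax" is TS_DEAD here, the
         * loop above inserts
         *     ld_i32 tmp_eax, env, $0x0
         * before this op, leaving the temp loaded but synced (TS_MEM).
         */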
3864
3865        /* Perform input replacement, and mark inputs that became dead.
3866           No action is required except keeping the temp state up to date
3867           so that we reload when needed.  */
3868        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3869            arg_ts = arg_temp(op->args[i]);
3870            dir_ts = arg_ts->state_ptr;
3871            if (dir_ts) {
3872                op->args[i] = temp_arg(dir_ts);
3873                changes = true;
3874                if (IS_DEAD_ARG(i)) {
3875                    arg_ts->state = TS_DEAD;
3876                }
3877            }
3878        }
3879
3880        /* Liveness analysis should ensure that the following are
3881           all correct, for call sites and basic block end points.  */
3882        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3883            /* Nothing to do */
3884        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3885            for (i = 0; i < nb_globals; ++i) {
3886                /* Liveness should see that globals are synced back,
3887                   that is, either TS_DEAD or TS_MEM.  */
3888                arg_ts = &s->temps[i];
3889                tcg_debug_assert(arg_ts->state_ptr == 0
3890                                 || arg_ts->state != 0);
3891            }
3892        } else {
3893            for (i = 0; i < nb_globals; ++i) {
3894                /* Liveness should see that globals are saved back,
3895                   that is, TS_DEAD, waiting to be reloaded.  */
3896                arg_ts = &s->temps[i];
3897                tcg_debug_assert(arg_ts->state_ptr == 0
3898                                 || arg_ts->state == TS_DEAD);
3899            }
3900        }
3901
3902        /* Outputs become available.  */
3903        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3904            arg_ts = arg_temp(op->args[0]);
3905            dir_ts = arg_ts->state_ptr;
3906            if (dir_ts) {
3907                op->args[0] = temp_arg(dir_ts);
3908                changes = true;
3909
3910                /* The output is now live and modified.  */
3911                arg_ts->state = 0;
3912
3913                if (NEED_SYNC_ARG(0)) {
3914                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3915                                      ? INDEX_op_st_i32
3916                                      : INDEX_op_st_i64);
3917                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3918                    TCGTemp *out_ts = dir_ts;
3919
3920                    if (IS_DEAD_ARG(0)) {
3921                        out_ts = arg_temp(op->args[1]);
3922                        arg_ts->state = TS_DEAD;
3923                        tcg_op_remove(s, op);
3924                    } else {
3925                        arg_ts->state = TS_MEM;
3926                    }
3927
3928                    sop->args[0] = temp_arg(out_ts);
3929                    sop->args[1] = temp_arg(arg_ts->mem_base);
3930                    sop->args[2] = arg_ts->mem_offset;
3931                } else {
3932                    tcg_debug_assert(!IS_DEAD_ARG(0));
3933                }
3934            }
3935        } else {
3936            for (i = 0; i < nb_oargs; i++) {
3937                arg_ts = arg_temp(op->args[i]);
3938                dir_ts = arg_ts->state_ptr;
3939                if (!dir_ts) {
3940                    continue;
3941                }
3942                op->args[i] = temp_arg(dir_ts);
3943                changes = true;
3944
3945                /* The output is now live and modified.  */
3946                arg_ts->state = 0;
3947
3948                /* Sync outputs upon their last write.  */
3949                if (NEED_SYNC_ARG(i)) {
3950                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3951                                      ? INDEX_op_st_i32
3952                                      : INDEX_op_st_i64);
3953                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3954
3955                    sop->args[0] = temp_arg(dir_ts);
3956                    sop->args[1] = temp_arg(arg_ts->mem_base);
3957                    sop->args[2] = arg_ts->mem_offset;
3958
3959                    arg_ts->state = TS_MEM;
3960                }
3961                /* Drop outputs that are dead.  */
3962                if (IS_DEAD_ARG(i)) {
3963                    arg_ts->state = TS_DEAD;
3964                }
3965            }
3966        }
3967    }
3968
3969    return changes;
3970}
3971
3972static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3973{
3974    intptr_t off;
3975    int size, align;
3976
3977    /* When allocating an object, look at the full type. */
3978    size = tcg_type_size(ts->base_type);
3979    switch (ts->base_type) {
3980    case TCG_TYPE_I32:
3981        align = 4;
3982        break;
3983    case TCG_TYPE_I64:
3984    case TCG_TYPE_V64:
3985        align = 8;
3986        break;
3987    case TCG_TYPE_I128:
3988    case TCG_TYPE_V128:
3989    case TCG_TYPE_V256:
3990        /*
3991         * Note that we do not require aligned storage for V256,
3992         * and that we provide alignment for I128 to match V128,
3993         * even if that's above what the host ABI requires.
3994         */
3995        align = 16;
3996        break;
3997    default:
3998        g_assert_not_reached();
3999    }
4000
4001    /*
4002     * Assume the stack is sufficiently aligned.
4003     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4004     * and do not require 16 byte vector alignment.  This seems slightly
4005     * easier than fully parameterizing the above switch statement.
4006     */
4007    align = MIN(TCG_TARGET_STACK_ALIGN, align);
4008    off = ROUND_UP(s->current_frame_offset, align);
4009
4010    /* If we've exhausted the stack frame, restart with a smaller TB. */
4011    if (off + size > s->frame_end) {
4012        tcg_raise_tb_overflow(s);
4013    }
4014    s->current_frame_offset = off + size;
4015#if defined(__sparc__)
4016    off += TCG_TARGET_STACK_BIAS;
4017#endif
4018
4019    /* If the object was subdivided, assign memory to all the parts. */
4020    if (ts->base_type != ts->type) {
4021        int part_size = tcg_type_size(ts->type);
4022        int part_count = size / part_size;
4023
4024        /*
4025         * Each part is allocated sequentially in tcg_temp_new_internal.
4026         * Jump back to the first part by subtracting the current index.
4027         */
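        /*
         * E.g. an I128 temp on a 64-bit host has base_type I128 but
         * type I64, so part_count == 2 and the two halves are placed
         * at off and off + 8 within the frame.
         */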
4028        ts -= ts->temp_subindex;
4029        for (int i = 0; i < part_count; ++i) {
4030            ts[i].mem_offset = off + i * part_size;
4031            ts[i].mem_base = s->frame_temp;
4032            ts[i].mem_allocated = 1;
4033        }
4034    } else {
4035        ts->mem_offset = off;
4036        ts->mem_base = s->frame_temp;
4037        ts->mem_allocated = 1;
4038    }
4039}
4040
4041/* Assign @reg to @ts, and update reg_to_temp[]. */
4042static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4043{
4044    if (ts->val_type == TEMP_VAL_REG) {
4045        TCGReg old = ts->reg;
4046        tcg_debug_assert(s->reg_to_temp[old] == ts);
4047        if (old == reg) {
4048            return;
4049        }
4050        s->reg_to_temp[old] = NULL;
4051    }
4052    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4053    s->reg_to_temp[reg] = ts;
4054    ts->val_type = TEMP_VAL_REG;
4055    ts->reg = reg;
4056}
4057
4058/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4059static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4060{
4061    tcg_debug_assert(type != TEMP_VAL_REG);
4062    if (ts->val_type == TEMP_VAL_REG) {
4063        TCGReg reg = ts->reg;
4064        tcg_debug_assert(s->reg_to_temp[reg] == ts);
4065        s->reg_to_temp[reg] = NULL;
4066    }
4067    ts->val_type = type;
4068}
4069
4070static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4071
4072/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4073   mark it free; otherwise mark it dead.  */
4074static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4075{
4076    TCGTempVal new_type;
4077
4078    switch (ts->kind) {
4079    case TEMP_FIXED:
4080        return;
4081    case TEMP_GLOBAL:
4082    case TEMP_TB:
4083        new_type = TEMP_VAL_MEM;
4084        break;
4085    case TEMP_EBB:
4086        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4087        break;
4088    case TEMP_CONST:
4089        new_type = TEMP_VAL_CONST;
4090        break;
4091    default:
4092        g_assert_not_reached();
4093    }
4094    set_temp_val_nonreg(s, ts, new_type);
4095}
4096
4097/* Mark a temporary as dead.  */
4098static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4099{
4100    temp_free_or_dead(s, ts, 1);
4101}
4102
4103/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4104   register needs to be allocated to store a constant.  If 'free_or_dead'
4105   is non-zero, subsequently release the temporary; if it is positive, the
4106   temp is dead; if it is negative, the temp is free.  */
4107static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4108                      TCGRegSet preferred_regs, int free_or_dead)
4109{
4110    if (!temp_readonly(ts) && !ts->mem_coherent) {
4111        if (!ts->mem_allocated) {
4112            temp_allocate_frame(s, ts);
4113        }
4114        switch (ts->val_type) {
4115        case TEMP_VAL_CONST:
4116            /* If we're going to free the temp immediately, then we won't
4117               require it later in a register, so attempt to store the
4118               constant to memory directly.  */
4119            if (free_or_dead
4120                && tcg_out_sti(s, ts->type, ts->val,
4121                               ts->mem_base->reg, ts->mem_offset)) {
4122                break;
4123            }
4124            temp_load(s, ts, tcg_target_available_regs[ts->type],
4125                      allocated_regs, preferred_regs);
4126            /* fallthrough */
4127
4128        case TEMP_VAL_REG:
4129            tcg_out_st(s, ts->type, ts->reg,
4130                       ts->mem_base->reg, ts->mem_offset);
4131            break;
4132
4133        case TEMP_VAL_MEM:
4134            break;
4135
4136        case TEMP_VAL_DEAD:
4137        default:
4138            g_assert_not_reached();
4139        }
4140        ts->mem_coherent = 1;
4141    }
4142    if (free_or_dead) {
4143        temp_free_or_dead(s, ts, free_or_dead);
4144    }
4145}
4146
4147/* free register 'reg' by spilling the corresponding temporary if necessary */
4148static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4149{
4150    TCGTemp *ts = s->reg_to_temp[reg];
4151    if (ts != NULL) {
4152        temp_sync(s, ts, allocated_regs, 0, -1);
4153    }
4154}
4155
4156/**
4157 * tcg_reg_alloc:
4158 * @required_regs: Set of registers in which we must allocate.
4159 * @allocated_regs: Set of registers which must be avoided.
4160 * @preferred_regs: Set of registers we should prefer.
4161 * @rev: True if we search the registers in "indirect" order.
4162 *
4163 * The allocated register must be in @required_regs & ~@allocated_regs,
4164 * but if we can put it in @preferred_regs we may save a move later.
4165 */
4166static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4167                            TCGRegSet allocated_regs,
4168                            TCGRegSet preferred_regs, bool rev)
4169{
4170    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4171    TCGRegSet reg_ct[2];
4172    const int *order;
4173
4174    reg_ct[1] = required_regs & ~allocated_regs;
4175    tcg_debug_assert(reg_ct[1] != 0);
4176    reg_ct[0] = reg_ct[1] & preferred_regs;
4177
4178    /* Skip the preferred_regs option if it cannot be satisfied,
4179       or if the preference made no difference.  */
4180    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
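    /*
     * E.g. (illustrative register names): with required {r0, r1, r2},
     * allocated {r0} and preferred {r1}, reg_ct[1] is {r1, r2} and
     * reg_ct[0] is {r1}, so f == 0 and the preferred set is tried first.
     */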
4181
4182    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4183
4184    /* Try free registers, preferences first.  */
4185    for (j = f; j < 2; j++) {
4186        TCGRegSet set = reg_ct[j];
4187
4188        if (tcg_regset_single(set)) {
4189            /* One register in the set.  */
4190            TCGReg reg = tcg_regset_first(set);
4191            if (s->reg_to_temp[reg] == NULL) {
4192                return reg;
4193            }
4194        } else {
4195            for (i = 0; i < n; i++) {
4196                TCGReg reg = order[i];
4197                if (s->reg_to_temp[reg] == NULL &&
4198                    tcg_regset_test_reg(set, reg)) {
4199                    return reg;
4200                }
4201            }
4202        }
4203    }
4204
4205    /* We must spill something.  */
4206    for (j = f; j < 2; j++) {
4207        TCGRegSet set = reg_ct[j];
4208
4209        if (tcg_regset_single(set)) {
4210            /* One register in the set.  */
4211            TCGReg reg = tcg_regset_first(set);
4212            tcg_reg_free(s, reg, allocated_regs);
4213            return reg;
4214        } else {
4215            for (i = 0; i < n; i++) {
4216                TCGReg reg = order[i];
4217                if (tcg_regset_test_reg(set, reg)) {
4218                    tcg_reg_free(s, reg, allocated_regs);
4219                    return reg;
4220                }
4221            }
4222        }
4223    }
4224
4225    g_assert_not_reached();
4226}
4227
4228static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4229                                 TCGRegSet allocated_regs,
4230                                 TCGRegSet preferred_regs, bool rev)
4231{
4232    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4233    TCGRegSet reg_ct[2];
4234    const int *order;
4235
4236    /* A register I is a candidate only if neither I nor I+1 is in allocated_regs. */
4237    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4238    tcg_debug_assert(reg_ct[1] != 0);
4239    reg_ct[0] = reg_ct[1] & preferred_regs;
4240
4241    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4242
4243    /*
4244     * Skip the preferred_regs option if it cannot be satisfied,
4245     * or if the preference made no difference.
4246     */
4247    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4248
4249    /*
4250     * Minimize the number of flushes by looking for 2 free registers first,
4251     * then a single flush, then two flushes.
4252     */
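    /*
     * E.g. fmin == 2 accepts only pairs with both reg and reg + 1
     * free; fmin == 1 allows spilling one of the two; fmin == 0
     * spills both.
     */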
4253    for (fmin = 2; fmin >= 0; fmin--) {
4254        for (j = k; j < 2; j++) {
4255            TCGRegSet set = reg_ct[j];
4256
4257            for (i = 0; i < n; i++) {
4258                TCGReg reg = order[i];
4259
4260                if (tcg_regset_test_reg(set, reg)) {
4261                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4262                    if (f >= fmin) {
4263                        tcg_reg_free(s, reg, allocated_regs);
4264                        tcg_reg_free(s, reg + 1, allocated_regs);
4265                        return reg;
4266                    }
4267                }
4268            }
4269        }
4270    }
4271    g_assert_not_reached();
4272}
4273
4274/* Make sure the temporary is in a register.  If needed, allocate the register
4275   from DESIRED while avoiding ALLOCATED.  */
4276static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4277                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4278{
4279    TCGReg reg;
4280
4281    switch (ts->val_type) {
4282    case TEMP_VAL_REG:
4283        return;
4284    case TEMP_VAL_CONST:
4285        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4286                            preferred_regs, ts->indirect_base);
4287        if (ts->type <= TCG_TYPE_I64) {
4288            tcg_out_movi(s, ts->type, reg, ts->val);
4289        } else {
4290            uint64_t val = ts->val;
4291            MemOp vece = MO_64;
4292
4293            /*
4294             * Find the minimal vector element that matches the constant.
4295             * The targets will, in general, have to do this search anyway,
4296             * so do it generically here.
4297             */
4298            if (val == dup_const(MO_8, val)) {
4299                vece = MO_8;
4300            } else if (val == dup_const(MO_16, val)) {
4301                vece = MO_16;
4302            } else if (val == dup_const(MO_32, val)) {
4303                vece = MO_32;
4304            }
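            /*
             * E.g. (illustrative values): 0x4242424242424242 matches
             * dup_const(MO_8, val) and a single byte is replicated,
             * while 0x0001000100010001 first matches at MO_16.
             */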
4305
4306            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4307        }
4308        ts->mem_coherent = 0;
4309        break;
4310    case TEMP_VAL_MEM:
4311        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4312                            preferred_regs, ts->indirect_base);
4313        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4314        ts->mem_coherent = 1;
4315        break;
4316    case TEMP_VAL_DEAD:
4317    default:
4318        g_assert_not_reached();
4319    }
4320    set_temp_val_reg(s, ts, reg);
4321}
4322
4323/* Save a temporary to memory. 'allocated_regs' is used in case a
4324   temporary register needs to be allocated to store a constant.  */
4325static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4326{
4327    /* The liveness analysis already ensures that globals are back
4328       in memory. Keep a tcg_debug_assert for safety. */
4329    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4330}
4331
4332/* save globals to their canonical location and assume they can be
4333   modified by the following code. 'allocated_regs' is used in case a
4334   temporary register needs to be allocated to store a constant. */
4335static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4336{
4337    int i, n;
4338
4339    for (i = 0, n = s->nb_globals; i < n; i++) {
4340        temp_save(s, &s->temps[i], allocated_regs);
4341    }
4342}
4343
4344/* sync globals to their canonical location and assume they can be
4345   read by the following code. 'allocated_regs' is used in case a
4346   temporary register needs to be allocated to store a constant. */
4347static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4348{
4349    int i, n;
4350
4351    for (i = 0, n = s->nb_globals; i < n; i++) {
4352        TCGTemp *ts = &s->temps[i];
4353        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4354                         || ts->kind == TEMP_FIXED
4355                         || ts->mem_coherent);
4356    }
4357}
4358
4359/* at the end of a basic block, we assume all temporaries are dead and
4360   all globals are stored at their canonical location. */
4361static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4362{
4363    int i;
4364
4365    for (i = s->nb_globals; i < s->nb_temps; i++) {
4366        TCGTemp *ts = &s->temps[i];
4367
4368        switch (ts->kind) {
4369        case TEMP_TB:
4370            temp_save(s, ts, allocated_regs);
4371            break;
4372        case TEMP_EBB:
4373            /* The liveness analysis already ensures that temps are dead.
4374               Keep a tcg_debug_assert for safety. */
4375            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4376            break;
4377        case TEMP_CONST:
4378            /* Similarly, we should have freed any allocated register. */
4379            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4380            break;
4381        default:
4382            g_assert_not_reached();
4383        }
4384    }
4385
4386    save_globals(s, allocated_regs);
4387}
4388
4389/*
4390 * At a conditional branch, we assume all temporaries are dead unless
4391 * explicitly live-across-conditional-branch; all globals and local
4392 * temps are synced to their location.
4393 */
4394static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4395{
4396    sync_globals(s, allocated_regs);
4397
4398    for (int i = s->nb_globals; i < s->nb_temps; i++) {
4399        TCGTemp *ts = &s->temps[i];
4400        /*
4401         * The liveness analysis already ensures that temps are dead.
4402         * Keep tcg_debug_asserts for safety.
4403         */
4404        switch (ts->kind) {
4405        case TEMP_TB:
4406            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4407            break;
4408        case TEMP_EBB:
4409        case TEMP_CONST:
4410            break;
4411        default:
4412            g_assert_not_reached();
4413        }
4414    }
4415}
4416
4417/*
4418 * Specialized code generation for INDEX_op_mov_* with a constant.
4419 */
4420static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4421                                  tcg_target_ulong val, TCGLifeData arg_life,
4422                                  TCGRegSet preferred_regs)
4423{
4424    /* ENV should not be modified.  */
4425    tcg_debug_assert(!temp_readonly(ots));
4426
4427    /* The movi is not explicitly generated here.  */
4428    set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4429    ots->val = val;
4430    ots->mem_coherent = 0;
4431    if (NEED_SYNC_ARG(0)) {
4432        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4433    } else if (IS_DEAD_ARG(0)) {
4434        temp_dead(s, ots);
4435    }
4436}
4437
4438/*
4439 * Specialized code generation for INDEX_op_mov_*.
4440 */
4441static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4442{
4443    const TCGLifeData arg_life = op->life;
4444    TCGRegSet allocated_regs, preferred_regs;
4445    TCGTemp *ts, *ots;
4446    TCGType otype, itype;
4447    TCGReg oreg, ireg;
4448
4449    allocated_regs = s->reserved_regs;
4450    preferred_regs = output_pref(op, 0);
4451    ots = arg_temp(op->args[0]);
4452    ts = arg_temp(op->args[1]);
4453
4454    /* ENV should not be modified.  */
4455    tcg_debug_assert(!temp_readonly(ots));
4456
4457    /* Note that otype != itype for no-op truncation.  */
4458    otype = ots->type;
4459    itype = ts->type;
4460
4461    if (ts->val_type == TEMP_VAL_CONST) {
4462        /* propagate constant or generate sti */
4463        tcg_target_ulong val = ts->val;
4464        if (IS_DEAD_ARG(1)) {
4465            temp_dead(s, ts);
4466        }
4467        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4468        return;
4469    }
4470
4471    /* If the source value is in memory we're going to be forced
4472       to have it in a register in order to perform the copy.  Copy
4473       the SOURCE value into its own register first, that way we
4474       don't have to reload SOURCE the next time it is used. */
4475    if (ts->val_type == TEMP_VAL_MEM) {
4476        temp_load(s, ts, tcg_target_available_regs[itype],
4477                  allocated_regs, preferred_regs);
4478    }
4479    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4480    ireg = ts->reg;
4481
4482    if (IS_DEAD_ARG(0)) {
4483        /* mov to a non-saved dead register makes no sense (even with
4484           liveness analysis disabled). */
4485        tcg_debug_assert(NEED_SYNC_ARG(0));
4486        if (!ots->mem_allocated) {
4487            temp_allocate_frame(s, ots);
4488        }
4489        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4490        if (IS_DEAD_ARG(1)) {
4491            temp_dead(s, ts);
4492        }
4493        temp_dead(s, ots);
4494        return;
4495    }
4496
4497    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4498        /*
4499         * The mov can be suppressed.  Kill input first, so that it
4500         * is unlinked from reg_to_temp, then set the output to the
4501         * reg that we saved from the input.
4502         */
4503        temp_dead(s, ts);
4504        oreg = ireg;
4505    } else {
4506        if (ots->val_type == TEMP_VAL_REG) {
4507            oreg = ots->reg;
4508        } else {
4509            /* Make sure to not spill the input register during allocation. */
4510            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4511                                 allocated_regs | ((TCGRegSet)1 << ireg),
4512                                 preferred_regs, ots->indirect_base);
4513        }
4514        if (!tcg_out_mov(s, otype, oreg, ireg)) {
4515            /*
4516             * Cross register class move not supported.
4517             * Store the source register into the destination slot
4518             * and leave the destination temp as TEMP_VAL_MEM.
4519             */
4520            assert(!temp_readonly(ots));
4521            if (!ots->mem_allocated) {
4522                temp_allocate_frame(s, ots);
4523            }
4524            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4525            set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4526            ots->mem_coherent = 1;
4527            return;
4528        }
4529    }
4530    set_temp_val_reg(s, ots, oreg);
4531    ots->mem_coherent = 0;
4532
4533    if (NEED_SYNC_ARG(0)) {
4534        temp_sync(s, ots, allocated_regs, 0, 0);
4535    }
4536}
4537
4538/*
4539 * Specialized code generation for INDEX_op_dup_vec.
4540 */
4541static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4542{
4543    const TCGLifeData arg_life = op->life;
4544    TCGRegSet dup_out_regs, dup_in_regs;
4545    TCGTemp *its, *ots;
4546    TCGType itype, vtype;
4547    unsigned vece;
4548    int lowpart_ofs;
4549    bool ok;
4550
4551    ots = arg_temp(op->args[0]);
4552    its = arg_temp(op->args[1]);
4553
4554    /* ENV should not be modified.  */
4555    tcg_debug_assert(!temp_readonly(ots));
4556
4557    itype = its->type;
4558    vece = TCGOP_VECE(op);
4559    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4560
4561    if (its->val_type == TEMP_VAL_CONST) {
4562        /* Propagate constant via movi -> dupi.  */
4563        tcg_target_ulong val = its->val;
4564        if (IS_DEAD_ARG(1)) {
4565            temp_dead(s, its);
4566        }
4567        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4568        return;
4569    }
4570
4571    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4572    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4573
4574    /* Allocate the output register now.  */
4575    if (ots->val_type != TEMP_VAL_REG) {
4576        TCGRegSet allocated_regs = s->reserved_regs;
4577        TCGReg oreg;
4578
4579        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4580            /* Make sure to not spill the input register. */
4581            tcg_regset_set_reg(allocated_regs, its->reg);
4582        }
4583        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4584                             output_pref(op, 0), ots->indirect_base);
4585        set_temp_val_reg(s, ots, oreg);
4586    }
4587
4588    switch (its->val_type) {
4589    case TEMP_VAL_REG:
4590        /*
4591         * The dup constraints must be broad, covering all possible VECE.
4592         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4593         * to fail, indicating that extra moves are required for that case.
4594         */
4595        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4596            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4597                goto done;
4598            }
4599            /* Try again from memory or a vector input register.  */
4600        }
4601        if (!its->mem_coherent) {
4602            /*
4603             * The input register is not synced, and so an extra store
4604             * would be required to use memory.  Attempt an integer-vector
4605             * register move first.  We do not have a TCGRegSet for this.
4606             */
4607            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4608                break;
4609            }
4610            /* Sync the temp back to its slot and load from there.  */
4611            temp_sync(s, its, s->reserved_regs, 0, 0);
4612        }
4613        /* fall through */
4614
4615    case TEMP_VAL_MEM:
4616        lowpart_ofs = 0;
4617        if (HOST_BIG_ENDIAN) {
4618            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4619        }
4620        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4621                             its->mem_offset + lowpart_ofs)) {
4622            goto done;
4623        }
4624        /* Load the input into the destination vector register. */
4625        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4626        break;
4627
4628    default:
4629        g_assert_not_reached();
4630    }
4631
4632    /* We now have a vector input register, so dup must succeed. */
4633    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4634    tcg_debug_assert(ok);
4635
4636 done:
4637    ots->mem_coherent = 0;
4638    if (IS_DEAD_ARG(1)) {
4639        temp_dead(s, its);
4640    }
4641    if (NEED_SYNC_ARG(0)) {
4642        temp_sync(s, ots, s->reserved_regs, 0, 0);
4643    }
4644    if (IS_DEAD_ARG(0)) {
4645        temp_dead(s, ots);
4646    }
4647}
4648
4649static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4650{
4651    const TCGLifeData arg_life = op->life;
4652    const TCGOpDef * const def = &tcg_op_defs[op->opc];
4653    TCGRegSet i_allocated_regs;
4654    TCGRegSet o_allocated_regs;
4655    int i, k, nb_iargs, nb_oargs;
4656    TCGReg reg;
4657    TCGArg arg;
4658    const TCGArgConstraint *arg_ct;
4659    TCGTemp *ts;
4660    TCGArg new_args[TCG_MAX_OP_ARGS];
4661    int const_args[TCG_MAX_OP_ARGS];
4662
4663    nb_oargs = def->nb_oargs;
4664    nb_iargs = def->nb_iargs;
4665
4666    /* copy constants */
4667    memcpy(new_args + nb_oargs + nb_iargs,
4668           op->args + nb_oargs + nb_iargs,
4669           sizeof(TCGArg) * def->nb_cargs);
4670
4671    i_allocated_regs = s->reserved_regs;
4672    o_allocated_regs = s->reserved_regs;
4673
4674    /* satisfy input constraints */
4675    for (k = 0; k < nb_iargs; k++) {
4676        TCGRegSet i_preferred_regs, i_required_regs;
4677        bool allocate_new_reg, copyto_new_reg;
4678        TCGTemp *ts2;
4679        int i1, i2;
4680
4681        i = def->args_ct[nb_oargs + k].sort_index;
4682        arg = op->args[i];
4683        arg_ct = &def->args_ct[i];
4684        ts = arg_temp(arg);
4685
4686        if (ts->val_type == TEMP_VAL_CONST
4687            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4688            /* constant is OK for instruction */
4689            const_args[i] = 1;
4690            new_args[i] = ts->val;
4691            continue;
4692        }
4693
4694        reg = ts->reg;
4695        i_preferred_regs = 0;
4696        i_required_regs = arg_ct->regs;
4697        allocate_new_reg = false;
4698        copyto_new_reg = false;
4699
4700        switch (arg_ct->pair) {
4701        case 0: /* not paired */
4702            if (arg_ct->ialias) {
4703                i_preferred_regs = output_pref(op, arg_ct->alias_index);
4704
4705                /*
4706                 * If the input is readonly, then it cannot also be an
4707                 * output and aliased to itself.  If the input is not
4708                 * dead after the instruction, we must allocate a new
4709                 * register and move it.
4710                 */
4711                if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4712                    || def->args_ct[arg_ct->alias_index].newreg) {
4713                    allocate_new_reg = true;
4714                } else if (ts->val_type == TEMP_VAL_REG) {
4715                    /*
4716                     * Check if the current register has already been
4717                     * allocated for another input.
4718                     */
4719                    allocate_new_reg =
4720                        tcg_regset_test_reg(i_allocated_regs, reg);
4721                }
4722            }
4723            if (!allocate_new_reg) {
4724                temp_load(s, ts, i_required_regs, i_allocated_regs,
4725                          i_preferred_regs);
4726                reg = ts->reg;
4727                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4728            }
4729            if (allocate_new_reg) {
4730                /*
4731                 * Allocate a new register matching the constraint
4732                 * and move the temporary register into it.
4733                 */
4734                temp_load(s, ts, tcg_target_available_regs[ts->type],
4735                          i_allocated_regs, 0);
4736                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4737                                    i_preferred_regs, ts->indirect_base);
4738                copyto_new_reg = true;
4739            }
4740            break;
4741
4742        case 1:
4743            /* First of an input pair; if i1 == i2, the second is an output. */
4744            i1 = i;
4745            i2 = arg_ct->pair_index;
4746            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4747
4748            /*
4749             * It is easier to default to allocating a new pair
4750             * and to identify a few cases where it's not required.
4751             */
4752            if (arg_ct->ialias) {
4753                i_preferred_regs = output_pref(op, arg_ct->alias_index);
4754                if (IS_DEAD_ARG(i1) &&
4755                    IS_DEAD_ARG(i2) &&
4756                    !temp_readonly(ts) &&
4757                    ts->val_type == TEMP_VAL_REG &&
4758                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
4759                    tcg_regset_test_reg(i_required_regs, reg) &&
4760                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
4761                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4762                    (ts2
4763                     ? ts2->val_type == TEMP_VAL_REG &&
4764                       ts2->reg == reg + 1 &&
4765                       !temp_readonly(ts2)
4766                     : s->reg_to_temp[reg + 1] == NULL)) {
4767                    break;
4768                }
4769            } else {
4770                /* Without aliasing, the pair must also be an input. */
4771                tcg_debug_assert(ts2);
4772                if (ts->val_type == TEMP_VAL_REG &&
4773                    ts2->val_type == TEMP_VAL_REG &&
4774                    ts2->reg == reg + 1 &&
4775                    tcg_regset_test_reg(i_required_regs, reg)) {
4776                    break;
4777                }
4778            }
4779            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4780                                     0, ts->indirect_base);
4781            goto do_pair;
4782
4783        case 2: /* pair second */
4784            reg = new_args[arg_ct->pair_index] + 1;
4785            goto do_pair;
4786
4787        case 3: /* ialias with second output, no first input */
4788            tcg_debug_assert(arg_ct->ialias);
4789            i_preferred_regs = output_pref(op, arg_ct->alias_index);
4790
4791            if (IS_DEAD_ARG(i) &&
4792                !temp_readonly(ts) &&
4793                ts->val_type == TEMP_VAL_REG &&
4794                reg > 0 &&
4795                s->reg_to_temp[reg - 1] == NULL &&
4796                tcg_regset_test_reg(i_required_regs, reg) &&
4797                !tcg_regset_test_reg(i_allocated_regs, reg) &&
4798                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4799                tcg_regset_set_reg(i_allocated_regs, reg - 1);
4800                break;
4801            }
4802            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4803                                     i_allocated_regs, 0,
4804                                     ts->indirect_base);
4805            tcg_regset_set_reg(i_allocated_regs, reg);
4806            reg += 1;
4807            goto do_pair;
4808
4809        do_pair:
4810            /*
4811             * If an aliased input is not dead after the instruction,
4812             * we must allocate a new register and move it.
4813             */
4814            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4815                TCGRegSet t_allocated_regs = i_allocated_regs;
4816
4817                /*
4818                 * Because of the alias, and the continued life, make sure
4819                 * that the temp is somewhere *other* than the reg pair,
4820                 * and we get a copy in reg.
4821                 */
4822                tcg_regset_set_reg(t_allocated_regs, reg);
4823                tcg_regset_set_reg(t_allocated_regs, reg + 1);
4824                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4825                    /* If ts was already in reg, copy it somewhere else. */
4826                    TCGReg nr;
4827                    bool ok;
4828
4829                    tcg_debug_assert(ts->kind != TEMP_FIXED);
4830                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4831                                       t_allocated_regs, 0, ts->indirect_base);
4832                    ok = tcg_out_mov(s, ts->type, nr, reg);
4833                    tcg_debug_assert(ok);
4834
4835                    set_temp_val_reg(s, ts, nr);
4836                } else {
4837                    temp_load(s, ts, tcg_target_available_regs[ts->type],
4838                              t_allocated_regs, 0);
4839                    copyto_new_reg = true;
4840                }
4841            } else {
4842                /* Preferably allocate to reg, otherwise copy. */
4843                i_required_regs = (TCGRegSet)1 << reg;
4844                temp_load(s, ts, i_required_regs, i_allocated_regs,
4845                          i_preferred_regs);
4846                copyto_new_reg = ts->reg != reg;
4847            }
4848            break;
4849
4850        default:
4851            g_assert_not_reached();
4852        }
4853
4854        if (copyto_new_reg) {
4855            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4856                /*
4857                 * Cross register class move not supported.  Sync the
4858                 * temp back to its slot and load from there.
4859                 */
4860                temp_sync(s, ts, i_allocated_regs, 0, 0);
4861                tcg_out_ld(s, ts->type, reg,
4862                           ts->mem_base->reg, ts->mem_offset);
4863            }
4864        }
4865        new_args[i] = reg;
4866        const_args[i] = 0;
4867        tcg_regset_set_reg(i_allocated_regs, reg);
4868    }
4869
4870    /* mark dead temporaries and free the associated registers */
4871    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4872        if (IS_DEAD_ARG(i)) {
4873            temp_dead(s, arg_temp(op->args[i]));
4874        }
4875    }
4876
4877    if (def->flags & TCG_OPF_COND_BRANCH) {
4878        tcg_reg_alloc_cbranch(s, i_allocated_regs);
4879    } else if (def->flags & TCG_OPF_BB_END) {
4880        tcg_reg_alloc_bb_end(s, i_allocated_regs);
4881    } else {
4882        if (def->flags & TCG_OPF_CALL_CLOBBER) {
4883            /* XXX: permit generic clobber register list ? */
4884            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4885                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4886                    tcg_reg_free(s, i, i_allocated_regs);
4887                }
4888            }
4889        }
4890        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4891            /* sync globals if the op has side effects and might trigger
4892               an exception. */
4893            sync_globals(s, i_allocated_regs);
4894        }
4895
4896        /* satisfy the output constraints */
4897        for (k = 0; k < nb_oargs; k++) {
4898            i = def->args_ct[k].sort_index;
4899            arg = op->args[i];
4900            arg_ct = &def->args_ct[i];
4901            ts = arg_temp(arg);
4902
4903            /* ENV should not be modified.  */
4904            tcg_debug_assert(!temp_readonly(ts));
4905
4906            switch (arg_ct->pair) {
4907            case 0: /* not paired */
4908                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4909                    reg = new_args[arg_ct->alias_index];
4910                } else if (arg_ct->newreg) {
4911                    reg = tcg_reg_alloc(s, arg_ct->regs,
4912                                        i_allocated_regs | o_allocated_regs,
4913                                        output_pref(op, k), ts->indirect_base);
4914                } else {
4915                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4916                                        output_pref(op, k), ts->indirect_base);
4917                }
4918                break;
4919
4920            case 1: /* first of pair */
4921                tcg_debug_assert(!arg_ct->newreg);
4922                if (arg_ct->oalias) {
4923                    reg = new_args[arg_ct->alias_index];
4924                    break;
4925                }
4926                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4927                                         output_pref(op, k), ts->indirect_base);
4928                break;
4929
4930            case 2: /* second of pair */
4931                tcg_debug_assert(!arg_ct->newreg);
4932                if (arg_ct->oalias) {
4933                    reg = new_args[arg_ct->alias_index];
4934                } else {
4935                    reg = new_args[arg_ct->pair_index] + 1;
4936                }
4937                break;
4938
4939            case 3: /* first of pair, aliasing with a second input */
4940                tcg_debug_assert(!arg_ct->newreg);
4941                reg = new_args[arg_ct->pair_index] - 1;
4942                break;
4943
4944            default:
4945                g_assert_not_reached();
4946            }
4947            tcg_regset_set_reg(o_allocated_regs, reg);
4948            set_temp_val_reg(s, ts, reg);
4949            ts->mem_coherent = 0;
4950            new_args[i] = reg;
4951        }
4952    }
4953
4954    /* emit instruction */
4955    switch (op->opc) {
4956    case INDEX_op_ext8s_i32:
4957        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4958        break;
4959    case INDEX_op_ext8s_i64:
4960        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4961        break;
4962    case INDEX_op_ext8u_i32:
4963    case INDEX_op_ext8u_i64:
4964        tcg_out_ext8u(s, new_args[0], new_args[1]);
4965        break;
4966    case INDEX_op_ext16s_i32:
4967        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4968        break;
4969    case INDEX_op_ext16s_i64:
4970        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4971        break;
4972    case INDEX_op_ext16u_i32:
4973    case INDEX_op_ext16u_i64:
4974        tcg_out_ext16u(s, new_args[0], new_args[1]);
4975        break;
4976    case INDEX_op_ext32s_i64:
4977        tcg_out_ext32s(s, new_args[0], new_args[1]);
4978        break;
4979    case INDEX_op_ext32u_i64:
4980        tcg_out_ext32u(s, new_args[0], new_args[1]);
4981        break;
4982    case INDEX_op_ext_i32_i64:
4983        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4984        break;
4985    case INDEX_op_extu_i32_i64:
4986        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4987        break;
4988    case INDEX_op_extrl_i64_i32:
4989        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
4990        break;
4991    default:
4992        if (def->flags & TCG_OPF_VECTOR) {
4993            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4994                           new_args, const_args);
4995        } else {
4996            tcg_out_op(s, op->opc, new_args, const_args);
4997        }
4998        break;
4999    }
5000
5001    /* move the outputs in the correct register if needed */
5002    for (i = 0; i < nb_oargs; i++) {
5003        ts = arg_temp(op->args[i]);
5004
5005        /* ENV should not be modified.  */
5006        tcg_debug_assert(!temp_readonly(ts));
5007
5008        if (NEED_SYNC_ARG(i)) {
5009            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5010        } else if (IS_DEAD_ARG(i)) {
5011            temp_dead(s, ts);
5012        }
5013    }
5014}
5015
5016static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5017{
5018    const TCGLifeData arg_life = op->life;
5019    TCGTemp *ots, *itsl, *itsh;
5020    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5021
5022    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5023    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5024    tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5025
5026    ots = arg_temp(op->args[0]);
5027    itsl = arg_temp(op->args[1]);
5028    itsh = arg_temp(op->args[2]);
5029
5030    /* ENV should not be modified.  */
5031    tcg_debug_assert(!temp_readonly(ots));
5032
5033    /* Allocate the output register now.  */
5034    if (ots->val_type != TEMP_VAL_REG) {
5035        TCGRegSet allocated_regs = s->reserved_regs;
5036        TCGRegSet dup_out_regs =
5037            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5038        TCGReg oreg;
5039
5040        /* Make sure to not spill the input registers. */
5041        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5042            tcg_regset_set_reg(allocated_regs, itsl->reg);
5043        }
5044        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5045            tcg_regset_set_reg(allocated_regs, itsh->reg);
5046        }
5047
5048        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5049                             output_pref(op, 0), ots->indirect_base);
5050        set_temp_val_reg(s, ots, oreg);
5051    }
5052
5053    /* Promote dup2 of immediates to dupi_vec. */
5054    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5055        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5056        MemOp vece = MO_64;
5057
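        /*
         * E.g. (illustrative): itsl->val = 0x00010001 and itsh->val =
         * 0x00010001 give val = 0x0001000100010001, which equals
         * dup_const(MO_16, val), so vece narrows from MO_64 to MO_16.
         */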
5058        if (val == dup_const(MO_8, val)) {
5059            vece = MO_8;
5060        } else if (val == dup_const(MO_16, val)) {
5061            vece = MO_16;
5062        } else if (val == dup_const(MO_32, val)) {
5063            vece = MO_32;
5064        }
5065
5066        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5067        goto done;
5068    }
5069
5070    /* If the two inputs form one 64-bit value, try dupm_vec. */
5071    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5072        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5073        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5074        TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5075
5076        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5077        temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5078
5079        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5080                             its->mem_base->reg, its->mem_offset)) {
5081            goto done;
5082        }
5083    }
5084
5085    /* Fall back to generic expansion. */
5086    return false;
5087
5088 done:
5089    ots->mem_coherent = 0;
5090    if (IS_DEAD_ARG(1)) {
5091        temp_dead(s, itsl);
5092    }
5093    if (IS_DEAD_ARG(2)) {
5094        temp_dead(s, itsh);
5095    }
5096    if (NEED_SYNC_ARG(0)) {
5097        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5098    } else if (IS_DEAD_ARG(0)) {
5099        temp_dead(s, ots);
5100    }
5101    return true;
5102}
5103
5104static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5105                         TCGRegSet allocated_regs)
5106{
5107    if (ts->val_type == TEMP_VAL_REG) {
5108        if (ts->reg != reg) {
5109            tcg_reg_free(s, reg, allocated_regs);
5110            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5111                /*
5112                 * Cross register class move not supported.  Sync the
5113                 * temp back to its slot and load from there.
5114                 */
5115                temp_sync(s, ts, allocated_regs, 0, 0);
5116                tcg_out_ld(s, ts->type, reg,
5117                           ts->mem_base->reg, ts->mem_offset);
5118            }
5119        }
5120    } else {
5121        TCGRegSet arg_set = 0;
5122
5123        tcg_reg_free(s, reg, allocated_regs);
5124        tcg_regset_set_reg(arg_set, reg);
5125        temp_load(s, ts, arg_set, allocated_regs, 0);
5126    }
5127}
5128
5129static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5130                         TCGRegSet allocated_regs)
5131{
5132    /*
5133     * When the destination is on the stack, load up the temp and store.
5134     * If there are many call-saved registers, the temp might live to
5135     * see another use; otherwise it'll be discarded.
5136     */
5137    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5138    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5139               arg_slot_stk_ofs(arg_slot));
5140}
5141
5142static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5143                            TCGTemp *ts, TCGRegSet *allocated_regs)
5144{
5145    if (arg_slot_reg_p(l->arg_slot)) {
5146        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5147        load_arg_reg(s, reg, ts, *allocated_regs);
5148        tcg_regset_set_reg(*allocated_regs, reg);
5149    } else {
5150        load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5151    }
5152}
5153
5154static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5155                         intptr_t ref_off, TCGRegSet *allocated_regs)
5156{
5157    TCGReg reg;
5158
5159    if (arg_slot_reg_p(arg_slot)) {
5160        reg = tcg_target_call_iarg_regs[arg_slot];
5161        tcg_reg_free(s, reg, *allocated_regs);
5162        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5163        tcg_regset_set_reg(*allocated_regs, reg);
5164    } else {
5165        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5166                            *allocated_regs, 0, false);
5167        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5168        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5169                   arg_slot_stk_ofs(arg_slot));
5170    }
5171}
5172
5173static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5174{
5175    const int nb_oargs = TCGOP_CALLO(op);
5176    const int nb_iargs = TCGOP_CALLI(op);
5177    const TCGLifeData arg_life = op->life;
5178    const TCGHelperInfo *info = tcg_call_info(op);
5179    TCGRegSet allocated_regs = s->reserved_regs;
5180    int i;
5181
5182    /*
5183     * Move inputs into place in reverse order,
5184     * so that we place stacked arguments first.
5185     */
5186    for (i = nb_iargs - 1; i >= 0; --i) {
5187        const TCGCallArgumentLoc *loc = &info->in[i];
5188        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5189
5190        switch (loc->kind) {
5191        case TCG_CALL_ARG_NORMAL:
5192        case TCG_CALL_ARG_EXTEND_U:
5193        case TCG_CALL_ARG_EXTEND_S:
5194            load_arg_normal(s, loc, ts, &allocated_regs);
5195            break;
5196        case TCG_CALL_ARG_BY_REF:
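            /*
             * Passed by reference: store the value into its reserved
             * stack slot, then pass the address of that slot.
             */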
5197            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5198            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5199                         arg_slot_stk_ofs(loc->ref_slot),
5200                         &allocated_regs);
5201            break;
5202        case TCG_CALL_ARG_BY_REF_N:
5203            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5204            break;
5205        default:
5206            g_assert_not_reached();
5207        }
5208    }
5209
5210    /* Mark dead temporaries and free the associated registers.  */
5211    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5212        if (IS_DEAD_ARG(i)) {
5213            temp_dead(s, arg_temp(op->args[i]));
5214        }
5215    }
5216
5217    /* Clobber call registers.  */
5218    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5219        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5220            tcg_reg_free(s, i, allocated_regs);
5221        }
5222    }
5223
5224    /*
5225     * Save globals if they might be written by the helper,
5226     * sync them if they might be read.
5227     */
5228    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5229        /* Nothing to do */
5230    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5231        sync_globals(s, allocated_regs);
5232    } else {
5233        save_globals(s, allocated_regs);
5234    }
5235
5236    /*
5237     * If the ABI passes a pointer to the returned struct as the first
5238     * argument, load that now.  Pass a pointer to the output home slot.
5239     */
5240    if (info->out_kind == TCG_CALL_RET_BY_REF) {
5241        TCGTemp *ts = arg_temp(op->args[0]);
5242
5243        if (!ts->mem_allocated) {
5244            temp_allocate_frame(s, ts);
5245        }
5246        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5247    }
5248
5249    tcg_out_call(s, tcg_call_func(op), info);
5250
5251    /* Assign output registers and emit moves if needed.  */
5252    switch (info->out_kind) {
5253    case TCG_CALL_RET_NORMAL:
5254        for (i = 0; i < nb_oargs; i++) {
5255            TCGTemp *ts = arg_temp(op->args[i]);
5256            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5257
5258            /* ENV should not be modified.  */
5259            tcg_debug_assert(!temp_readonly(ts));
5260
5261            set_temp_val_reg(s, ts, reg);
5262            ts->mem_coherent = 0;
5263        }
5264        break;
5265
5266    case TCG_CALL_RET_BY_VEC:
5267        {
5268            TCGTemp *ts = arg_temp(op->args[0]);
5269
5270            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5271            tcg_debug_assert(ts->temp_subindex == 0);
5272            if (!ts->mem_allocated) {
5273                temp_allocate_frame(s, ts);
5274            }
5275            tcg_out_st(s, TCG_TYPE_V128,
5276                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5277                       ts->mem_base->reg, ts->mem_offset);
5278        }
5279        /* fall through to mark all parts in memory */
5280
5281    case TCG_CALL_RET_BY_REF:
5282        /* The callee has performed a write through the reference. */
5283        for (i = 0; i < nb_oargs; i++) {
5284            TCGTemp *ts = arg_temp(op->args[i]);
5285            ts->val_type = TEMP_VAL_MEM;
5286        }
5287        break;
5288
5289    default:
5290        g_assert_not_reached();
5291    }
5292
5293    /* Flush or discard output registers as needed. */
5294    for (i = 0; i < nb_oargs; i++) {
5295        TCGTemp *ts = arg_temp(op->args[i]);
5296        if (NEED_SYNC_ARG(i)) {
5297            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5298        } else if (IS_DEAD_ARG(i)) {
5299            temp_dead(s, ts);
5300        }
5301    }
5302}
5303
5304/**
5305 * atom_and_align_for_opc:
5306 * @s: tcg context
5307 * @opc: memory operation code
5308 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5309 * @allow_two_ops: true if we are prepared to issue two operations
5310 *
5311 * Return the alignment and atomicity to use for the inline fast path
5312 * for the given memory operation.  The alignment may be larger than
5313 * that specified in @opc, and the correct alignment will be diagnosed
5314 * by the slow path helper.
5315 *
5316 * If @allow_two_ops, the host is prepared to test for 2x alignment,
5317 * and issue two loads or stores for subalignment.
5318 */
5319static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5320                                           MemOp host_atom, bool allow_two_ops)
5321{
5322    MemOp align = get_alignment_bits(opc);
5323    MemOp size = opc & MO_SIZE;
5324    MemOp half = size ? size - 1 : 0;
5325    MemOp atmax;
5326    MemOp atom;
5327
5328    /* When execution is serialized (no CF_PARALLEL), no atomicity is required. */
5329    if (s->gen_tb->cflags & CF_PARALLEL) {
5330        atom = opc & MO_ATOM_MASK;
5331    } else {
5332        atom = MO_ATOM_NONE;
5333    }
5334
5335    switch (atom) {
5336    case MO_ATOM_NONE:
5337        /* The operation requires no specific atomicity. */
5338        atmax = MO_8;
5339        break;
5340
5341    case MO_ATOM_IFALIGN:
5342        atmax = size;
5343        break;
5344
5345    case MO_ATOM_IFALIGN_PAIR:
5346        atmax = half;
5347        break;
5348
5349    case MO_ATOM_WITHIN16:
5350        atmax = size;
5351        if (size == MO_128) {
5352            /* Misalignment implies !within16, and therefore no atomicity. */
5353        } else if (host_atom != MO_ATOM_WITHIN16) {
5354            /* The host does not implement within16, so require alignment. */
5355            align = MAX(align, size);
5356        }
5357        break;
5358
5359    case MO_ATOM_WITHIN16_PAIR:
5360        atmax = size;
5361        /*
5362         * Misalignment implies !within16, and therefore half atomicity.
5363         * Any host prepared for two operations can implement this with
5364         * half alignment.
5365         */
5366        if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5367            align = MAX(align, half);
5368        }
5369        break;
5370
5371    case MO_ATOM_SUBALIGN:
5372        atmax = size;
5373        if (host_atom != MO_ATOM_SUBALIGN) {
5374            /* If unaligned but not odd, there are subobjects up to half. */
5375            if (allow_two_ops) {
5376                align = MAX(align, half);
5377            } else {
5378                align = MAX(align, size);
5379            }
5380        }
5381        break;
5382
5383    default:
5384        g_assert_not_reached();
5385    }
5386
5387    return (TCGAtomAlign){ .atom = atmax, .align = align };
5388}
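
/*
 * Worked example (illustrative): an MO_64 load tagged MO_ATOM_WITHIN16,
 * compiled with CF_PARALLEL for a host that only guarantees
 * MO_ATOM_IFALIGN, yields atom = MO_64 with align raised to MO_64,
 * since the host cannot provide within-16-byte atomicity.
 */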
5389
5390/*
5391 * Similarly for qemu_ld/st slow path helpers.
5392 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5393 * using only the provided backend tcg_out_* functions.
5394 */
5395
5396static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5397{
5398    int ofs = arg_slot_stk_ofs(slot);
5399
5400    /*
5401     * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5402     * require extension to uint64_t, adjust the address for uint32_t.
5403     */
5404    if (HOST_BIG_ENDIAN &&
5405        TCG_TARGET_REG_BITS == 64 &&
5406        type == TCG_TYPE_I32) {
5407        ofs += 4;
5408    }
5409    return ofs;
5410}
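
/*
 * E.g. (illustrative): on a big-endian 64-bit host, a TCG_TYPE_I32
 * argument destined for stack slot 2 is stored at
 * arg_slot_stk_ofs(2) + 4, placing the 32-bit value in the half of
 * the 8-byte slot where the ABI expects it.
 */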
5411
5412static void tcg_out_helper_load_slots(TCGContext *s,
5413                                      unsigned nmov, TCGMovExtend *mov,
5414                                      const TCGLdstHelperParam *parm)
5415{
5416    unsigned i;
5417    TCGReg dst3;
5418
5419    /*
5420     * Start from the end, storing to the stack first.
5421     * This frees those registers, so we need not consider overlap.
5422     */
5423    for (i = nmov; i-- > 0; ) {
5424        unsigned slot = mov[i].dst;
5425
5426        if (arg_slot_reg_p(slot)) {
5427            goto found_reg;
5428        }
5429
5430        TCGReg src = mov[i].src;
5431        TCGType dst_type = mov[i].dst_type;
5432        MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5433
5434        /* The argument is going onto the stack; extend into scratch. */
5435        if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5436            tcg_debug_assert(parm->ntmp != 0);
5437            mov[i].dst = src = parm->tmp[0];
5438            tcg_out_movext1(s, &mov[i]);
5439        }
5440
5441        tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5442                   tcg_out_helper_stk_ofs(dst_type, slot));
5443    }
5444    return;
5445
5446 found_reg:
5447    /*
5448     * The remaining arguments are in registers.
5449     * Convert slot numbers to argument registers.
5450     */
5451    nmov = i + 1;
5452    for (i = 0; i < nmov; ++i) {
5453        mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5454    }
5455
5456    switch (nmov) {
5457    case 4:
5458        /* The backend must have provided enough temps for the worst case. */
5459        tcg_debug_assert(parm->ntmp >= 2);
5460
5461        dst3 = mov[3].dst;
5462        for (unsigned j = 0; j < 3; ++j) {
5463            if (dst3 == mov[j].src) {
5464                /*
5465                 * Conflict. Copy the source to a temporary, perform the
5466                 * remaining moves, then the extension from our scratch
5467                 * on the way out.
5468                 */
5469                TCGReg scratch = parm->tmp[1];
5470
5471                tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5472                tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5473                tcg_out_movext1_new_src(s, &mov[3], scratch);
5474                return;
5475            }
5476        }
5477
5478        /* No conflicts: perform this move and continue. */
5479        tcg_out_movext1(s, &mov[3]);
5480        /* fall through */
5481
5482    case 3:
5483        tcg_out_movext3(s, mov, mov + 1, mov + 2,
5484                        parm->ntmp ? parm->tmp[0] : -1);
5485        break;
5486    case 2:
5487        tcg_out_movext2(s, mov, mov + 1,
5488                        parm->ntmp ? parm->tmp[0] : -1);
5489        break;
5490    case 1:
5491        tcg_out_movext1(s, mov);
5492        break;
5493    default:
5494        g_assert_not_reached();
5495    }
5496}
5497
5498static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5499                                    TCGType type, tcg_target_long imm,
5500                                    const TCGLdstHelperParam *parm)
5501{
5502    if (arg_slot_reg_p(slot)) {
5503        tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5504    } else {
5505        int ofs = tcg_out_helper_stk_ofs(type, slot);
5506        if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5507            tcg_debug_assert(parm->ntmp != 0);
5508            tcg_out_movi(s, type, parm->tmp[0], imm);
5509            tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5510        }
5511    }
5512}
5513
5514static void tcg_out_helper_load_common_args(TCGContext *s,
5515                                            const TCGLabelQemuLdst *ldst,
5516                                            const TCGLdstHelperParam *parm,
5517                                            const TCGHelperInfo *info,
5518                                            unsigned next_arg)
5519{
5520    TCGMovExtend ptr_mov = {
5521        .dst_type = TCG_TYPE_PTR,
5522        .src_type = TCG_TYPE_PTR,
5523        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5524    };
5525    const TCGCallArgumentLoc *loc = &info->in[0];
5526    TCGType type;
5527    unsigned slot;
5528    tcg_target_ulong imm;
5529
5530    /*
5531     * Handle env, which is always first.
5532     */
5533    ptr_mov.dst = loc->arg_slot;
5534    ptr_mov.src = TCG_AREG0;
5535    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5536
5537    /*
5538     * Handle oi.
5539     */
5540    imm = ldst->oi;
5541    loc = &info->in[next_arg];
5542    type = TCG_TYPE_I32;
5543    switch (loc->kind) {
5544    case TCG_CALL_ARG_NORMAL:
5545        break;
5546    case TCG_CALL_ARG_EXTEND_U:
5547    case TCG_CALL_ARG_EXTEND_S:
5548        /* No extension required for MemOpIdx. */
5549        tcg_debug_assert(imm <= INT32_MAX);
5550        type = TCG_TYPE_REG;
5551        break;
5552    default:
5553        g_assert_not_reached();
5554    }
5555    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5556    next_arg++;
5557
5558    /*
5559     * Handle ra.
5560     */
5561    loc = &info->in[next_arg];
5562    slot = loc->arg_slot;
5563    if (parm->ra_gen) {
5564        int arg_reg = -1;
5565        TCGReg ra_reg;
5566
5567        if (arg_slot_reg_p(slot)) {
5568            arg_reg = tcg_target_call_iarg_regs[slot];
5569        }
5570        ra_reg = parm->ra_gen(s, ldst, arg_reg);
5571
5572        ptr_mov.dst = slot;
5573        ptr_mov.src = ra_reg;
5574        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5575    } else {
5576        imm = (uintptr_t)ldst->raddr;
5577        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5578    }
5579}
5580
5581static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5582                                       const TCGCallArgumentLoc *loc,
5583                                       TCGType dst_type, TCGType src_type,
5584                                       TCGReg lo, TCGReg hi)
5585{
5586    MemOp reg_mo;
5587
5588    if (dst_type <= TCG_TYPE_REG) {
5589        MemOp src_ext;
5590
5591        switch (loc->kind) {
5592        case TCG_CALL_ARG_NORMAL:
5593            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5594            break;
5595        case TCG_CALL_ARG_EXTEND_U:
5596            dst_type = TCG_TYPE_REG;
5597            src_ext = MO_UL;
5598            break;
5599        case TCG_CALL_ARG_EXTEND_S:
5600            dst_type = TCG_TYPE_REG;
5601            src_ext = MO_SL;
5602            break;
5603        default:
5604            g_assert_not_reached();
5605        }
5606
5607        mov[0].dst = loc->arg_slot;
5608        mov[0].dst_type = dst_type;
5609        mov[0].src = lo;
5610        mov[0].src_type = src_type;
5611        mov[0].src_ext = src_ext;
5612        return 1;
5613    }
5614
5615    if (TCG_TARGET_REG_BITS == 32) {
5616        assert(dst_type == TCG_TYPE_I64);
5617        reg_mo = MO_32;
5618    } else {
5619        assert(dst_type == TCG_TYPE_I128);
5620        reg_mo = MO_64;
5621    }
5622
5623    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5624    mov[0].src = lo;
5625    mov[0].dst_type = TCG_TYPE_REG;
5626    mov[0].src_type = TCG_TYPE_REG;
5627    mov[0].src_ext = reg_mo;
5628
5629    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5630    mov[1].src = hi;
5631    mov[1].dst_type = TCG_TYPE_REG;
5632    mov[1].src_type = TCG_TYPE_REG;
5633    mov[1].src_ext = reg_mo;
5634
5635    return 2;
5636}
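
/*
 * E.g. (illustrative): a TCG_TYPE_I64 value on a 32-bit host expands
 * to two TCG_TYPE_REG moves with src_ext = MO_32; mov[0] routes the
 * low half to loc[HOST_BIG_ENDIAN].arg_slot and mov[1] routes the
 * high half to the other slot.
 */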
5637
5638static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5639                                   const TCGLdstHelperParam *parm)
5640{
5641    const TCGHelperInfo *info;
5642    const TCGCallArgumentLoc *loc;
5643    TCGMovExtend mov[2];
5644    unsigned next_arg, nmov;
5645    MemOp mop = get_memop(ldst->oi);
5646
5647    switch (mop & MO_SIZE) {
5648    case MO_8:
5649    case MO_16:
5650    case MO_32:
5651        info = &info_helper_ld32_mmu;
5652        break;
5653    case MO_64:
5654        info = &info_helper_ld64_mmu;
5655        break;
5656    case MO_128:
5657        info = &info_helper_ld128_mmu;
5658        break;
5659    default:
5660        g_assert_not_reached();
5661    }
5662
5663    /* Defer env argument. */
5664    next_arg = 1;
5665
5666    loc = &info->in[next_arg];
5667    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5668        /*
5669         * 32-bit host with 32-bit guest: zero-extend the guest address
5670         * to 64 bits for the helper by storing the low part, then
5671         * load a zero for the high part.
5672         */
5673        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5674                               TCG_TYPE_I32, TCG_TYPE_I32,
5675                               ldst->addrlo_reg, -1);
5676        tcg_out_helper_load_slots(s, 1, mov, parm);
5677
5678        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5679                                TCG_TYPE_I32, 0, parm);
5680        next_arg += 2;
5681    } else {
5682        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5683                                      ldst->addrlo_reg, ldst->addrhi_reg);
5684        tcg_out_helper_load_slots(s, nmov, mov, parm);
5685        next_arg += nmov;
5686    }
5687
5688    switch (info->out_kind) {
5689    case TCG_CALL_RET_NORMAL:
5690    case TCG_CALL_RET_BY_VEC:
5691        break;
5692    case TCG_CALL_RET_BY_REF:
5693        /*
5694         * The return reference is in the first argument slot.
5695         * We need memory in which to return: re-use the top of stack.
5696         */
5697        {
5698            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5699
5700            if (arg_slot_reg_p(0)) {
5701                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5702                                 TCG_REG_CALL_STACK, ofs_slot0);
5703            } else {
5704                tcg_debug_assert(parm->ntmp != 0);
5705                tcg_out_addi_ptr(s, parm->tmp[0],
5706                                 TCG_REG_CALL_STACK, ofs_slot0);
5707                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5708                           TCG_REG_CALL_STACK, ofs_slot0);
5709            }
5710        }
5711        break;
5712    default:
5713        g_assert_not_reached();
5714    }
5715
5716    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5717}
5718
5719static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5720                                  bool load_sign,
5721                                  const TCGLdstHelperParam *parm)
5722{
5723    MemOp mop = get_memop(ldst->oi);
5724    TCGMovExtend mov[2];
5725    int ofs_slot0;
5726
5727    switch (ldst->type) {
5728    case TCG_TYPE_I64:
5729        if (TCG_TARGET_REG_BITS == 32) {
5730            break;
5731        }
5732        /* fall through */
5733
5734    case TCG_TYPE_I32:
5735        mov[0].dst = ldst->datalo_reg;
5736        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5737        mov[0].dst_type = ldst->type;
5738        mov[0].src_type = TCG_TYPE_REG;
5739
5740        /*
5741         * If load_sign, then we allowed the helper to perform the
5742         * appropriate sign extension to tcg_target_ulong, and all
5743         * we need now is a plain move.
5744         *
5745         * If load_sign is not set, we expect the relevant extension
5746         * instruction to be no more expensive than a move, and
5747         * we thus save the icache etc by only using one of two
5748         * helper functions.
5749         */
5750        if (load_sign || !(mop & MO_SIGN)) {
5751            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5752                mov[0].src_ext = MO_32;
5753            } else {
5754                mov[0].src_ext = MO_64;
5755            }
5756        } else {
5757            mov[0].src_ext = mop & MO_SSIZE;
5758        }
5759        tcg_out_movext1(s, mov);
5760        return;
5761
5762    case TCG_TYPE_I128:
5763        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5764        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5765        switch (TCG_TARGET_CALL_RET_I128) {
5766        case TCG_CALL_RET_NORMAL:
5767            break;
5768        case TCG_CALL_RET_BY_VEC:
5769            tcg_out_st(s, TCG_TYPE_V128,
5770                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5771                       TCG_REG_CALL_STACK, ofs_slot0);
5772            /* fall through */
5773        case TCG_CALL_RET_BY_REF:
5774            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5775                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5776            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5777                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5778            return;
5779        default:
5780            g_assert_not_reached();
5781        }
5782        break;
5783
5784    default:
5785        g_assert_not_reached();
5786    }
5787
5788    mov[0].dst = ldst->datalo_reg;
5789    mov[0].src =
5790        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5791    mov[0].dst_type = TCG_TYPE_REG;
5792    mov[0].src_type = TCG_TYPE_REG;
5793    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5794
5795    mov[1].dst = ldst->datahi_reg;
5796    mov[1].src =
5797        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5798    mov[1].dst_type = TCG_TYPE_REG;
5799    mov[1].src_type = TCG_TYPE_REG;
5800    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5801
5802    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5803}
5804
5805static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5806                                   const TCGLdstHelperParam *parm)
5807{
5808    const TCGHelperInfo *info;
5809    const TCGCallArgumentLoc *loc;
5810    TCGMovExtend mov[4];
5811    TCGType data_type;
5812    unsigned next_arg, nmov, n;
5813    MemOp mop = get_memop(ldst->oi);
5814
5815    switch (mop & MO_SIZE) {
5816    case MO_8:
5817    case MO_16:
5818    case MO_32:
5819        info = &info_helper_st32_mmu;
5820        data_type = TCG_TYPE_I32;
5821        break;
5822    case MO_64:
5823        info = &info_helper_st64_mmu;
5824        data_type = TCG_TYPE_I64;
5825        break;
5826    case MO_128:
5827        info = &info_helper_st128_mmu;
5828        data_type = TCG_TYPE_I128;
5829        break;
5830    default:
5831        g_assert_not_reached();
5832    }
5833
5834    /* Defer env argument. */
5835    next_arg = 1;
5836    nmov = 0;
5837
5838    /* Handle addr argument. */
5839    loc = &info->in[next_arg];
5840    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5841        /*
5842         * 32-bit host with 32-bit guest: zero-extend the guest address
5843         * to 64 bits for the helper by storing the low part.  Later,
5844         * after we have processed the register inputs, we will load a
5845         * zero for the high part.
5846         */
5847        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5848                               TCG_TYPE_I32, TCG_TYPE_I32,
5849                               ldst->addrlo_reg, -1);
5850        next_arg += 2;
5851        nmov += 1;
5852    } else {
5853        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5854                                   ldst->addrlo_reg, ldst->addrhi_reg);
5855        next_arg += n;
5856        nmov += n;
5857    }
5858
5859    /* Handle data argument. */
5860    loc = &info->in[next_arg];
5861    switch (loc->kind) {
5862    case TCG_CALL_ARG_NORMAL:
5863    case TCG_CALL_ARG_EXTEND_U:
5864    case TCG_CALL_ARG_EXTEND_S:
5865        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5866                                   ldst->datalo_reg, ldst->datahi_reg);
5867        next_arg += n;
5868        nmov += n;
5869        tcg_out_helper_load_slots(s, nmov, mov, parm);
5870        break;
5871
5872    case TCG_CALL_ARG_BY_REF:
5873        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5874        tcg_debug_assert(data_type == TCG_TYPE_I128);
5875        tcg_out_st(s, TCG_TYPE_I64,
5876                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5877                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5878        tcg_out_st(s, TCG_TYPE_I64,
5879                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5880                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5881
5882        tcg_out_helper_load_slots(s, nmov, mov, parm);
5883
5884        if (arg_slot_reg_p(loc->arg_slot)) {
5885            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5886                             TCG_REG_CALL_STACK,
5887                             arg_slot_stk_ofs(loc->ref_slot));
5888        } else {
5889            tcg_debug_assert(parm->ntmp != 0);
5890            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5891                             arg_slot_stk_ofs(loc->ref_slot));
5892            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5893                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5894        }
5895        next_arg += 2;
5896        break;
5897
5898    default:
5899        g_assert_not_reached();
5900    }
5901
5902    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5903        /* Zero extend the address by loading a zero for the high part. */
5904        loc = &info->in[1 + !HOST_BIG_ENDIAN];
5905        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5906    }
5907
5908    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5909}
5910
5911void tcg_dump_op_count(GString *buf)
5912{
5913    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5914}
5915
5916int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5917{
5918    int i, start_words, num_insns;
5919    TCGOp *op;
5920
5921    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5922                 && qemu_log_in_addr_range(pc_start))) {
5923        FILE *logfile = qemu_log_trylock();
5924        if (logfile) {
5925            fprintf(logfile, "OP:\n");
5926            tcg_dump_ops(s, logfile, false);
5927            fprintf(logfile, "\n");
5928            qemu_log_unlock(logfile);
5929        }
5930    }
5931
5932#ifdef CONFIG_DEBUG_TCG
5933    /* Ensure all labels referenced have been emitted.  */
5934    {
5935        TCGLabel *l;
5936        bool error = false;
5937
5938        QSIMPLEQ_FOREACH(l, &s->labels, next) {
5939            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5940                qemu_log_mask(CPU_LOG_TB_OP,
5941                              "$L%d referenced but not present.\n", l->id);
5942                error = true;
5943            }
5944        }
5945        assert(!error);
5946    }
5947#endif
5948
5949    tcg_optimize(s);
5950
5951    reachable_code_pass(s);
5952    liveness_pass_0(s);
5953    liveness_pass_1(s);
5954
5955    if (s->nb_indirects > 0) {
5956        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5957                     && qemu_log_in_addr_range(pc_start))) {
5958            FILE *logfile = qemu_log_trylock();
5959            if (logfile) {
5960                fprintf(logfile, "OP before indirect lowering:\n");
5961                tcg_dump_ops(s, logfile, false);
5962                fprintf(logfile, "\n");
5963                qemu_log_unlock(logfile);
5964            }
5965        }
5966
5967        /* Replace indirect temps with direct temps.  */
5968        if (liveness_pass_2(s)) {
5969            /* If changes were made, re-run liveness.  */
5970            liveness_pass_1(s);
5971        }
5972    }
5973
5974    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5975                 && qemu_log_in_addr_range(pc_start))) {
5976        FILE *logfile = qemu_log_trylock();
5977        if (logfile) {
5978            fprintf(logfile, "OP after optimization and liveness analysis:\n");
5979            tcg_dump_ops(s, logfile, true);
5980            fprintf(logfile, "\n");
5981            qemu_log_unlock(logfile);
5982        }
5983    }
5984
5985    /* Initialize goto_tb jump offsets. */
5986    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5987    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5988    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5989    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5990
5991    tcg_reg_alloc_start(s);
5992
5993    /*
5994     * Reset the buffer pointers when restarting after overflow.
5995     * TODO: Move this into translate-all.c with the rest of the
5996     * buffer management.  Having only this done here is confusing.
5997     */
5998    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
5999    s->code_ptr = s->code_buf;
6000
6001#ifdef TCG_TARGET_NEED_LDST_LABELS
6002    QSIMPLEQ_INIT(&s->ldst_labels);
6003#endif
6004#ifdef TCG_TARGET_NEED_POOL_LABELS
6005    s->pool_labels = NULL;
6006#endif
6007
6008    start_words = s->insn_start_words;
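    /*
     * gen_insn_data is conceptually uint64_t[icount][start_words],
     * indexed below as num_insns * start_words + i.
     */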
6009    s->gen_insn_data =
6010        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6011
6012    num_insns = -1;
6013    QTAILQ_FOREACH(op, &s->ops, link) {
6014        TCGOpcode opc = op->opc;
6015
6016        switch (opc) {
6017        case INDEX_op_mov_i32:
6018        case INDEX_op_mov_i64:
6019        case INDEX_op_mov_vec:
6020            tcg_reg_alloc_mov(s, op);
6021            break;
6022        case INDEX_op_dup_vec:
6023            tcg_reg_alloc_dup(s, op);
6024            break;
6025        case INDEX_op_insn_start:
6026            if (num_insns >= 0) {
6027                size_t off = tcg_current_code_size(s);
6028                s->gen_insn_end_off[num_insns] = off;
6029                /* Assert that we do not overflow our stored offset.  */
6030                assert(s->gen_insn_end_off[num_insns] == off);
6031            }
6032            num_insns++;
6033            for (i = 0; i < start_words; ++i) {
6034                s->gen_insn_data[num_insns * start_words + i] =
6035                    tcg_get_insn_start_param(op, i);
6036            }
6037            break;
6038        case INDEX_op_discard:
6039            temp_dead(s, arg_temp(op->args[0]));
6040            break;
6041        case INDEX_op_set_label:
6042            tcg_reg_alloc_bb_end(s, s->reserved_regs);
6043            tcg_out_label(s, arg_label(op->args[0]));
6044            break;
6045        case INDEX_op_call:
6046            tcg_reg_alloc_call(s, op);
6047            break;
6048        case INDEX_op_exit_tb:
6049            tcg_out_exit_tb(s, op->args[0]);
6050            break;
6051        case INDEX_op_goto_tb:
6052            tcg_out_goto_tb(s, op->args[0]);
6053            break;
6054        case INDEX_op_dup2_vec:
6055            if (tcg_reg_alloc_dup2(s, op)) {
6056                break;
6057            }
6058            /* fall through */
6059        default:
6060            /* Sanity check that we've not introduced any unhandled opcodes. */
6061            tcg_debug_assert(tcg_op_supported(opc));
6062            /* Note: it would be faster to have specialized
6063               register allocator functions for some common
6064               argument patterns. */
6065            tcg_reg_alloc_op(s, op);
6066            break;
6067        }
6068        /* Test for (pending) buffer overflow.  The assumption is that any
6069           one operation beginning below the high water mark cannot overrun
6070           the buffer completely.  Thus we can test for overflow after
6071           generating code without having to check during generation.  */
6072        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6073            return -1;
6074        }
6075        /* Test for TB overflow, as seen by gen_insn_end_off.  */
6076        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6077            return -2;
6078        }
6079    }
6080    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6081    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6082
6083    /* Generate TB finalization at the end of block */
6084#ifdef TCG_TARGET_NEED_LDST_LABELS
6085    i = tcg_out_ldst_finalize(s);
6086    if (i < 0) {
6087        return i;
6088    }
6089#endif
6090#ifdef TCG_TARGET_NEED_POOL_LABELS
6091    i = tcg_out_pool_finalize(s);
6092    if (i < 0) {
6093        return i;
6094    }
6095#endif
6096    if (!tcg_resolve_relocs(s)) {
6097        return -2;
6098    }
6099
6100#ifndef CONFIG_TCG_INTERPRETER
6101    /* flush instruction cache */
6102    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6103                        (uintptr_t)s->code_buf,
6104                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6105#endif
6106
6107    return tcg_current_code_size(s);
6108}
6109
6110void tcg_dump_info(GString *buf)
6111{
6112    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6113}
6114
6115#ifdef ELF_HOST_MACHINE
6116/* In order to use this feature, the backend needs to do three things:
6117
6118   (1) Define ELF_HOST_MACHINE to indicate both what value to
6119       put into the ELF image and to indicate support for the feature.
6120
6121   (2) Define tcg_register_jit.  This should create a buffer containing
6122       the contents of a .debug_frame section that describes the post-
6123       prologue unwind info for the tcg machine.
6124
6125   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6126*/
6127
6128/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6129typedef enum {
6130    JIT_NOACTION = 0,
6131    JIT_REGISTER_FN,
6132    JIT_UNREGISTER_FN
6133} jit_actions_t;
6134
6135struct jit_code_entry {
6136    struct jit_code_entry *next_entry;
6137    struct jit_code_entry *prev_entry;
6138    const void *symfile_addr;
6139    uint64_t symfile_size;
6140};
6141
6142struct jit_descriptor {
6143    uint32_t version;
6144    uint32_t action_flag;
6145    struct jit_code_entry *relevant_entry;
6146    struct jit_code_entry *first_entry;
6147};
6148
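
/*
 * GDB sets a breakpoint on __jit_debug_register_code (located by name)
 * and re-reads __jit_debug_descriptor each time that breakpoint fires.
 */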
6149void __jit_debug_register_code(void) __attribute__((noinline));
6150void __jit_debug_register_code(void)
6151{
6152    asm("");
6153}
6154
6155/* Must statically initialize the version, because GDB may check
6156   the version before we can set it.  */
6157struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6158
6159/* End GDB interface.  */
6160
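/*
 * Return the offset of @str within @strtab.  @str must be present:
 * the scan starts past the leading NUL and is unbounded, so a missing
 * string would run off the end of the table.
 */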
6161static int find_string(const char *strtab, const char *str)
6162{
6163    const char *p = strtab + 1;
6164
6165    while (1) {
6166        if (strcmp(p, str) == 0) {
6167            return p - strtab;
6168        }
6169        p += strlen(p) + 1;
6170    }
6171}
6172
6173static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6174                                 const void *debug_frame,
6175                                 size_t debug_frame_size)
6176{
6177    struct __attribute__((packed)) DebugInfo {
6178        uint32_t  len;
6179        uint16_t  version;
6180        uint32_t  abbrev;
6181        uint8_t   ptr_size;
6182        uint8_t   cu_die;
6183        uint16_t  cu_lang;
6184        uintptr_t cu_low_pc;
6185        uintptr_t cu_high_pc;
6186        uint8_t   fn_die;
6187        char      fn_name[16];
6188        uintptr_t fn_low_pc;
6189        uintptr_t fn_high_pc;
6190        uint8_t   cu_eoc;
6191    };
6192
6193    struct ElfImage {
6194        ElfW(Ehdr) ehdr;
6195        ElfW(Phdr) phdr;
6196        ElfW(Shdr) shdr[7];
6197        ElfW(Sym)  sym[2];
6198        struct DebugInfo di;
6199        uint8_t    da[24];
6200        char       str[80];
6201    };
6202
6203    struct ElfImage *img;
6204
6205    static const struct ElfImage img_template = {
6206        .ehdr = {
6207            .e_ident[EI_MAG0] = ELFMAG0,
6208            .e_ident[EI_MAG1] = ELFMAG1,
6209            .e_ident[EI_MAG2] = ELFMAG2,
6210            .e_ident[EI_MAG3] = ELFMAG3,
6211            .e_ident[EI_CLASS] = ELF_CLASS,
6212            .e_ident[EI_DATA] = ELF_DATA,
6213            .e_ident[EI_VERSION] = EV_CURRENT,
6214            .e_type = ET_EXEC,
6215            .e_machine = ELF_HOST_MACHINE,
6216            .e_version = EV_CURRENT,
6217            .e_phoff = offsetof(struct ElfImage, phdr),
6218            .e_shoff = offsetof(struct ElfImage, shdr),
6219            .e_ehsize = sizeof(ElfW(Ehdr)),
6220            .e_phentsize = sizeof(ElfW(Phdr)),
6221            .e_phnum = 1,
6222            .e_shentsize = sizeof(ElfW(Shdr)),
6223            .e_shnum = ARRAY_SIZE(img->shdr),
6224            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6225#ifdef ELF_HOST_FLAGS
6226            .e_flags = ELF_HOST_FLAGS,
6227#endif
6228#ifdef ELF_OSABI
6229            .e_ident[EI_OSABI] = ELF_OSABI,
6230#endif
6231        },
6232        .phdr = {
6233            .p_type = PT_LOAD,
6234            .p_flags = PF_X,
6235        },
6236        .shdr = {
6237            [0] = { .sh_type = SHT_NULL },
6238            /* Trick: The contents of code_gen_buffer are not present in
6239               this fake ELF file; that got allocated elsewhere.  Therefore
6240               we mark .text as SHT_NOBITS (similar to .bss) so that readers
6241               will not look for contents.  We can record any address.  */
6242            [1] = { /* .text */
6243                .sh_type = SHT_NOBITS,
6244                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6245            },
6246            [2] = { /* .debug_info */
6247                .sh_type = SHT_PROGBITS,
6248                .sh_offset = offsetof(struct ElfImage, di),
6249                .sh_size = sizeof(struct DebugInfo),
6250            },
6251            [3] = { /* .debug_abbrev */
6252                .sh_type = SHT_PROGBITS,
6253                .sh_offset = offsetof(struct ElfImage, da),
6254                .sh_size = sizeof(img->da),
6255            },
6256            [4] = { /* .debug_frame */
6257                .sh_type = SHT_PROGBITS,
6258                .sh_offset = sizeof(struct ElfImage),
6259            },
6260            [5] = { /* .symtab */
6261                .sh_type = SHT_SYMTAB,
6262                .sh_offset = offsetof(struct ElfImage, sym),
6263                .sh_size = sizeof(img->sym),
6264                .sh_info = 1,
6265                .sh_link = ARRAY_SIZE(img->shdr) - 1,
6266                .sh_entsize = sizeof(ElfW(Sym)),
6267            },
6268            [6] = { /* .strtab */
6269                .sh_type = SHT_STRTAB,
6270                .sh_offset = offsetof(struct ElfImage, str),
6271                .sh_size = sizeof(img->str),
6272            }
6273        },
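        /* sym[0] stays zero-initialized: the mandatory STN_UNDEF entry
           with which every ELF symbol table must begin.  */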
6274        .sym = {
6275            [1] = { /* code_gen_buffer */
6276                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6277                .st_shndx = 1,
6278            }
6279        },
6280        .di = {
6281            .len = sizeof(struct DebugInfo) - 4, /* excludes 'len' itself */
6282            .version = 2,                        /* DWARF version 2 */
6283            .ptr_size = sizeof(void *),
6284            .cu_die = 1,
6285            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6286            .fn_die = 2,
6287            .fn_name = "code_gen_buffer"
6288        },
6289        .da = {
6290            1,          /* abbrev number (the cu) */
6291            0x11, 1,    /* DW_TAG_compile_unit, has children */
6292            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6293            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6294            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6295            0, 0,       /* end of abbrev */
6296            2,          /* abbrev number (the fn) */
6297            0x2e, 0,    /* DW_TAG_subprogram, no children */
6298            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6299            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6300            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6301            0, 0,       /* end of abbrev */
6302            0           /* no more abbrev */
6303        },
6304        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6305               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6306    };
6307
6308    /* We only need a single jit entry; statically allocate it.  */
6309    static struct jit_code_entry one_entry;
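    /* Its next_entry/prev_entry links stay NULL: a one-element list.  */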
6310
6311    uintptr_t buf = (uintptr_t)buf_ptr;
6312    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6313    DebugFrameHeader *dfh;
6314
6315    img = g_malloc(img_size);
6316    *img = img_template;
6317
6318    img->phdr.p_vaddr = buf;
6319    img->phdr.p_paddr = buf;
6320    img->phdr.p_memsz = buf_size;
6321
6322    img->shdr[1].sh_name = find_string(img->str, ".text");
6323    img->shdr[1].sh_addr = buf;
6324    img->shdr[1].sh_size = buf_size;
6325
6326    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6327    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6328
6329    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6330    img->shdr[4].sh_size = debug_frame_size;
6331
6332    img->shdr[5].sh_name = find_string(img->str, ".symtab");
6333    img->shdr[6].sh_name = find_string(img->str, ".strtab");
6334
6335    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6336    img->sym[1].st_value = buf;
6337    img->sym[1].st_size = buf_size;
6338
6339    img->di.cu_low_pc = buf;
6340    img->di.cu_high_pc = buf + buf_size;
6341    img->di.fn_low_pc = buf;
6342    img->di.fn_high_pc = buf + buf_size;
6343
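    /* Copy the host-specific .debug_frame (CIE plus FDE template) after
       the image and patch the FDE so its address range covers the whole
       code_gen_buffer; the prologue's unwind info is valid for all
       generated code.  */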
6344    dfh = (DebugFrameHeader *)(img + 1);
6345    memcpy(dfh, debug_frame, debug_frame_size);
6346    dfh->fde.func_start = buf;
6347    dfh->fde.func_len = buf_size;
6348
6349#ifdef DEBUG_JIT
6350    /* Enable this block (define DEBUG_JIT above) to dump the generated ELF
6351       image for inspection with readelf, objdump, or similar utilities.  */
6352    {
6353        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6354        FILE *f = fopen(jit, "w+b");
6355        if (f) {
6356            if (fwrite(img, img_size, 1, f) != 1) {
6357                /* fwrite returns an item count; silence warn_unused_result.  */
6358            }
6359            fclose(f);
6360        }
6361    }
6362#endif
6363
6364    one_entry.symfile_addr = img;
6365    one_entry.symfile_size = img_size;
6366
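    /* Publish the entry.  GDB plants a breakpoint inside
       __jit_debug_register_code() and re-reads the descriptor whenever
       it is hit; this is the standard GDB JIT interface handshake.  */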
6367    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6368    __jit_debug_descriptor.relevant_entry = &one_entry;
6369    __jit_debug_descriptor.first_entry = &one_entry;
6370    __jit_debug_register_code();
6371}
6372#else
6373/* No support for the feature.  Provide the entry point expected by exec.c,
6374   and implement the internal function we declared earlier.  */
6375
6376static void tcg_register_jit_int(const void *buf, size_t size,
6377                                 const void *debug_frame,
6378                                 size_t debug_frame_size)
6379{
6380}
6381
6382void tcg_register_jit(const void *buf, size_t buf_size)
6383{
6384}
6385#endif /* ELF_HOST_MACHINE */
6386
6387#if !TCG_TARGET_MAYBE_vec
6388void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6389{
6390    g_assert_not_reached();
6391}
6392#endif
6393