qemu/tcg/aarch64/tcg-target.inc.c
/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-pool.inc.c"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
    "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
    "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
    "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    *code_ptr = deposit32(*code_ptr, 0, 26, offset);
}
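
/* Illustrative note (not from the original source): because tcg_insn_unit
   is the 32-bit instruction word, the pointer difference above is already
   in instruction units, which is exactly what the imm26 field of B/BL
   expects.  E.g. patching a branch to a target three instructions ahead
   deposits imm26 = 3, which the CPU scales by 4 to a +12 byte offset.  */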

static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
                                     tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_insn_unit insn;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    insn = atomic_read(code_ptr);
    atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
}

static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 19));
    *code_ptr = deposit32(*code_ptr, 5, 19, offset);
}

static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        reloc_pc26(code_ptr, (tcg_insn_unit *)value);
        break;
    case R_AARCH64_CONDBR19:
        reloc_pc19(code_ptr, (tcg_insn_unit *)value);
        break;
    default:
        tcg_abort();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are needed for the helper args; better to
           avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
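
/* Illustrative note (not from the original source): the two accepted forms
   match the ADD/SUB (immediate) encoding, an unsigned 12-bit value with an
   optional LSL #12.  So 0xabc and 0xabc000 are valid arithmetic immediates,
   while 0x1001 (needs 13 bits) and 0xabc00 (shifted by 8, not 12) are not.  */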

static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
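
/* Illustrative note (not from the original source): "val & -val" isolates
   the lowest set bit, so adding it carries through a contiguous run of
   ones; for a single run the result must then be a power of two.
   E.g. val = 0x0ff0: 0x0ff0 + 0x0010 = 0x1000, and 0x1000 & 0x0fff == 0,
   so it is accepted.  val = 0xf0f0: 0xf0f0 + 0x0010 = 0xf100, and
   0xf100 & 0xf0ff != 0, so the two separate runs of ones are rejected.  */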

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    NOP             = 0xd503201f,
    /* System instructions.  */
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
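
/* Illustrative note (not from the original source): the macro pastes the
   format number into both the emitter name and the opcode constant, so
       tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
   expands to
       tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, 0);
   Passing an opcode from the wrong group (say, 3502's ADD to format 3401)
   fails to compile, because I3401_ADD does not exist.  */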

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}
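
/* Illustrative note (not from the original source): the pair offset is a
   signed multiple of 8 in [-0x200, 0x1f8], stored as a scaled imm7 field.
   With pre = w = true, a call along the lines of
       tcg_out_insn(s, 3314, STP, TCG_REG_X19, TCG_REG_X20, TCG_REG_SP,
                    -16, true, true);
   should emit "stp x19, x20, [sp, #-16]!", the usual prologue shape.  */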

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | rd);
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
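
/* Illustrative note (not from the original source): IMMR is the right
   rotation and IMMS the run length minus one fed to DecodeBitMasks.
   E.g. limm = 0x00ff0000 (64-bit): h = clz64 = 40, l = ctz64 = 16, so
   r = 64 - 16 = 48 and c = 48 - 40 - 1 = 7, i.e. a run of 8 ones rotated
   right by 48, which reproduces 0x00ff0000.  */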

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = value - (intptr_t)s->code_ptr;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
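
/* Illustrative note (not from the original source): a value with at most
   two interesting 16-bit chunks fits in MOVZ/MOVN plus one MOVK.  E.g.
   0xdead00000000beef has 24 set bits, so the MOVZ path is taken:
   s0 = 0, t1 = 0xdead000000000000, s1 = 48, t2 = 0, emitting
       movz rd, #0xbeef
       movk rd, #0xdead, lsl #48
   Anything needing three or more moves falls through to the pool load.  */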

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
                         TCGReg rd, TCGReg rn, intptr_t offset)
{
    TCGMemOp size = (uint32_t)insn >> 30;

    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << size) - 1))) {
        uintptr_t scaled_uimm = offset >> size;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
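
/* Illustrative note (not from the original source): for an 8-byte LDRX
   (size = 3), offset 0x1008 scales to uimm12 0x201 and takes the first
   form; offset -8 is negative, so it takes the signed 9-bit unscaled
   (LDUR-style) form; offset 0x123456 is misaligned and out of range,
   so it falls through to the movi + register-offset sequence.  */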

static inline void tcg_out_mov(TCGContext *s,
                               TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_movr(s, type, ret, arg);
    }
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
                 arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
                 arg, arg1, arg2);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}
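
/* Illustrative note (not from the original source): this is the standard
   LSL alias of UBFM.  E.g. a 64-bit shift left by 8 becomes
   UBFM rd, rn, #56, #55: rotate right by 56 (i.e. left by 8) and keep
   the low 56 bits of the source, which is exactly "lsl rd, rn, #8".  */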

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        /* A plain B, not BL: this is a goto, so the link register
           must not be clobbered.  */
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
{
    /* We pay attention here to not modify the branch target by reading from
       the buffer.  This ensures that caches and memory are kept coherent
       during retranslation.  Mask away possible garbage in the high bits for
       the first translation, while keeping the offset bits for
       retranslation. */
    uint32_t old = tcg_in32(s);
    tcg_out_insn(s, 3206, B, old);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
    /* See comments in tcg_out_goto_noaddr.  */
    uint32_t old = tcg_in32(s) >> 5;
    tcg_out_insn(s, 3202, B_C, c, old);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}
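
/* Illustrative note (not from the original source): the two patched
   instructions are written as one aligned 64-bit store, so a cpu
   concurrently executing this TB sees either the old pair or the new
   B+NOP / ADRP+ADDI pair, never a torn mix.  This is why the
   INDEX_op_goto_tb case below pads with a NOP to 8-byte alignment.  */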

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_goto_noaddr(s);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}
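
/* Illustrative note (not from the original source): a plain double-word
   add with register operands reduces to the classic carry chain
       adds rl, al, bl
       adc  rh, ah, bh
   while sub swaps in SUBS/SBC.  The TMP shuffle at the top only matters
   when the low result would clobber a high input that is still needed.  */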

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
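
/* Illustrative note (not from the original source): DMB_ISH above is the
   DMB template with the inner-shareable domain bits but neither access
   type set; OR-ing in DMB_LD / DMB_ST fills the low CRm bits, yielding
   DMB ISHLD, DMB ISHST, or (with both) the full DMB ISH.  So a
   store-store fence, TCG_MO_ST_ST, comes out as "dmb ishst".  */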

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}

#ifdef CONFIG_SOFTMMU
#include "tcg-ldst.inc.c"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    tcg_debug_assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_goto(s, lb->raddr);
}

static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when
   finalizing the slow path.  Generated code returns the host addend
   in X1, and clobbers X0, X2, X3 and TMP.  */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    int tlb_offset = is_read ?
        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg base = TCG_AREG0, x3;
    uint64_t tlb_mask;

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Extract the TLB index from the address into X0.
       X0<CPU_TLB_BITS:0> =
       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, tlb_mask);

    /* Add any "high bits" from the tlb offset to the env address into X2,
       to take advantage of the LSL12 form of the ADDI instruction.
       X2 = env + (tlb_offset & 0xfff000) */
    if (tlb_offset & 0xfff000) {
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
                     tlb_offset & 0xfff000);
        base = TCG_REG_X2;
    }

    /* Merge the tlb index contribution into X2.
       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
                 TCG_REG_X0, CPU_TLB_ENTRY_BITS);

    /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
       X0 = load [X2 + (tlb_offset & 0x000fff)] */
    tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
                 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);

    /* Load the tlb addend.  Do that early to avoid stalling.
       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
    tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                 (is_read ? offsetof(CPUTLBEntry, addr_read)
                  : offsetof(CPUTLBEntry, addr_write)));

    /* Perform the address comparison. */
    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
}
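
/* Illustrative note (not from the original source): for a typical aligned
   64-bit guest load the sequence above boils down to roughly
       ubfm x0, addr, #PAGE_BITS, #PAGE_BITS+TLB_BITS   ; tlb index
       and  x3, addr, #page_mask|a_mask                 ; tag to compare
       add  x2, env, #hi(tlb_offset)                    ; optional
       add  x2, x2, x0, lsl #TLB_ENTRY_BITS             ; entry address
       ldr  x0, [x2, #lo(tlb_offset)]                   ; comparator
       ldr  x1, [x2, #lo(tlb_offset) + addend_off]      ; host addend
       cmp  x0, x3
       b.ne slow_path
   leaving the fast path to issue the access at [x1 + addr].  */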
1229
1230#endif /* CONFIG_SOFTMMU */
1231
1232static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1233                                   TCGReg data_r, TCGReg addr_r,
1234                                   TCGType otype, TCGReg off_r)
1235{
1236    const TCGMemOp bswap = memop & MO_BSWAP;
1237
1238    switch (memop & MO_SSIZE) {
1239    case MO_UB:
1240        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1241        break;
1242    case MO_SB:
1243        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1244                       data_r, addr_r, otype, off_r);
1245        break;
1246    case MO_UW:
1247        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1248        if (bswap) {
1249            tcg_out_rev16(s, data_r, data_r);
1250        }
1251        break;
1252    case MO_SW:
1253        if (bswap) {
1254            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1255            tcg_out_rev16(s, data_r, data_r);
1256            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1257        } else {
1258            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1259                           data_r, addr_r, otype, off_r);
1260        }
1261        break;
1262    case MO_UL:
1263        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1264        if (bswap) {
1265            tcg_out_rev32(s, data_r, data_r);
1266        }
1267        break;
1268    case MO_SL:
1269        if (bswap) {
1270            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1271            tcg_out_rev32(s, data_r, data_r);
1272            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1273        } else {
1274            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1275        }
1276        break;
1277    case MO_Q:
1278        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1279        if (bswap) {
1280            tcg_out_rev64(s, data_r, data_r);
1281        }
1282        break;
1283    default:
1284        tcg_abort();
1285    }
1286}
1287
1288static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1289                                   TCGReg data_r, TCGReg addr_r,
1290                                   TCGType otype, TCGReg off_r)
1291{
1292    const TCGMemOp bswap = memop & MO_BSWAP;
1293
1294    switch (memop & MO_SIZE) {
1295    case MO_8:
1296        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1297        break;
1298    case MO_16:
1299        if (bswap && data_r != TCG_REG_XZR) {
1300            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1301            data_r = TCG_REG_TMP;
1302        }
1303        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1304        break;
1305    case MO_32:
1306        if (bswap && data_r != TCG_REG_XZR) {
1307            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1308            data_r = TCG_REG_TMP;
1309        }
1310        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1311        break;
1312    case MO_64:
1313        if (bswap && data_r != TCG_REG_XZR) {
1314            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1315            data_r = TCG_REG_TMP;
1316        }
1317        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1318        break;
1319    default:
1320        tcg_abort();
1321    }
1322}
1323
1324static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1325                            TCGMemOpIdx oi, TCGType ext)
1326{
1327    TCGMemOp memop = get_memop(oi);
1328    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1329#ifdef CONFIG_SOFTMMU
1330    unsigned mem_index = get_mmuidx(oi);
1331    tcg_insn_unit *label_ptr;
1332
1333    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1334    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1335                           TCG_REG_X1, otype, addr_reg);
1336    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1337                        s->code_ptr, label_ptr);
1338#else /* !CONFIG_SOFTMMU */
1339    if (USE_GUEST_BASE) {
1340        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1341                               TCG_REG_GUEST_BASE, otype, addr_reg);
1342    } else {
1343        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1344                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1345    }
1346#endif /* CONFIG_SOFTMMU */
1347}
1348
1349static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1350                            TCGMemOpIdx oi)
1351{
1352    TCGMemOp memop = get_memop(oi);
1353    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1354#ifdef CONFIG_SOFTMMU
1355    unsigned mem_index = get_mmuidx(oi);
1356    tcg_insn_unit *label_ptr;
1357
1358    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1359    tcg_out_qemu_st_direct(s, memop, data_reg,
1360                           TCG_REG_X1, otype, addr_reg);
1361    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1362                        data_reg, addr_reg, s->code_ptr, label_ptr);
1363#else /* !CONFIG_SOFTMMU */
1364    if (USE_GUEST_BASE) {
1365        tcg_out_qemu_st_direct(s, memop, data_reg,
1366                               TCG_REG_GUEST_BASE, otype, addr_reg);
1367    } else {
1368        tcg_out_qemu_st_direct(s, memop, data_reg,
1369                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1370    }
1371#endif /* CONFIG_SOFTMMU */
1372}
1373
1374static tcg_insn_unit *tb_ret_addr;
1375
1376static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1377                       const TCGArg args[TCG_MAX_OP_ARGS],
1378                       const int const_args[TCG_MAX_OP_ARGS])
1379{
1380    /* 99% of the time, we can signal the use of extension registers
1381       by looking to see if the opcode handles 64-bit data.  */
1382    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1383
1384    /* Hoist the loads of the most common arguments.  */
1385    TCGArg a0 = args[0];
1386    TCGArg a1 = args[1];
1387    TCGArg a2 = args[2];
1388    int c2 = const_args[2];
1389
1390    /* Some operands are defined with "rZ" constraint, a register or
1391       the zero register.  These need not actually test args[I] == 0.  */
1392#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1393
1394    switch (opc) {
1395    case INDEX_op_exit_tb:
1396        /* Reuse the zeroing that exists for goto_ptr.  */
1397        if (a0 == 0) {
1398            tcg_out_goto_long(s, s->code_gen_epilogue);
1399        } else {
1400            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1401            tcg_out_goto_long(s, tb_ret_addr);
1402        }
1403        break;
1404
1405    case INDEX_op_goto_tb:
1406        if (s->tb_jmp_insn_offset != NULL) {
1407            /* TCG_TARGET_HAS_direct_jump */
1408            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1409               write can be used to patch the target address. */
1410            if ((uintptr_t)s->code_ptr & 7) {
1411                tcg_out32(s, NOP);
1412            }
1413            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1414            /* actual branch destination will be patched by
1415               tb_target_set_jmp_target later. */
1416            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1417            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
        } else {
            /* !TCG_TARGET_HAS_direct_jump */
            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
        }
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

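    /* There is no plain MUL at this level: MUL is the MADD alias with
       XZR as the addend register.  */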
    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

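    /* AArch64 has no remainder instruction; compute it as
       a1 - (a1 / a2) * a2 using a division followed by MSUB.  */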
    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

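    /* Constant shift counts expand to the UBFM/SBFM/EXTR bitfield
       aliases in tcg_out_shl and friends; variable counts use the
       LSLV/LSRV/ASRV/RORV register forms.  */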
    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

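    /* There is no rotate-left instruction: rotate left by a variable
       count is rotate right by the negated count.  */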
    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev64(s, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev32(s, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, a0, a1);
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

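    /* Bitfield extract, unsigned (UBFM) or signed (SBFM), with
       lsb = a2 and msb = a2 + len - 1, where args[3] is the length.  */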
    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

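    /* Double-word arithmetic: tcg_out_addsub2 emits a flag-setting
       ADDS/SUBS for the low half and a carry-consuming ADC/SBC for
       the high half.  */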
    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }

#undef REG0
}

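/* Map each opcode to its operand constraints.  Besides "r" (any general
   register), this backend uses "l" for qemu_ld/st addresses, and the
   constant flavors "A" (arithmetic immediate), "L" (logical immediate),
   "M" (minus one) and "Z" (zero); see target_parse_constraint.  */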
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
    static const TCGTargetOpDef r_r_rAL
        = { .args_ct_str = { "r", "r", "rAL" } };
    static const TCGTargetOpDef dep
        = { .args_ct_str = { "r", "0", "rZ" } };
    static const TCGTargetOpDef movc
        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
    static const TCGTargetOpDef add2
        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };

    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return &r_r;

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return &rZ_r;

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return &r_r_rA;

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return &r_r_r;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return &r_r_rL;

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return &r_r_ri;

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return &r_r_rAL;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return &r_rA;

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return &movc;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return &r_l;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return &lZ_l;

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return &dep;

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return &add2;

    default:
        return NULL;
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;

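    /* All of x0..x27 are assumed call-clobbered to start with; the
       AAPCS64 callee-saved registers x19..x29 are then removed.  */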
    tcg_target_call_clobber_regs = 0xfffffffu;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2-byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Tell TCG how to find its locals: frame register, offset, and size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

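    /* For user-only mode, materialize the guest base once in a reserved
       callee-saved register so qemu_ld/st need not reload it.  */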
#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

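    /* The prologue is entered with env in the first argument register
       and the address of the generated code to execute in the second;
       move env into AREG0 and tail-call into the generated code.  */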
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr.  Set the return value to 0, as exit_tb
     * does, and fall through to the rest of the epilogue.
     */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

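/* DWARF CFI describing the frame laid out by tcg_target_qemu_prologue;
   tcg_register_jit below hands it to the GDB JIT interface so a host
   debugger can unwind through generated code.  */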
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}