qemu/tcg/aarch64/tcg-target.inc.c
/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-be-ldst.h"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
    "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
    "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
    "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
};
#endif /* NDEBUG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

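/* X30 is the link register; the prologue saves and restores it, so
   between calls it is free for use as a scratch register.  */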
#define TCG_REG_TMP TCG_REG_X30

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    assert(offset == sextract64(offset, 0, 26));
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    *code_ptr = deposit32(*code_ptr, 0, 26, offset);
}

static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    assert(offset == sextract64(offset, 0, 19));
    *code_ptr = deposit32(*code_ptr, 5, 19, offset);
}

static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        reloc_pc26(code_ptr, (tcg_insn_unit *)value);
        break;
    case R_AARCH64_CONDBR19:
        reloc_pc19(code_ptr, (tcg_insn_unit *)value);
        break;
    default:
        tcg_abort();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
                                   const char **pct_str)
{
    const char *ct_str = *pct_str;

    switch (ct_str[0]) {
    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are needed for helper args, so it is better
           to avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return -1;
    }

    ct_str++;
    *pct_str = ct_str;
    return 0;
}

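/* An arithmetic (ADD/SUB) immediate is an unsigned 12-bit value,
   optionally shifted left by 12 bits; either form is accepted here.  */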
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
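    /* VAL & -VAL isolates the lowest set bit; adding it carries across
       the lowest run of 1s.  If at most one bit remains set, that run
       was the only one, matching one of the forms above.  */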
    val += val & -val;
    return (val & (val - 1)) == 0;
}

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,
} AArch64Insn;

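/* Read back the instruction currently at the output pointer.  Used when
   re-assembling over an existing translation, so that operand bits laid
   down by a previous pass can be preserved.  */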
static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    assert((shift & ~0x30) == 0);
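    /* The 2-bit HW field at bit 21 encodes shift/16; as SHIFT is a
       multiple of 16, shifting left by 21 - 4 both divides by 16 and
       positions the field.  */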
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
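    /* The 21-bit displacement is split: the low 2 bits (immlo) are
       placed at bit 29, the high 19 bits (immhi) at bit 5.  */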
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
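    /* 0x4000 | ext << 13 sets the option field <15:13> to UXTW (010)
       for a 32-bit offset register, or LSL (011) for a 64-bit one.  */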
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | rd);
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    AArch64Insn insn;
    int i, wantinv, shift;
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long imask;

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  For the value and its
       inverse, count the number of 16-bit lanes that are 0.  */
    for (i = wantinv = imask = 0; i < 64; i += 16) {
        tcg_target_long mask = 0xffffull << i;
        if ((value & mask) == 0) {
            wantinv -= 1;
        }
        if ((ivalue & mask) == 0) {
            wantinv += 1;
            imask |= mask;
        }
    }

    /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN.  */
    insn = I3405_MOVZ;
    if (wantinv > 0) {
        value = ivalue;
        insn = I3405_MOVN;
    }

    /* Find the lowest lane that is not 0x0000.  */
    shift = ctz64(value) & (63 & -16);
    tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);

    if (wantinv > 0) {
        /* Re-invert the value, so MOVK sees non-inverted bits.  */
        value = ~value;
        /* Clear out all the 0xffff lanes.  */
        value ^= imask;
    }
    /* Clear out the lane that we just set.  */
    value &= ~(0xffffUL << shift);

    /* Iterate until all lanes have been set, and thus cleared from VALUE.  */
    while (value) {
        shift = ctz64(value) & (63 & -16);
        tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
        value &= ~(0xffffUL << shift);
    }
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
                         TCGReg rd, TCGReg rn, intptr_t offset)
{
    TCGMemOp size = (uint32_t)insn >> 30;

    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << size) - 1))) {
        uintptr_t scaled_uimm = offset >> size;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}

static inline void tcg_out_mov(TCGContext *s,
                               TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_movr(s, type, ret, arg);
    }
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
                 arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
                 arg, arg1, arg2);
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
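    /* LSL is an alias of UBFM with immr = -m mod bits and
       imms = bits - 1 - m.  */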
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
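    /* ROR (immediate) is an alias of EXTR with both sources equal.  */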
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
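    /* BFI is an alias of BFM with immr = -lsb mod size and
       imms = width - 1.  */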
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
{
    /* We pay attention here to not modify the branch target by reading from
       the buffer.  This ensures that caches and memory are kept coherent
       during retranslation.  Mask away possible garbage in the high bits for
       the first translation, while keeping the offset bits for
       retranslation. */
    uint32_t old = tcg_in32(s);
    tcg_out_insn(s, 3206, B, old);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
    /* See comments in tcg_out_goto_noaddr.  */
    uint32_t old = tcg_in32(s) >> 5;
    tcg_out_insn(s, 3202, B_C, c, old);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
    tcg_insn_unit *target = (tcg_insn_unit *)addr;

    reloc_pc26(code_ptr, target);
    flush_icache_range(jmp_addr, jmp_addr + 4);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_goto_noaddr(s);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
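    /* A 32-bit UBFM zero-extends into the full 64-bit register, so the
       hard-coded SF=0 below suffices even for 64-bit results.  */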
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

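    /* If the low-part destination aliases an input still needed for the
       high part, compute the low part into TMP and move it into place
       at the end.  */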
    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_goto(s, lb->raddr);
}

static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    int tlb_offset = is_read ?
        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
    int s_mask = (1 << (opc & MO_SIZE)) - 1;
    TCGReg base = TCG_AREG0, x3;
    uint64_t tlb_mask;

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) {
        tlb_mask = TARGET_PAGE_MASK | s_mask;
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask);
        tlb_mask = TARGET_PAGE_MASK;
        x3 = TCG_REG_X3;
    }

    /* Extract the TLB index from the address into X0.
       X0<CPU_TLB_BITS:0> =
       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, tlb_mask);

    /* Add any "high bits" from the tlb offset to the env address into X2,
       to take advantage of the LSL12 form of the ADDI instruction.
       X2 = env + (tlb_offset & 0xfff000) */
    if (tlb_offset & 0xfff000) {
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
                     tlb_offset & 0xfff000);
        base = TCG_REG_X2;
    }

    /* Merge the tlb index contribution into X2.
       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
                 TCG_REG_X0, CPU_TLB_ENTRY_BITS);

    /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
       X0 = load [X2 + (tlb_offset & 0x000fff)] */
    tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
                 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);

    /* Load the tlb addend. Do that early to avoid stalling.
       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
    tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                 (is_read ? offsetof(CPUTLBEntry, addr_read)
                  : offsetof(CPUTLBEntry, addr_write)));

    /* Perform the address comparison. */
    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
}

#endif /* CONFIG_SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev16(s, data_r, data_r);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
            tcg_out_rev16(s, data_r, data_r);
            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                           data_r, addr_r, otype, off_r);
        }
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev32(s, data_r, data_r);
        }
        break;
    case MO_SL:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
            tcg_out_rev32(s, data_r, data_r);
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
        }
        break;
    case MO_Q:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev64(s, data_r, data_r);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
        break;
    case MO_16:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev16(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
        break;
    case MO_32:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev32(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
        break;
    case MO_64:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev64(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi, TCGType ext)
{
    TCGMemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi)
{
    TCGMemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

static tcg_insn_unit *tb_ret_addr;

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        tcg_out_goto(s, tb_ret_addr);
        break;

    case INDEX_op_goto_tb:
#ifndef USE_DIRECT_JUMP
#error "USE_DIRECT_JUMP required for aarch64"
#endif
        assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
        s->tb_jmp_offset[a0] = tcg_current_code_size(s);
        /* actual branch destination will be patched by
           aarch64_tb_set_jmp_target later, beware retranslation. */
        tcg_out_goto_noaddr(s);
        s->tb_next_offset[a0] = tcg_current_code_size(s);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
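        /* There is no remainder instruction; compute the quotient,
           then a0 = a1 - quotient * a2 via MSUB.  */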
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
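            /* There is no ROLV; rotate left by N is rotate right
               by -N modulo the operand width.  */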
1520            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1521            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1522        }
1523        break;
1524
1525    case INDEX_op_brcond_i32:
1526        a1 = (int32_t)a1;
1527        /* FALLTHRU */
1528    case INDEX_op_brcond_i64:
1529        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1530        break;
1531
1532    case INDEX_op_setcond_i32:
1533        a2 = (int32_t)a2;
1534        /* FALLTHRU */
1535    case INDEX_op_setcond_i64:
1536        tcg_out_cmp(s, ext, a1, a2, c2);
1537        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
1538        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1539                     TCG_REG_XZR, tcg_invert_cond(args[3]));
1540        break;
1541
1542    case INDEX_op_movcond_i32:
1543        a2 = (int32_t)a2;
1544        /* FALLTHRU */
1545    case INDEX_op_movcond_i64:
1546        tcg_out_cmp(s, ext, a1, a2, c2);
1547        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1548        break;
1549
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

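    /* Byte-swaps map onto the REV family of instructions, emitted by
       the tcg_out_rev* helpers defined earlier in this file.  */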
    case INDEX_op_bswap64_i64:
        tcg_out_rev64(s, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev32(s, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, a0, a1);
        break;

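    /* Sign-extensions are SBFM aliases (SXTB/SXTH/SXTW), zero-extensions
       UBFM aliases.  A 32-to-64-bit zero-extend needs only a 32-bit
       register move, since writing a W register clears the high half.  */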
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

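    /* Deposit is a bitfield insert: tcg_out_dep wraps BFM, inserting
       args[4] bits from REG0(2) at bit position args[3] of a0.  */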
    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

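    /* UMULH/SMULH return the high 64 bits of the 128-bit product of two
       64-bit operands.  */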
    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }

#undef REG0
}

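/* Constraint letters, decoded by target_parse_constraint: "r" is any
   register, "l" a register usable for qemu_ld/st, "Z" admits the
   constant zero (XZR), "A" an arithmetic immediate, "L" a logical
   immediate, and "M" the constant -1.  */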
static const TCGTargetOpDef aarch64_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },

    { INDEX_op_st8_i32, { "rZ", "r" } },
    { INDEX_op_st16_i32, { "rZ", "r" } },
    { INDEX_op_st_i32, { "rZ", "r" } },
    { INDEX_op_st8_i64, { "rZ", "r" } },
    { INDEX_op_st16_i64, { "rZ", "r" } },
    { INDEX_op_st32_i64, { "rZ", "r" } },
    { INDEX_op_st_i64, { "rZ", "r" } },

    { INDEX_op_add_i32, { "r", "r", "rA" } },
    { INDEX_op_add_i64, { "r", "r", "rA" } },
    { INDEX_op_sub_i32, { "r", "r", "rA" } },
    { INDEX_op_sub_i64, { "r", "r", "rA" } },
    { INDEX_op_mul_i32, { "r", "r", "r" } },
    { INDEX_op_mul_i64, { "r", "r", "r" } },
    { INDEX_op_div_i32, { "r", "r", "r" } },
    { INDEX_op_div_i64, { "r", "r", "r" } },
    { INDEX_op_divu_i32, { "r", "r", "r" } },
    { INDEX_op_divu_i64, { "r", "r", "r" } },
    { INDEX_op_rem_i32, { "r", "r", "r" } },
    { INDEX_op_rem_i64, { "r", "r", "r" } },
    { INDEX_op_remu_i32, { "r", "r", "r" } },
    { INDEX_op_remu_i64, { "r", "r", "r" } },
    { INDEX_op_and_i32, { "r", "r", "rL" } },
    { INDEX_op_and_i64, { "r", "r", "rL" } },
    { INDEX_op_or_i32, { "r", "r", "rL" } },
    { INDEX_op_or_i64, { "r", "r", "rL" } },
    { INDEX_op_xor_i32, { "r", "r", "rL" } },
    { INDEX_op_xor_i64, { "r", "r", "rL" } },
    { INDEX_op_andc_i32, { "r", "r", "rL" } },
    { INDEX_op_andc_i64, { "r", "r", "rL" } },
    { INDEX_op_orc_i32, { "r", "r", "rL" } },
    { INDEX_op_orc_i64, { "r", "r", "rL" } },
    { INDEX_op_eqv_i32, { "r", "r", "rL" } },
    { INDEX_op_eqv_i64, { "r", "r", "rL" } },

    { INDEX_op_neg_i32, { "r", "r" } },
    { INDEX_op_neg_i64, { "r", "r" } },
    { INDEX_op_not_i32, { "r", "r" } },
    { INDEX_op_not_i64, { "r", "r" } },

    { INDEX_op_shl_i32, { "r", "r", "ri" } },
    { INDEX_op_shr_i32, { "r", "r", "ri" } },
    { INDEX_op_sar_i32, { "r", "r", "ri" } },
    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
    { INDEX_op_shl_i64, { "r", "r", "ri" } },
    { INDEX_op_shr_i64, { "r", "r", "ri" } },
    { INDEX_op_sar_i64, { "r", "r", "ri" } },
    { INDEX_op_rotl_i64, { "r", "r", "ri" } },
    { INDEX_op_rotr_i64, { "r", "r", "ri" } },

    { INDEX_op_brcond_i32, { "r", "rA" } },
    { INDEX_op_brcond_i64, { "r", "rA" } },
    { INDEX_op_setcond_i32, { "r", "r", "rA" } },
    { INDEX_op_setcond_i64, { "r", "r", "rA" } },
    { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
    { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },

    { INDEX_op_qemu_ld_i32, { "r", "l" } },
    { INDEX_op_qemu_ld_i64, { "r", "l" } },
    { INDEX_op_qemu_st_i32, { "lZ", "l" } },
    { INDEX_op_qemu_st_i64, { "lZ", "l" } },

    { INDEX_op_bswap16_i32, { "r", "r" } },
    { INDEX_op_bswap32_i32, { "r", "r" } },
    { INDEX_op_bswap16_i64, { "r", "r" } },
    { INDEX_op_bswap32_i64, { "r", "r" } },
    { INDEX_op_bswap64_i64, { "r", "r" } },

    { INDEX_op_ext8s_i32, { "r", "r" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "r" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
    { INDEX_op_ext_i32_i64, { "r", "r" } },
    { INDEX_op_extu_i32_i64, { "r", "r" } },

    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
    { INDEX_op_deposit_i64, { "r", "0", "rZ" } },

    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
    { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
    { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },

    { INDEX_op_muluh_i64, { "r", "r", "r" } },
    { INDEX_op_mulsh_i64, { "r", "r", "r" } },

    { -1 },
};

static void tcg_target_init(TCGContext *s)
{
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);

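    /* Per AAPCS64, x0-x18 and the link register x30 are caller-saved;
       x19-x28 and fp survive calls and are left out of this set.  */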
    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
                     (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
                     (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
                     (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
                     (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
                     (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
                     (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
                     (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
                     (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
                     (1 << TCG_REG_X18) | (1 << TCG_REG_X30));

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */

    tcg_add_target_add_op_defs(aarch64_op_defs);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
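
/* Frame layout, from low to high addresses: outgoing helper-call
   arguments (TCG_STATIC_CALL_ARGS_SIZE), the TCG temporary buffer,
   then the register save area with (fp, lr) at its base and x19..x28
   above it.  PUSH_SIZE itself is 12 registers * 8 bytes = 96.  */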

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
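    /* Offsets begin at 16 because the pair at offset 0 already holds
       (fp, lr); hence the "+ 2" slots in the offset computation.  */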
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

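    /* The prologue is entered as tcg_qemu_tb_exec(env, tb_ptr): copy env
       into the fixed AREG0 register, then jump to the translated block
       whose address arrived in the second argument register.  */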
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    tb_ret_addr = s->code_ptr;

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

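/* DWARF CFI describing the frame built by the prologue above; it is
   registered through tcg_register_jit below so debuggers can unwind
   through JIT-generated code.  */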
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
