/* qemu/tcg/aarch64/tcg-target.inc.c */
/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-be-ldst.h"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
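
/* For example, the emitters below OR "ext << 31" straight into the insn
   word: TCG_TYPE_I32 (0) selects the 32-bit form, and TCG_TYPE_I64 (1)
   sets SF for the 64-bit form, with no translation step in between.  */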

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
    "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
    "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
    "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    *code_ptr = deposit32(*code_ptr, 0, 26, offset);
}
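
/* Worked example: since tcg_insn_unit is the 32-bit insn word,
   "target - code_ptr" is already the branch offset in instruction units,
   exactly what the B/BL imm26 field expects.  A branch to the
   next-but-one insn gives offset 2, deposited into bits [25:0].  */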

static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
                                     tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_insn_unit insn;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    insn = atomic_read(code_ptr);
    atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
}

static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 19));
    *code_ptr = deposit32(*code_ptr, 5, 19, offset);
}

static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        reloc_pc26(code_ptr, (tcg_insn_unit *)value);
        break;
    case R_AARCH64_CONDBR19:
        reloc_pc19(code_ptr, (tcg_insn_unit *)value);
        break;
    default:
        tcg_abort();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
                                   const char **pct_str)
{
    const char *ct_str = *pct_str;

    switch (ct_str[0]) {
    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are needed for the helper arguments; better
           to avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return -1;
    }

    ct_str++;
    *pct_str = ct_str;
    return 0;
}

static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
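
/* In other words, an arithmetic immediate is a 12-bit value, optionally
   shifted left by 12: 0xfff and 0x123000 qualify, while 0x1001 does not
   because it has bits set in both halves.  */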

static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below by testing the form with the msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
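
/* The trick: "val & -val" isolates the lowest set bit, so adding it to a
   contiguous run of ones carries all the way out, leaving at most one bit
   set.  E.g. 0x0ff0 + 0x0010 = 0x1000, a power of two, so 0x0ff0 matches;
   0x0f0f + 0x0001 = 0x0f10 still has several bits set, so it is rejected. */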

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* System instructions.  */
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
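
/* For instance, tcg_out_insn(s, 3401, ADDI, ...) glues together
   tcg_out_insn_3401(s, I3401_ADDI, ...); naming ADDI with any other
   format number produces an undeclared I34xx_ADDI identifier and fails
   to compile, which is the "typecheck".  */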

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
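
/* So an immediate such as 0x123000 is emitted as imm12 = 0x123 with the
   shift bit set (ADD Xd, Xn, #0x123, LSL #12), while 0x123 is emitted
   with the shift bit clear; mixed values are rejected by the asserts.  */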

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | rd);
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
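
/* Worked example: limm = 0x0000000000ff0000 has l = 16 trailing zeros and
   h = 40 leading zeros, giving r = 64 - 16 = 48 and c = 48 - 40 - 1 = 7,
   i.e. "8 consecutive ones, rotated right by 48", which DecodeBitMasks
   expands back into 0xff0000.  */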

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    int i, wantinv, shift;
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  For the value and its
       inverse, count the number of 16-bit lanes that are 0.  */
    for (i = wantinv = 0; i < 64; i += 16) {
        tcg_target_long mask = 0xffffull << i;
        wantinv -= ((value & mask) == 0);
        wantinv += ((ivalue & mask) == 0);
    }

    if (wantinv <= 0) {
        /* Find the lowest lane that is not 0x0000.  */
        shift = ctz64(value) & (63 & -16);
        tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift);
        /* Clear out the lane that we just set.  */
        value &= ~(0xffffUL << shift);
        /* Iterate until all non-zero lanes have been processed.  */
        while (value) {
            shift = ctz64(value) & (63 & -16);
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
            value &= ~(0xffffUL << shift);
        }
    } else {
        /* Like above, but with the inverted value and MOVN to start.  */
        shift = ctz64(ivalue) & (63 & -16);
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift);
        ivalue &= ~(0xffffUL << shift);
        while (ivalue) {
            shift = ctz64(ivalue) & (63 & -16);
            /* Provide MOVK with the non-inverted value.  */
            tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift);
            ivalue &= ~(0xffffUL << shift);
        }
    }
}
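
/* Putting it together: value = 0x12345678 is neither a small value, a
   logical immediate, nor PC-relative, and no 16-bit lane of it or of its
   inverse is zero, so it falls through to the final loop and emits
       MOVZ Wd, #0x5678
       MOVK Wd, #0x1234, LSL #16
   (two insns instead of a literal load).  */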

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
                         TCGReg rd, TCGReg rn, intptr_t offset)
{
    TCGMemOp size = (uint32_t)insn >> 30;

    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding.  */
    if (offset >= 0 && !(offset & ((1 << size) - 1))) {
        uintptr_t scaled_uimm = offset >> size;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
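
/* E.g. for an 8-byte load (size = 3): offset 8 becomes scaled uimm12 1,
   offset -16 falls back to the unscaled signed form, and an unaligned or
   out-of-range offset such as 40000 is materialized into TCG_REG_TMP and
   addressed with the register-offset form.  */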

static inline void tcg_out_mov(TCGContext *s,
                               TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_movr(s, type, ret, arg);
    }
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
                 arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
                 arg, arg1, arg2);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
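
/* These are the standard BFI parameters: depositing 8 bits at lsb 8 of a
   32-bit value computes a = (32 - 8) & 31 = 24 and b = 7, i.e.
   BFM Wd, Wn, #24, #7, which an assembler would write BFI Wd, Wn, #8, #8. */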

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
{
    /* We pay attention here to not modify the branch target by reading from
       the buffer.  This ensures that caches and memory are kept coherent
       during retranslation.  Mask away possible garbage in the high bits for
       the first translation, while keeping the offset bits for
       retranslation. */
    uint32_t old = tcg_in32(s);
    tcg_out_insn(s, 3206, B, old);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
    /* See comments in tcg_out_goto_noaddr.  */
    uint32_t old = tcg_in32(s) >> 5;
    tcg_out_insn(s, 3202, B_C, c, old);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
    tcg_insn_unit *target = (tcg_insn_unit *)addr;

    reloc_pc26_atomic(code_ptr, target);
    flush_icache_range(jmp_addr, jmp_addr + 4);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_goto_noaddr(s);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
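
/* DMB_LD and DMB_ST are the two "types" bits of the barrier option field,
   so ORing them into DMB_ISH selects DMB ISHLD (orders loads), DMB ISHST
   (orders stores), or the full DMB ISH when both are set, as in the
   default entry above.  */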

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    tcg_debug_assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_goto(s, lb->raddr);
}

static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    int tlb_offset = is_read ?
        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg base = TCG_AREG0, x3;
    uint64_t tlb_mask;

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Extract the TLB index from the address into X0.
       X0<CPU_TLB_BITS:0> =
       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, tlb_mask);

    /* Add any "high bits" from the tlb offset to the env address into X2,
       to take advantage of the LSL12 form of the ADDI instruction.
       X2 = env + (tlb_offset & 0xfff000) */
    if (tlb_offset & 0xfff000) {
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
                     tlb_offset & 0xfff000);
        base = TCG_REG_X2;
    }

    /* Merge the tlb index contribution into X2.
       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
                 TCG_REG_X0, CPU_TLB_ENTRY_BITS);

    /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
       X0 = load [X2 + (tlb_offset & 0x000fff)] */
    tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
                 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);

    /* Load the tlb addend. Do that early to avoid stalling.
       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
    tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                 (is_read ? offsetof(CPUTLBEntry, addr_read)
                  : offsetof(CPUTLBEntry, addr_write)));

    /* Perform the address comparison. */
    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
}

#endif /* CONFIG_SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev16(s, data_r, data_r);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
            tcg_out_rev16(s, data_r, data_r);
            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                           data_r, addr_r, otype, off_r);
        }
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev32(s, data_r, data_r);
        }
        break;
    case MO_SL:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
            tcg_out_rev32(s, data_r, data_r);
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
        }
        break;
    case MO_Q:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev64(s, data_r, data_r);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
        break;
    case MO_16:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev16(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
        break;
    case MO_32:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev32(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
        break;
    case MO_64:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev64(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi, TCGType ext)
{
    TCGMemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi)
{
    TCGMemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

static tcg_insn_unit *tb_ret_addr;

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
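
/* E.g. for a TCG "st8_i32" of constant 0, the operand arrives with
   const_args[0] set, so REG0(0) simply names XZR and the zero is stored
   without first being materialized in a register.  */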
1327
1328    switch (opc) {
1329    case INDEX_op_exit_tb:
1330        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1331        tcg_out_goto(s, tb_ret_addr);
1332        break;
1333
1334    case INDEX_op_goto_tb:
1335#ifndef USE_DIRECT_JUMP
1336#error "USE_DIRECT_JUMP required for aarch64"
1337#endif
1338        /* consistency for USE_DIRECT_JUMP */
1339        tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
1340        s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1341        /* actual branch destination will be patched by
1342           aarch64_tb_set_jmp_target later, beware retranslation. */
1343        tcg_out_goto_noaddr(s);
1344        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1345        break;
1346
1347    case INDEX_op_br:
1348        tcg_out_goto_label(s, arg_label(a0));
1349        break;
1350
1351    case INDEX_op_ld8u_i32:
1352    case INDEX_op_ld8u_i64:
1353        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1354        break;
1355    case INDEX_op_ld8s_i32:
1356        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1357        break;
1358    case INDEX_op_ld8s_i64:
1359        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1360        break;
1361    case INDEX_op_ld16u_i32:
1362    case INDEX_op_ld16u_i64:
1363        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1364        break;
1365    case INDEX_op_ld16s_i32:
1366        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1367        break;
1368    case INDEX_op_ld16s_i64:
1369        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1370        break;
1371    case INDEX_op_ld_i32:
1372    case INDEX_op_ld32u_i64:
1373        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1374        break;
1375    case INDEX_op_ld32s_i64:
1376        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1377        break;
1378    case INDEX_op_ld_i64:
1379        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1380        break;
1381
1382    case INDEX_op_st8_i32:
1383    case INDEX_op_st8_i64:
1384        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1385        break;
1386    case INDEX_op_st16_i32:
1387    case INDEX_op_st16_i64:
1388        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1389        break;
1390    case INDEX_op_st_i32:
1391    case INDEX_op_st32_i64:
1392        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1393        break;
1394    case INDEX_op_st_i64:
1395        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1396        break;
1397
1398    case INDEX_op_add_i32:
1399        a2 = (int32_t)a2;
1400        /* FALLTHRU */
1401    case INDEX_op_add_i64:
1402        if (c2) {
1403            tcg_out_addsubi(s, ext, a0, a1, a2);
1404        } else {
1405            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1406        }
1407        break;
1408
1409    case INDEX_op_sub_i32:
1410        a2 = (int32_t)a2;
1411        /* FALLTHRU */
1412    case INDEX_op_sub_i64:
1413        if (c2) {
1414            tcg_out_addsubi(s, ext, a0, a1, -a2);
1415        } else {
1416            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1417        }
1418        break;
1419
1420    case INDEX_op_neg_i64:
1421    case INDEX_op_neg_i32:
1422        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1423        break;
1424
1425    case INDEX_op_and_i32:
1426        a2 = (int32_t)a2;
1427        /* FALLTHRU */
1428    case INDEX_op_and_i64:
1429        if (c2) {
1430            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1431        } else {
1432            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1433        }
1434        break;
1435
1436    case INDEX_op_andc_i32:
1437        a2 = (int32_t)a2;
1438        /* FALLTHRU */
1439    case INDEX_op_andc_i64:
1440        if (c2) {
1441            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1442        } else {
1443            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1444        }
1445        break;
1446
1447    case INDEX_op_or_i32:
1448        a2 = (int32_t)a2;
1449        /* FALLTHRU */
1450    case INDEX_op_or_i64:
1451        if (c2) {
1452            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1453        } else {
1454            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1455        }
1456        break;
1457
1458    case INDEX_op_orc_i32:
1459        a2 = (int32_t)a2;
1460        /* FALLTHRU */
1461    case INDEX_op_orc_i64:
1462        if (c2) {
1463            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1464        } else {
1465            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1466        }
1467        break;
1468
1469    case INDEX_op_xor_i32:
1470        a2 = (int32_t)a2;
1471        /* FALLTHRU */
1472    case INDEX_op_xor_i64:
1473        if (c2) {
1474            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1475        } else {
1476            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1477        }
1478        break;
1479
1480    case INDEX_op_eqv_i32:
1481        a2 = (int32_t)a2;
1482        /* FALLTHRU */
1483    case INDEX_op_eqv_i64:
1484        if (c2) {
1485            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1486        } else {
1487            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1488        }
1489        break;
1490
1491    case INDEX_op_not_i64:
1492    case INDEX_op_not_i32:
1493        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1494        break;
1495
1496    case INDEX_op_mul_i64:
1497    case INDEX_op_mul_i32:
1498        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1499        break;
1500
1501    case INDEX_op_div_i64:
1502    case INDEX_op_div_i32:
1503        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1504        break;
1505    case INDEX_op_divu_i64:
1506    case INDEX_op_divu_i32:
1507        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1508        break;
1509
1510    case INDEX_op_rem_i64:
1511    case INDEX_op_rem_i32:
1512        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1513        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1514        break;
1515    case INDEX_op_remu_i64:
1516    case INDEX_op_remu_i32:
1517        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1518        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1519        break;
1520
1521    case INDEX_op_shl_i64:
1522    case INDEX_op_shl_i32:
1523        if (c2) {
1524            tcg_out_shl(s, ext, a0, a1, a2);
1525        } else {
1526            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1527        }
1528        break;
1529
1530    case INDEX_op_shr_i64:
1531    case INDEX_op_shr_i32:
1532        if (c2) {
1533            tcg_out_shr(s, ext, a0, a1, a2);
1534        } else {
1535            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1536        }
1537        break;
1538
1539    case INDEX_op_sar_i64:
1540    case INDEX_op_sar_i32:
1541        if (c2) {
1542            tcg_out_sar(s, ext, a0, a1, a2);
1543        } else {
1544            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1545        }
1546        break;
1547
1548    case INDEX_op_rotr_i64:
1549    case INDEX_op_rotr_i32:
1550        if (c2) {
1551            tcg_out_rotr(s, ext, a0, a1, a2);
1552        } else {
1553            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1554        }
1555        break;
1556
1557    case INDEX_op_rotl_i64:
1558    case INDEX_op_rotl_i32:
1559        if (c2) {
1560            tcg_out_rotl(s, ext, a0, a1, a2);
1561        } else {
1562            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1563            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1564        }
1565        break;
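            /* There is no rotate-left instruction; rotl(x, n) is
               rewritten as rotr(x, -n mod width).  tcg_out_rotl handles
               the immediate count, while the register case negates the
               count into TMP first.  RORV only consumes the count modulo
               the data size, so a 32-bit negate suffices even for the
               64-bit rotate.  */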
1566
1567    case INDEX_op_brcond_i32:
1568        a1 = (int32_t)a1;
1569        /* FALLTHRU */
1570    case INDEX_op_brcond_i64:
1571        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1572        break;
1573
1574    case INDEX_op_setcond_i32:
1575        a2 = (int32_t)a2;
1576        /* FALLTHRU */
1577    case INDEX_op_setcond_i64:
1578        tcg_out_cmp(s, ext, a1, a2, c2);
1579        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
1580        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1581                     TCG_REG_XZR, tcg_invert_cond(args[3]));
1582        break;
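            /* CSINC Xd, Xn, Xm, cond is Xn if cond else Xm + 1; with
               both sources XZR and the condition inverted this yields
               a0 = (cond ? 1 : 0).  The result is always 0 or 1, hence
               the fixed TCG_TYPE_I32.  */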
1583
1584    case INDEX_op_movcond_i32:
1585        a2 = (int32_t)a2;
1586        /* FALLTHRU */
1587    case INDEX_op_movcond_i64:
1588        tcg_out_cmp(s, ext, a1, a2, c2);
1589        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1590        break;
1591
1592    case INDEX_op_qemu_ld_i32:
1593    case INDEX_op_qemu_ld_i64:
1594        tcg_out_qemu_ld(s, a0, a1, a2, ext);
1595        break;
1596    case INDEX_op_qemu_st_i32:
1597    case INDEX_op_qemu_st_i64:
1598        tcg_out_qemu_st(s, REG0(0), a1, a2);
1599        break;
1600
1601    case INDEX_op_bswap64_i64:
1602        tcg_out_rev64(s, a0, a1);
1603        break;
1604    case INDEX_op_bswap32_i64:
1605    case INDEX_op_bswap32_i32:
1606        tcg_out_rev32(s, a0, a1);
1607        break;
1608    case INDEX_op_bswap16_i64:
1609    case INDEX_op_bswap16_i32:
1610        tcg_out_rev16(s, a0, a1);
1611        break;
1612
1613    case INDEX_op_ext8s_i64:
1614    case INDEX_op_ext8s_i32:
1615        tcg_out_sxt(s, ext, MO_8, a0, a1);
1616        break;
1617    case INDEX_op_ext16s_i64:
1618    case INDEX_op_ext16s_i32:
1619        tcg_out_sxt(s, ext, MO_16, a0, a1);
1620        break;
1621    case INDEX_op_ext_i32_i64:
1622    case INDEX_op_ext32s_i64:
1623        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1624        break;
1625    case INDEX_op_ext8u_i64:
1626    case INDEX_op_ext8u_i32:
1627        tcg_out_uxt(s, MO_8, a0, a1);
1628        break;
1629    case INDEX_op_ext16u_i64:
1630    case INDEX_op_ext16u_i32:
1631        tcg_out_uxt(s, MO_16, a0, a1);
1632        break;
1633    case INDEX_op_extu_i32_i64:
1634    case INDEX_op_ext32u_i64:
1635        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1636        break;
1637
1638    case INDEX_op_deposit_i64:
1639    case INDEX_op_deposit_i32:
1640        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1641        break;
1642
1643    case INDEX_op_add2_i32:
1644        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1645                        (int32_t)args[4], args[5], const_args[4],
1646                        const_args[5], false);
1647        break;
1648    case INDEX_op_add2_i64:
1649        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1650                        args[5], const_args[4], const_args[5], false);
1651        break;
1652    case INDEX_op_sub2_i32:
1653        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1654                        (int32_t)args[4], args[5], const_args[4],
1655                        const_args[5], true);
1656        break;
1657    case INDEX_op_sub2_i64:
1658        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1659                        args[5], const_args[4], const_args[5], true);
1660        break;
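            /* tcg_out_addsub2 chains the two halves through the carry
               flag.  A sketch of the add2 data flow (illustrative C,
               not backend code):

                   lo    = al + bl;            | ADDS (sets C)
                   carry = lo < al;
                   hi    = ah + bh + carry;    | ADC

               with the subtracting forms used for the sub2 cases.  */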
1661
1662    case INDEX_op_muluh_i64:
1663        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1664        break;
1665    case INDEX_op_mulsh_i64:
1666        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1667        break;
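            /* UMULH/SMULH produce the high 64 bits of the full 128-bit
               product; in C, assuming __int128 support:
               a0 = (uint64_t)(((unsigned __int128)a1 * a2) >> 64).  */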
1668
1669    case INDEX_op_mb:
1670        tcg_out_mb(s, a0);
1671        break;
1672
1673    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
1674    case INDEX_op_mov_i64:
1675    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
1676    case INDEX_op_movi_i64:
1677    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
1678    default:
1679        tcg_abort();
1680    }
1681
1682#undef REG0
1683}
1684
1685static const TCGTargetOpDef aarch64_op_defs[] = {
1686    { INDEX_op_exit_tb, { } },
1687    { INDEX_op_goto_tb, { } },
1688    { INDEX_op_br, { } },
1689
1690    { INDEX_op_ld8u_i32, { "r", "r" } },
1691    { INDEX_op_ld8s_i32, { "r", "r" } },
1692    { INDEX_op_ld16u_i32, { "r", "r" } },
1693    { INDEX_op_ld16s_i32, { "r", "r" } },
1694    { INDEX_op_ld_i32, { "r", "r" } },
1695    { INDEX_op_ld8u_i64, { "r", "r" } },
1696    { INDEX_op_ld8s_i64, { "r", "r" } },
1697    { INDEX_op_ld16u_i64, { "r", "r" } },
1698    { INDEX_op_ld16s_i64, { "r", "r" } },
1699    { INDEX_op_ld32u_i64, { "r", "r" } },
1700    { INDEX_op_ld32s_i64, { "r", "r" } },
1701    { INDEX_op_ld_i64, { "r", "r" } },
1702
1703    { INDEX_op_st8_i32, { "rZ", "r" } },
1704    { INDEX_op_st16_i32, { "rZ", "r" } },
1705    { INDEX_op_st_i32, { "rZ", "r" } },
1706    { INDEX_op_st8_i64, { "rZ", "r" } },
1707    { INDEX_op_st16_i64, { "rZ", "r" } },
1708    { INDEX_op_st32_i64, { "rZ", "r" } },
1709    { INDEX_op_st_i64, { "rZ", "r" } },
1710
1711    { INDEX_op_add_i32, { "r", "r", "rA" } },
1712    { INDEX_op_add_i64, { "r", "r", "rA" } },
1713    { INDEX_op_sub_i32, { "r", "r", "rA" } },
1714    { INDEX_op_sub_i64, { "r", "r", "rA" } },
1715    { INDEX_op_mul_i32, { "r", "r", "r" } },
1716    { INDEX_op_mul_i64, { "r", "r", "r" } },
1717    { INDEX_op_div_i32, { "r", "r", "r" } },
1718    { INDEX_op_div_i64, { "r", "r", "r" } },
1719    { INDEX_op_divu_i32, { "r", "r", "r" } },
1720    { INDEX_op_divu_i64, { "r", "r", "r" } },
1721    { INDEX_op_rem_i32, { "r", "r", "r" } },
1722    { INDEX_op_rem_i64, { "r", "r", "r" } },
1723    { INDEX_op_remu_i32, { "r", "r", "r" } },
1724    { INDEX_op_remu_i64, { "r", "r", "r" } },
1725    { INDEX_op_and_i32, { "r", "r", "rL" } },
1726    { INDEX_op_and_i64, { "r", "r", "rL" } },
1727    { INDEX_op_or_i32, { "r", "r", "rL" } },
1728    { INDEX_op_or_i64, { "r", "r", "rL" } },
1729    { INDEX_op_xor_i32, { "r", "r", "rL" } },
1730    { INDEX_op_xor_i64, { "r", "r", "rL" } },
1731    { INDEX_op_andc_i32, { "r", "r", "rL" } },
1732    { INDEX_op_andc_i64, { "r", "r", "rL" } },
1733    { INDEX_op_orc_i32, { "r", "r", "rL" } },
1734    { INDEX_op_orc_i64, { "r", "r", "rL" } },
1735    { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1736    { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1737
1738    { INDEX_op_neg_i32, { "r", "r" } },
1739    { INDEX_op_neg_i64, { "r", "r" } },
1740    { INDEX_op_not_i32, { "r", "r" } },
1741    { INDEX_op_not_i64, { "r", "r" } },
1742
1743    { INDEX_op_shl_i32, { "r", "r", "ri" } },
1744    { INDEX_op_shr_i32, { "r", "r", "ri" } },
1745    { INDEX_op_sar_i32, { "r", "r", "ri" } },
1746    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1747    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1748    { INDEX_op_shl_i64, { "r", "r", "ri" } },
1749    { INDEX_op_shr_i64, { "r", "r", "ri" } },
1750    { INDEX_op_sar_i64, { "r", "r", "ri" } },
1751    { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1752    { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1753
1754    { INDEX_op_brcond_i32, { "r", "rA" } },
1755    { INDEX_op_brcond_i64, { "r", "rA" } },
1756    { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1757    { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1758    { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1759    { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1760
1761    { INDEX_op_qemu_ld_i32, { "r", "l" } },
1762    { INDEX_op_qemu_ld_i64, { "r", "l" } },
1763    { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1764    { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1765
1766    { INDEX_op_bswap16_i32, { "r", "r" } },
1767    { INDEX_op_bswap32_i32, { "r", "r" } },
1768    { INDEX_op_bswap16_i64, { "r", "r" } },
1769    { INDEX_op_bswap32_i64, { "r", "r" } },
1770    { INDEX_op_bswap64_i64, { "r", "r" } },
1771
1772    { INDEX_op_ext8s_i32, { "r", "r" } },
1773    { INDEX_op_ext16s_i32, { "r", "r" } },
1774    { INDEX_op_ext8u_i32, { "r", "r" } },
1775    { INDEX_op_ext16u_i32, { "r", "r" } },
1776
1777    { INDEX_op_ext8s_i64, { "r", "r" } },
1778    { INDEX_op_ext16s_i64, { "r", "r" } },
1779    { INDEX_op_ext32s_i64, { "r", "r" } },
1780    { INDEX_op_ext8u_i64, { "r", "r" } },
1781    { INDEX_op_ext16u_i64, { "r", "r" } },
1782    { INDEX_op_ext32u_i64, { "r", "r" } },
1783    { INDEX_op_ext_i32_i64, { "r", "r" } },
1784    { INDEX_op_extu_i32_i64, { "r", "r" } },
1785
1786    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1787    { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1788
1789    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1790    { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1791    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1792    { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1793
1794    { INDEX_op_muluh_i64, { "r", "r", "r" } },
1795    { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1796
1797    { INDEX_op_mb, { } },
1798    { -1 },
1799};
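/* Constraint letters used above, as parsed by this backend:
   "r"  any general register
   "l"  the restricted set legal for qemu_ld/st addressing
   "A"  a constant usable as an ADD/SUB (arithmetic) immediate
   "L"  a constant usable as a logical (bitmask) immediate
   "M"  the constant -1
   "Z"  the constant 0 (XZR)
   "0"  an output constrained to alias the first input.  */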
1800
1801static void tcg_target_init(TCGContext *s)
1802{
1803    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1804    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1805
1806    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1807                     (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1808                     (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1809                     (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1810                     (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1811                     (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1812                     (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1813                     (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1814                     (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1815                     (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1816                     (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
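    /* That is, X0..X18 plus X30 (lr): everything the AAPCS64 does not
       require a callee to preserve.  X19..X28 therefore stay live
       across helper calls, which is why they head the register
       allocation order.  */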
1817
1818    tcg_regset_clear(s->reserved_regs);
1819    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1820    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1821    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1822    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1823
1824    tcg_add_target_add_op_defs(aarch64_op_defs);
1825}
1826
1827/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
1828#define PUSH_SIZE  ((30 - 19 + 1) * 8)
1829
1830#define FRAME_SIZE \
1831    ((PUSH_SIZE \
1832      + TCG_STATIC_CALL_ARGS_SIZE \
1833      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1834      + TCG_TARGET_STACK_ALIGN - 1) \
1835     & ~(TCG_TARGET_STACK_ALIGN - 1))
1836
1837/* We're expecting a 2-byte uleb128-encoded value.  */
1838QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1839
1840/* We're expecting to use a single ADDI insn.  */
1841QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
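/* A worked example, assuming the usual tcg.h values
   TCG_STATIC_CALL_ARGS_SIZE = 128 and CPU_TEMP_BUF_NLONGS = 128:
   PUSH_SIZE = 12 * 8 = 96 and FRAME_SIZE = (96 + 128 + 128 * 8 + 15)
   & ~15 = 1248, which indeed needs a two-byte uleb128
   (128 <= 1248 < 16384) and fits the single 12-bit ADDI/SUBI
   immediate (1248 - 96 = 1152 <= 0xfff).  */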
1842
1843static void tcg_target_qemu_prologue(TCGContext *s)
1844{
1845    TCGReg r;
1846
1847    /* Push (FP, LR) and allocate space for all saved registers.  */
1848    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1849                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1850
1851    /* Set up frame pointer for canonical unwinding.  */
1852    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1853
1854    /* Store callee-preserved regs x19..x28.  */
1855    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1856        int ofs = (r - TCG_REG_X19 + 2) * 8;
1857        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1858    }
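    /* The resulting frame, relative to the CFA (the SP on entry):
       [CFA-96] fp   [CFA-88] lr   [CFA-80] x19 ... [CFA-8] x28,
       which is exactly what debug_frame below describes.  */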
1859
1860    /* Make stack space for TCG locals.  */
1861    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1862                 FRAME_SIZE - PUSH_SIZE);
1863
1864    /* Tell TCG where to find its locals: base register, offset and size.  */
1865    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1866                  CPU_TEMP_BUF_NLONGS * sizeof(long));
1867
1868#if !defined(CONFIG_SOFTMMU)
1869    if (USE_GUEST_BASE) {
1870        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
1871        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1872    }
1873#endif
1874
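    /* The prologue is entered as tcg_qemu_tb_exec(env, tb_ptr):
       install env as AREG0 and tail-jump into the translated block.  */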
1875    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1876    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1877
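    /* All translation blocks return here: the exit_tb opcode branches
       back with the return value already loaded into X0.  */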
1878    tb_ret_addr = s->code_ptr;
1879
1880    /* Remove TCG locals stack space.  */
1881    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1882                 FRAME_SIZE - PUSH_SIZE);
1883
1884    /* Restore registers x19..x28.  */
1885    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1886        int ofs = (r - TCG_REG_X19 + 2) * 8;
1887        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1888    }
1889
1890    /* Pop (FP, LR), restore SP to previous frame.  */
1891    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1892                 TCG_REG_SP, PUSH_SIZE, 0, 1);
1893    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1894}
1895
1896typedef struct {
1897    DebugFrameHeader h;
1898    uint8_t fde_def_cfa[4];
1899    uint8_t fde_reg_ofs[24];
1900} DebugFrame;
1901
1902#define ELF_HOST_MACHINE EM_AARCH64
1903
1904static const DebugFrame debug_frame = {
1905    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1906    .h.cie.id = -1,
1907    .h.cie.version = 1,
1908    .h.cie.code_align = 1,
1909    .h.cie.data_align = 0x78,             /* sleb128 -8 */
1910    .h.cie.return_column = TCG_REG_LR,
1911
1912    /* Total FDE size does not include the "len" member.  */
1913    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1914
1915    .fde_def_cfa = {
1916        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
1917        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
1918        (FRAME_SIZE >> 7)
1919    },
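        /* With the worked FRAME_SIZE of 1248 from above, the two bytes
           are 0xe0 0x09: uleb128 emits 7 bits per byte, least
           significant first, setting bit 7 on all but the last.  */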
1920    .fde_reg_ofs = {
1921        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
1922        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
1923        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
1924        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
1925        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
1926        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
1927        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
1928        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
1929        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
1930        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
1931        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
1932        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
1933    }
1934};
1935
1936void tcg_register_jit(void *buf, size_t buf_size)
1937{
1938    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1939}
1940