qemu/tcg/aarch64/tcg-target.inc.c
/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-pool.inc.c"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
  31    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    *code_ptr = deposit32(*code_ptr, 0, 26, offset);
}

static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
                                     tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_insn_unit insn;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    insn = atomic_read(code_ptr);
    atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
}

static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 19));
    *code_ptr = deposit32(*code_ptr, 5, 19, offset);
}

static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        reloc_pc26(code_ptr, (tcg_insn_unit *)value);
        break;
    case R_AARCH64_CONDBR19:
        reloc_pc19(code_ptr, (tcg_insn_unit *)value);
        break;
    default:
        tcg_abort();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffffu;
        break;
    case 'w': /* advsimd registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffff00000000ull;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 for helper args; better to avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
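
/* For example, 0x123 matches the unshifted form and 0x123000 the LSL #12
   form, while 0x123456 has bits in both halves and fails.  Note that
   tcg_target_const_match below also tries the negated value, so a small
   negative addend is still accepted and implemented with the opposite
   ADD/SUB insn.  */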

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
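
/* Worked examples: 0xff (0....01....1) gains its lowest set bit to become
   0x100, a power of two, and matches; 0xff0 (0..01..10..0) becomes 0x1000
   and matches; 0x101 becomes 0x102, which still shares a bit with 0x101,
   and is rejected.  Replicated patterns such as 0x5555555555555555 are
   valid architectural logical immediates, but this simplified test
   rejects them, per the comment above.  */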

/* Match a constant that is valid for vectors.  */
static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
{
    int i;

    *op = 0;
    /* Match replication across 8 bits.  */
    if (v64 == dup_const(MO_8, v64)) {
        *cmode = 0xe;
        *imm8 = v64 & 0xff;
        return true;
    }
    /* Match replication across 16 bits.  */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (v16 == (v16 & 0xff)) {
            *cmode = 0x8;
            *imm8 = v16 & 0xff;
            return true;
        } else if (v16 == (v16 & 0xff00)) {
            *cmode = 0xa;
            *imm8 = v16 >> 8;
            return true;
        }
    }
    /* Match replication across 32 bits.  */
    if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;

        if (v32 == (v32 & 0xff)) {
            *cmode = 0x0;
            *imm8 = v32 & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff00)) {
            *cmode = 0x2;
            *imm8 = (v32 >> 8) & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff0000)) {
            *cmode = 0x4;
            *imm8 = (v32 >> 16) & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff000000)) {
            *cmode = 0x6;
            *imm8 = v32 >> 24;
            return true;
        } else if ((v32 & 0xffff00ff) == 0xff) {
            *cmode = 0xc;
            *imm8 = (v32 >> 8) & 0xff;
            return true;
        } else if ((v32 & 0xff00ffff) == 0xffff) {
            *cmode = 0xd;
            *imm8 = (v32 >> 16) & 0xff;
            return true;
        }
        /* Match forms of a float32.  */
        if (extract32(v32, 0, 19) == 0
            && (extract32(v32, 25, 6) == 0x20
                || extract32(v32, 25, 6) == 0x1f)) {
            *cmode = 0xf;
            *imm8 = (extract32(v32, 31, 1) << 7)
                  | (extract32(v32, 25, 1) << 6)
                  | extract32(v32, 19, 6);
            return true;
        }
    }
    /* Match forms of a float64.  */
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *op = 1;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    /* Match bytes of 0x00 and 0xff.  */
    for (i = 0; i < 64; i += 8) {
        uint64_t byte = extract64(v64, i, 8);
        if (byte != 0 && byte != 0xff) {
            break;
        }
    }
    if (i == 64) {
        *cmode = 0xe;
        *op = 1;
        *imm8 = (extract64(v64, 0, 1) << 0)
              | (extract64(v64, 8, 1) << 1)
              | (extract64(v64, 16, 1) << 2)
              | (extract64(v64, 24, 1) << 3)
              | (extract64(v64, 32, 1) << 4)
              | (extract64(v64, 40, 1) << 5)
              | (extract64(v64, 48, 1) << 6)
              | (extract64(v64, 56, 1) << 7);
        return true;
    }
    return false;
}
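
/* Worked examples: v64 == 0x3f8000003f800000 (1.0f in both 32-bit lanes)
   has bits <18:0> clear and bits <30:25> == 0x1f, so it is encoded as
   cmode=0xf, op=0, imm8=0x70, the FMOV (vector, immediate) form of 1.0.
   A constant of only 0x00/0xff bytes that escapes the replication tests,
   e.g. 0x000000ff00000000, reaches the final loop and yields cmode=0xe,
   op=1, imm8=0x10 (one imm8 bit per 0xff byte).  */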

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
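
/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); pairing an opcode
   with the wrong format fails to compile, because the pasted I<FMT>_<OP>
   identifier does not exist.  */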

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
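
/* Worked example: limm == 0x0ff0 has l == 4 trailing zeros and
   h == 52 leading zeros, giving r = 64 - 4 = 60 and c = r - h - 1 = 7:
   a run of eight ones (IMMS == 7) rotated right by IMMR == 60.  */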

static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, uint64_t v64)
{
    int op, cmode, imm8;

    if (is_fimm(v64, &op, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
    } else if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(rd >= 32);
        tcg_out_dupi_vec(s, type, rd, value);
        return;

    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = value - (intptr_t)s->code_ptr;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
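
/* Worked example: value == 0x123400005678 has fewer than 32 set bits, so
   the MOVZ path is taken with s0 == 0, t1 == 0x123400000000, s1 == 32 and
   t2 == 0, emitting MOVZ rd, #0x5678 then MOVK rd, #0x1234, lsl #32.  A
   three-halfword constant such as 0x123456789abc leaves t2 != 0 and is
   loaded from the constant pool instead.  */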

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
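
/* For example, an LDRX at offset 8 uses the scaled form (uimm12 == 1),
   offset -8 uses the unscaled 9-bit form, and an unencodable offset such
   as 0x123456 is first materialized into TCG_REG_TMP and then used as a
   register offset.  */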

static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 4 << type, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}
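
/* For example, a left-rotate of a 32-bit value by 8 becomes
   EXTR wd, wn, wn, #24: rotating left by m is the same as rotating
   right by bits - m.  */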

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
{
    /* We pay attention here to not modify the branch target by reading from
       the buffer.  This ensures that caches and memory are kept coherent
       during retranslation.  Mask away possible garbage in the high bits for
       the first translation, while keeping the offset bits for retranslation. */
    uint32_t old = tcg_in32(s);
    tcg_out_insn(s, 3206, B, old);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
    /* See comments in tcg_out_goto_noaddr.  */
    uint32_t old = tcg_in32(s) >> 5;
    tcg_out_insn(s, 3202, B_C, c, old);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}
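
/* The patched pair is thus either "B target; NOP" when the target is in
   direct-branch range, or "ADRP TMP, page; ADD TMP, TMP, #lo12" feeding
   the indirect branch emitted after the pair (see the INDEX_op_goto_tb
   handling below).  Updating both insns with a single aligned 8-byte
   atomic_set keeps other threads from executing a half-patched pair.  */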

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_goto_noaddr(s);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}
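
/* For example, a double-word decrement (sub with bl == 1, bh == 0) emits
   SUBS rl, al, #1 followed by SBC rh, ah, xzr; with bh == -1 the second
   insn flips to ADC rh, ah, xzr, per the identity noted above.  */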

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
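
/* DMB_LD and DMB_ST set bits 0 and 1 of the DMB CRm field, so
   DMB_ISH | DMB_ST is DMB ISHST (0xd5033abf), DMB_ISH | DMB_LD is
   DMB ISHLD (0xd50339bf), and setting both yields the full DMB ISH
   (0xd5033bbf) used for the catch-all entries.  */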

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}

#ifdef CONFIG_SOFTMMU
#include "tcg-ldst.inc.c"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    tcg_debug_assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_goto(s, lb->raddr);
}

static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    int tlb_offset = is_read ?
        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg base = TCG_AREG0, x3;
    uint64_t tlb_mask;

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Extract the TLB index from the address into X0.
       X0<CPU_TLB_BITS:0> =
       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, tlb_mask);

    /* Add any "high bits" from the tlb offset to the env address into X2,
       to take advantage of the LSL12 form of the ADDI instruction.
       X2 = env + (tlb_offset & 0xfff000) */
    if (tlb_offset & 0xfff000) {
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
                     tlb_offset & 0xfff000);
        base = TCG_REG_X2;
    }

    /* Merge the tlb index contribution into X2.
       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
                 TCG_REG_X0, CPU_TLB_ENTRY_BITS);

    /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
       X0 = load [X2 + (tlb_offset & 0x000fff)] */
    tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
                 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
                 TARGET_LONG_BITS == 32 ? 2 : 3);

    /* Load the tlb addend. Do that early to avoid stalling.
       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
    tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                 (is_read ? offsetof(CPUTLBEntry, addr_read)
                  : offsetof(CPUTLBEntry, addr_write)), 3);

    /* Perform the address comparison. */
    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
}
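
/* The fast path emitted above is therefore, in outline:
 *     ubfx  x0, addr, #TARGET_PAGE_BITS, #CPU_TLB_BITS+1
 *     and   x3, addr, #tlb_mask
 *     add   x2, env, #hi12(tlb_offset), lsl #12    (only if needed)
 *     add   x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
 *     ldr   x0, [x2, #lo12(tlb_offset)]            (tlb comparator)
 *     ldr   x1, [x2, #lo12(addend offset)]         (host addend)
 *     cmp   x0, x3
 *     b.ne  slow_path
 */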
1540
1541#endif /* CONFIG_SOFTMMU */
1542
1543static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1544                                   TCGReg data_r, TCGReg addr_r,
1545                                   TCGType otype, TCGReg off_r)
1546{
1547    const TCGMemOp bswap = memop & MO_BSWAP;
1548
1549    switch (memop & MO_SSIZE) {
1550    case MO_UB:
1551        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1552        break;
1553    case MO_SB:
1554        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1555                       data_r, addr_r, otype, off_r);
1556        break;
1557    case MO_UW:
1558        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1559        if (bswap) {
1560            tcg_out_rev16(s, data_r, data_r);
1561        }
1562        break;
1563    case MO_SW:
1564        if (bswap) {
1565            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1566            tcg_out_rev16(s, data_r, data_r);
1567            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1568        } else {
1569            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1570                           data_r, addr_r, otype, off_r);
1571        }
1572        break;
1573    case MO_UL:
1574        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1575        if (bswap) {
1576            tcg_out_rev32(s, data_r, data_r);
1577        }
1578        break;
1579    case MO_SL:
1580        if (bswap) {
1581            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1582            tcg_out_rev32(s, data_r, data_r);
1583            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1584        } else {
1585            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1586        }
1587        break;
1588    case MO_Q:
1589        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1590        if (bswap) {
1591            tcg_out_rev64(s, data_r, data_r);
1592        }
1593        break;
1594    default:
1595        tcg_abort();
1596    }
1597}
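
/* E.g. a byte-swapped MO_SW load above expands to three host insns
   (sketch):  LDRH data, [base, off]; REV16 data, data; SXTH data, data.  */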
1598
1599static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1600                                   TCGReg data_r, TCGReg addr_r,
1601                                   TCGType otype, TCGReg off_r)
1602{
1603    const TCGMemOp bswap = memop & MO_BSWAP;
1604
1605    switch (memop & MO_SIZE) {
1606    case MO_8:
1607        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1608        break;
1609    case MO_16:
1610        if (bswap && data_r != TCG_REG_XZR) {
1611            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1612            data_r = TCG_REG_TMP;
1613        }
1614        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1615        break;
1616    case MO_32:
1617        if (bswap && data_r != TCG_REG_XZR) {
1618            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1619            data_r = TCG_REG_TMP;
1620        }
1621        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1622        break;
1623    case MO_64:
1624        if (bswap && data_r != TCG_REG_XZR) {
1625            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1626            data_r = TCG_REG_TMP;
1627        }
1628        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1629        break;
1630    default:
1631        tcg_abort();
1632    }
1633}
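
/* Byte-swapping the zero register is pointless: storing XZR produces the
   same bytes either way, so the bswap cases above skip the REV for XZR
   rather than bounce the value through TMP.  */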
1634
1635static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1636                            TCGMemOpIdx oi, TCGType ext)
1637{
1638    TCGMemOp memop = get_memop(oi);
1639    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1640#ifdef CONFIG_SOFTMMU
1641    unsigned mem_index = get_mmuidx(oi);
1642    tcg_insn_unit *label_ptr;
1643
1644    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1645    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1646                           TCG_REG_X1, otype, addr_reg);
1647    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1648                        s->code_ptr, label_ptr);
1649#else /* !CONFIG_SOFTMMU */
1650    if (USE_GUEST_BASE) {
1651        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1652                               TCG_REG_GUEST_BASE, otype, addr_reg);
1653    } else {
1654        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1655                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1656    }
1657#endif /* CONFIG_SOFTMMU */
1658}
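
/* In the user-only case the access above is a single register-offset
   load, e.g. (sketch)  LDR data, [GUEST_BASE, addr, UXTW]  for a 32-bit
   guest, the index-register extension performing the zero-extension of
   the guest address.  */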
1659
1660static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1661                            TCGMemOpIdx oi)
1662{
1663    TCGMemOp memop = get_memop(oi);
1664    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1665#ifdef CONFIG_SOFTMMU
1666    unsigned mem_index = get_mmuidx(oi);
1667    tcg_insn_unit *label_ptr;
1668
1669    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1670    tcg_out_qemu_st_direct(s, memop, data_reg,
1671                           TCG_REG_X1, otype, addr_reg);
1672        add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1673                        data_reg, addr_reg, s->code_ptr, label_ptr);
1674#else /* !CONFIG_SOFTMMU */
1675    if (USE_GUEST_BASE) {
1676        tcg_out_qemu_st_direct(s, memop, data_reg,
1677                               TCG_REG_GUEST_BASE, otype, addr_reg);
1678    } else {
1679        tcg_out_qemu_st_direct(s, memop, data_reg,
1680                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1681    }
1682#endif /* CONFIG_SOFTMMU */
1683}
1684
1685static tcg_insn_unit *tb_ret_addr;
1686
1687static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1688                       const TCGArg args[TCG_MAX_OP_ARGS],
1689                       const int const_args[TCG_MAX_OP_ARGS])
1690{
1691    /* 99% of the time, we can signal the use of extension registers
1692       by looking to see if the opcode handles 64-bit data.  */
1693    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1694
1695    /* Hoist the loads of the most common arguments.  */
1696    TCGArg a0 = args[0];
1697    TCGArg a1 = args[1];
1698    TCGArg a2 = args[2];
1699    int c2 = const_args[2];
1700
1701    /* Operands defined with the "rZ" constraint are a register or the
1702       zero register; the constant can only be 0, so const_args[I] suffices.  */
1703#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1704
1705    switch (opc) {
1706    case INDEX_op_exit_tb:
1707        /* Reuse the zeroing that exists for goto_ptr.  */
1708        if (a0 == 0) {
1709            tcg_out_goto_long(s, s->code_gen_epilogue);
1710        } else {
1711            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1712            tcg_out_goto_long(s, tb_ret_addr);
1713        }
1714        break;
1715
1716    case INDEX_op_goto_tb:
1717        if (s->tb_jmp_insn_offset != NULL) {
1718            /* TCG_TARGET_HAS_direct_jump */
1719            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1720               write can be used to patch the target address. */
1721            if ((uintptr_t)s->code_ptr & 7) {
1722                tcg_out32(s, NOP);
1723            }
1724            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1725            /* The actual branch destination will be patched by
1726               tb_target_set_jmp_target later.  */
1727            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1728            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
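            /* E.g., once patched, the pair resolves TMP in two insns
               (sketch):  ADRP x30, <target page>;
                          ADD  x30, x30, <offset within page>;
               after which the BR below transfers control.  */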
1729        } else {
1730            /* !TCG_TARGET_HAS_direct_jump */
1731            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1732            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1733            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1734        }
1735        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1736        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1737        break;
1738
1739    case INDEX_op_goto_ptr:
1740        tcg_out_insn(s, 3207, BR, a0);
1741        break;
1742
1743    case INDEX_op_br:
1744        tcg_out_goto_label(s, arg_label(a0));
1745        break;
1746
1747    case INDEX_op_ld8u_i32:
1748    case INDEX_op_ld8u_i64:
1749        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1750        break;
1751    case INDEX_op_ld8s_i32:
1752        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1753        break;
1754    case INDEX_op_ld8s_i64:
1755        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1756        break;
1757    case INDEX_op_ld16u_i32:
1758    case INDEX_op_ld16u_i64:
1759        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1760        break;
1761    case INDEX_op_ld16s_i32:
1762        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1763        break;
1764    case INDEX_op_ld16s_i64:
1765        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1766        break;
1767    case INDEX_op_ld_i32:
1768    case INDEX_op_ld32u_i64:
1769        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1770        break;
1771    case INDEX_op_ld32s_i64:
1772        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1773        break;
1774    case INDEX_op_ld_i64:
1775        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1776        break;
1777
1778    case INDEX_op_st8_i32:
1779    case INDEX_op_st8_i64:
1780        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1781        break;
1782    case INDEX_op_st16_i32:
1783    case INDEX_op_st16_i64:
1784        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1785        break;
1786    case INDEX_op_st_i32:
1787    case INDEX_op_st32_i64:
1788        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1789        break;
1790    case INDEX_op_st_i64:
1791        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1792        break;
1793
1794    case INDEX_op_add_i32:
1795        a2 = (int32_t)a2;
1796        /* FALLTHRU */
1797    case INDEX_op_add_i64:
1798        if (c2) {
1799            tcg_out_addsubi(s, ext, a0, a1, a2);
1800        } else {
1801            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1802        }
1803        break;
1804
1805    case INDEX_op_sub_i32:
1806        a2 = (int32_t)a2;
1807        /* FALLTHRU */
1808    case INDEX_op_sub_i64:
1809        if (c2) {
1810            tcg_out_addsubi(s, ext, a0, a1, -a2);
1811        } else {
1812            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1813        }
1814        break;
1815
1816    case INDEX_op_neg_i64:
1817    case INDEX_op_neg_i32:
1818        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1819        break;
1820
1821    case INDEX_op_and_i32:
1822        a2 = (int32_t)a2;
1823        /* FALLTHRU */
1824    case INDEX_op_and_i64:
1825        if (c2) {
1826            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1827        } else {
1828            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1829        }
1830        break;
1831
1832    case INDEX_op_andc_i32:
1833        a2 = (int32_t)a2;
1834        /* FALLTHRU */
1835    case INDEX_op_andc_i64:
1836        if (c2) {
1837            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1838        } else {
1839            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1840        }
1841        break;
1842
1843    case INDEX_op_or_i32:
1844        a2 = (int32_t)a2;
1845        /* FALLTHRU */
1846    case INDEX_op_or_i64:
1847        if (c2) {
1848            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1849        } else {
1850            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1851        }
1852        break;
1853
1854    case INDEX_op_orc_i32:
1855        a2 = (int32_t)a2;
1856        /* FALLTHRU */
1857    case INDEX_op_orc_i64:
1858        if (c2) {
1859            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1860        } else {
1861            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1862        }
1863        break;
1864
1865    case INDEX_op_xor_i32:
1866        a2 = (int32_t)a2;
1867        /* FALLTHRU */
1868    case INDEX_op_xor_i64:
1869        if (c2) {
1870            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1871        } else {
1872            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1873        }
1874        break;
1875
1876    case INDEX_op_eqv_i32:
1877        a2 = (int32_t)a2;
1878        /* FALLTHRU */
1879    case INDEX_op_eqv_i64:
1880        if (c2) {
1881            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1882        } else {
1883            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1884        }
1885        break;
1886
1887    case INDEX_op_not_i64:
1888    case INDEX_op_not_i32:
1889        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1890        break;
1891
1892    case INDEX_op_mul_i64:
1893    case INDEX_op_mul_i32:
1894        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1895        break;
1896
1897    case INDEX_op_div_i64:
1898    case INDEX_op_div_i32:
1899        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1900        break;
1901    case INDEX_op_divu_i64:
1902    case INDEX_op_divu_i32:
1903        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1904        break;
1905
1906    case INDEX_op_rem_i64:
1907    case INDEX_op_rem_i32:
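        /* a0 = a1 - (a1 / a2) * a2: SDIV puts the quotient in TMP and
           MSUB forms the remainder.  The unsigned variant below is
           identical except for UDIV.  */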
1908        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1909        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1910        break;
1911    case INDEX_op_remu_i64:
1912    case INDEX_op_remu_i32:
1913        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1914        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1915        break;
1916
1917    case INDEX_op_shl_i64:
1918    case INDEX_op_shl_i32:
1919        if (c2) {
1920            tcg_out_shl(s, ext, a0, a1, a2);
1921        } else {
1922            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1923        }
1924        break;
1925
1926    case INDEX_op_shr_i64:
1927    case INDEX_op_shr_i32:
1928        if (c2) {
1929            tcg_out_shr(s, ext, a0, a1, a2);
1930        } else {
1931            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1932        }
1933        break;
1934
1935    case INDEX_op_sar_i64:
1936    case INDEX_op_sar_i32:
1937        if (c2) {
1938            tcg_out_sar(s, ext, a0, a1, a2);
1939        } else {
1940            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1941        }
1942        break;
1943
1944    case INDEX_op_rotr_i64:
1945    case INDEX_op_rotr_i32:
1946        if (c2) {
1947            tcg_out_rotr(s, ext, a0, a1, a2);
1948        } else {
1949            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1950        }
1951        break;
1952
1953    case INDEX_op_rotl_i64:
1954    case INDEX_op_rotl_i32:
1955        if (c2) {
1956            tcg_out_rotl(s, ext, a0, a1, a2);
1957        } else {
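            /* rotl(a1, a2) == rotr(a1, -a2 mod width); AArch64 has only a
               rotate-right, so negate the count (SUB from XZR) first.  */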
1958            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1959            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1960        }
1961        break;
1962
1963    case INDEX_op_clz_i64:
1964    case INDEX_op_clz_i32:
1965        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1966        break;
1967    case INDEX_op_ctz_i64:
1968    case INDEX_op_ctz_i32:
1969        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1970        break;
1971
1972    case INDEX_op_brcond_i32:
1973        a1 = (int32_t)a1;
1974        /* FALLTHRU */
1975    case INDEX_op_brcond_i64:
1976        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1977        break;
1978
1979    case INDEX_op_setcond_i32:
1980        a2 = (int32_t)a2;
1981        /* FALLTHRU */
1982    case INDEX_op_setcond_i64:
1983        tcg_out_cmp(s, ext, a1, a2, c2);
1984        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
1985        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1986                     TCG_REG_XZR, tcg_invert_cond(args[3]));
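        /* That is, a0 = invert(cond) ? XZR : XZR + 1, which is 1 exactly
           when the original condition holds and 0 otherwise.  */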
1987        break;
1988
1989    case INDEX_op_movcond_i32:
1990        a2 = (int32_t)a2;
1991        /* FALLTHRU */
1992    case INDEX_op_movcond_i64:
1993        tcg_out_cmp(s, ext, a1, a2, c2);
1994        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1995        break;
1996
1997    case INDEX_op_qemu_ld_i32:
1998    case INDEX_op_qemu_ld_i64:
1999        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2000        break;
2001    case INDEX_op_qemu_st_i32:
2002    case INDEX_op_qemu_st_i64:
2003        tcg_out_qemu_st(s, REG0(0), a1, a2);
2004        break;
2005
2006    case INDEX_op_bswap64_i64:
2007        tcg_out_rev64(s, a0, a1);
2008        break;
2009    case INDEX_op_bswap32_i64:
2010    case INDEX_op_bswap32_i32:
2011        tcg_out_rev32(s, a0, a1);
2012        break;
2013    case INDEX_op_bswap16_i64:
2014    case INDEX_op_bswap16_i32:
2015        tcg_out_rev16(s, a0, a1);
2016        break;
2017
2018    case INDEX_op_ext8s_i64:
2019    case INDEX_op_ext8s_i32:
2020        tcg_out_sxt(s, ext, MO_8, a0, a1);
2021        break;
2022    case INDEX_op_ext16s_i64:
2023    case INDEX_op_ext16s_i32:
2024        tcg_out_sxt(s, ext, MO_16, a0, a1);
2025        break;
2026    case INDEX_op_ext_i32_i64:
2027    case INDEX_op_ext32s_i64:
2028        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2029        break;
2030    case INDEX_op_ext8u_i64:
2031    case INDEX_op_ext8u_i32:
2032        tcg_out_uxt(s, MO_8, a0, a1);
2033        break;
2034    case INDEX_op_ext16u_i64:
2035    case INDEX_op_ext16u_i32:
2036        tcg_out_uxt(s, MO_16, a0, a1);
2037        break;
2038    case INDEX_op_extu_i32_i64:
2039    case INDEX_op_ext32u_i64:
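        /* Writing a W register zero-extends into the full X register,
           so a plain 32-bit MOV performs the zero-extension.  */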
2040        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2041        break;
2042
2043    case INDEX_op_deposit_i64:
2044    case INDEX_op_deposit_i32:
2045        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2046        break;
2047
2048    case INDEX_op_extract_i64:
2049    case INDEX_op_extract_i32:
2050        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2051        break;
2052
2053    case INDEX_op_sextract_i64:
2054    case INDEX_op_sextract_i32:
2055        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2056        break;
2057
2058    case INDEX_op_add2_i32:
2059        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2060                        (int32_t)args[4], args[5], const_args[4],
2061                        const_args[5], false);
2062        break;
2063    case INDEX_op_add2_i64:
2064        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2065                        args[5], const_args[4], const_args[5], false);
2066        break;
2067    case INDEX_op_sub2_i32:
2068        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2069                        (int32_t)args[4], args[5], const_args[4],
2070                        const_args[5], true);
2071        break;
2072    case INDEX_op_sub2_i64:
2073        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2074                        args[5], const_args[4], const_args[5], true);
2075        break;
2076
2077    case INDEX_op_muluh_i64:
2078        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2079        break;
2080    case INDEX_op_mulsh_i64:
2081        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2082        break;
2083
2084    case INDEX_op_mb:
2085        tcg_out_mb(s, a0);
2086        break;
2087
2088    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2089    case INDEX_op_mov_i64:
2090    case INDEX_op_mov_vec:
2091    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2092    case INDEX_op_movi_i64:
2093    case INDEX_op_dupi_vec:
2094    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2095    default:
2096        g_assert_not_reached();
2097    }
2098
2099#undef REG0
2100}
2101
2102static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2103                           unsigned vecl, unsigned vece,
2104                           const TCGArg *args, const int *const_args)
2105{
2106    static const AArch64Insn cmp_insn[16] = {
2107        [TCG_COND_EQ] = I3616_CMEQ,
2108        [TCG_COND_GT] = I3616_CMGT,
2109        [TCG_COND_GE] = I3616_CMGE,
2110        [TCG_COND_GTU] = I3616_CMHI,
2111        [TCG_COND_GEU] = I3616_CMHS,
2112    };
2113    static const AArch64Insn cmp0_insn[16] = {
2114        [TCG_COND_EQ] = I3617_CMEQ0,
2115        [TCG_COND_GT] = I3617_CMGT0,
2116        [TCG_COND_GE] = I3617_CMGE0,
2117        [TCG_COND_LT] = I3617_CMLT0,
2118        [TCG_COND_LE] = I3617_CMLE0,
2119    };
2120
2121    TCGType type = vecl + TCG_TYPE_V64;
2122    unsigned is_q = vecl;
2123    TCGArg a0, a1, a2;
2124
2125    a0 = args[0];
2126    a1 = args[1];
2127    a2 = args[2];
2128
2129    switch (opc) {
2130    case INDEX_op_ld_vec:
2131        tcg_out_ld(s, type, a0, a1, a2);
2132        break;
2133    case INDEX_op_st_vec:
2134        tcg_out_st(s, type, a0, a1, a2);
2135        break;
2136    case INDEX_op_add_vec:
2137        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2138        break;
2139    case INDEX_op_sub_vec:
2140        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2141        break;
2142    case INDEX_op_mul_vec:
2143        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2144        break;
2145    case INDEX_op_neg_vec:
2146        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2147        break;
2148    case INDEX_op_and_vec:
2149        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2150        break;
2151    case INDEX_op_or_vec:
2152        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2153        break;
2154    case INDEX_op_xor_vec:
2155        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2156        break;
2157    case INDEX_op_andc_vec:
2158        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2159        break;
2160    case INDEX_op_orc_vec:
2161        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2162        break;
2163    case INDEX_op_not_vec:
2164        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2165        break;
2166    case INDEX_op_dup_vec:
2167        tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
2168        break;
2169    case INDEX_op_shli_vec:
2170        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2171        break;
2172    case INDEX_op_shri_vec:
2173        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2174        break;
2175    case INDEX_op_sari_vec:
2176        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2177        break;
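    /* In the 3614 (shift by immediate) format the count is encoded in
       immh:immb: SHL encodes element_bits + shift while USHR/SSHR encode
       2 * element_bits - shift; with element_bits == 8 << vece this gives
       the a2 + (8 << vece) and (16 << vece) - a2 forms above.  */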
2178    case INDEX_op_cmp_vec:
2179        {
2180            TCGCond cond = args[3];
2181            AArch64Insn insn;
2182
2183            if (cond == TCG_COND_NE) {
2184                if (const_args[2]) {
2185                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2186                } else {
2187                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2188                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2189                }
2190            } else {
2191                if (const_args[2]) {
2192                    insn = cmp0_insn[cond];
2193                    if (insn) {
2194                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2195                        break;
2196                    }
2197                    tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2198                    a2 = TCG_VEC_TMP;
2199                }
2200                insn = cmp_insn[cond];
2201                if (insn == 0) {
2202                    TCGArg t;
2203                    t = a1, a1 = a2, a2 = t;
2204                    cond = tcg_swap_cond(cond);
2205                    insn = cmp_insn[cond];
2206                    tcg_debug_assert(insn != 0);
2207                }
2208                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2209            }
2210        }
2211        break;
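    /* Notes on the paths above: NE against zero uses CMTST a1, a1, which
       sets a lane to all-ones iff any bit in it is set; the general NE is
       CMEQ followed by NOT.  Conditions absent from cmp_insn (LT/LE and
       their unsigned forms) are obtained by swapping operands and
       condition.  */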
2212    default:
2213        g_assert_not_reached();
2214    }
2215}
2216
2217int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2218{
2219    switch (opc) {
2220    case INDEX_op_add_vec:
2221    case INDEX_op_sub_vec:
2222    case INDEX_op_mul_vec:
2223    case INDEX_op_and_vec:
2224    case INDEX_op_or_vec:
2225    case INDEX_op_xor_vec:
2226    case INDEX_op_andc_vec:
2227    case INDEX_op_orc_vec:
2228    case INDEX_op_neg_vec:
2229    case INDEX_op_not_vec:
2230    case INDEX_op_cmp_vec:
2231    case INDEX_op_shli_vec:
2232    case INDEX_op_shri_vec:
2233    case INDEX_op_sari_vec:
2234        return 1;
2235
2236    default:
2237        return 0;
2238    }
2239}
2240
2241void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2242                       TCGArg a0, ...)
2243{
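    /* Every vector opcode advertised by tcg_can_emit_vec_op above is
       emitted directly by tcg_out_vec_op, so no expansion is required.  */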
2244}
2245
2246static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2247{
2248    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2249    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2250    static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2251    static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2252    static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2253    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2254    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2255    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2256    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2257    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2258    static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2259    static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2260    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2261    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2262    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2263    static const TCGTargetOpDef r_r_rAL
2264        = { .args_ct_str = { "r", "r", "rAL" } };
2265    static const TCGTargetOpDef dep
2266        = { .args_ct_str = { "r", "0", "rZ" } };
2267    static const TCGTargetOpDef movc
2268        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2269    static const TCGTargetOpDef add2
2270        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
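
    /* Constraint letters, as decoded by this backend's constraint parser:
       'r' core register, 'w' vector register, 'l' register usable in a
       qemu_ld/st address, 'A' arithmetic immediate, 'L' logical immediate,
       'M' add/sub2 immediate, 'Z' the constant zero.  */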
2271
2272    switch (op) {
2273    case INDEX_op_goto_ptr:
2274        return &r;
2275
2276    case INDEX_op_ld8u_i32:
2277    case INDEX_op_ld8s_i32:
2278    case INDEX_op_ld16u_i32:
2279    case INDEX_op_ld16s_i32:
2280    case INDEX_op_ld_i32:
2281    case INDEX_op_ld8u_i64:
2282    case INDEX_op_ld8s_i64:
2283    case INDEX_op_ld16u_i64:
2284    case INDEX_op_ld16s_i64:
2285    case INDEX_op_ld32u_i64:
2286    case INDEX_op_ld32s_i64:
2287    case INDEX_op_ld_i64:
2288    case INDEX_op_neg_i32:
2289    case INDEX_op_neg_i64:
2290    case INDEX_op_not_i32:
2291    case INDEX_op_not_i64:
2292    case INDEX_op_bswap16_i32:
2293    case INDEX_op_bswap32_i32:
2294    case INDEX_op_bswap16_i64:
2295    case INDEX_op_bswap32_i64:
2296    case INDEX_op_bswap64_i64:
2297    case INDEX_op_ext8s_i32:
2298    case INDEX_op_ext16s_i32:
2299    case INDEX_op_ext8u_i32:
2300    case INDEX_op_ext16u_i32:
2301    case INDEX_op_ext8s_i64:
2302    case INDEX_op_ext16s_i64:
2303    case INDEX_op_ext32s_i64:
2304    case INDEX_op_ext8u_i64:
2305    case INDEX_op_ext16u_i64:
2306    case INDEX_op_ext32u_i64:
2307    case INDEX_op_ext_i32_i64:
2308    case INDEX_op_extu_i32_i64:
2309    case INDEX_op_extract_i32:
2310    case INDEX_op_extract_i64:
2311    case INDEX_op_sextract_i32:
2312    case INDEX_op_sextract_i64:
2313        return &r_r;
2314
2315    case INDEX_op_st8_i32:
2316    case INDEX_op_st16_i32:
2317    case INDEX_op_st_i32:
2318    case INDEX_op_st8_i64:
2319    case INDEX_op_st16_i64:
2320    case INDEX_op_st32_i64:
2321    case INDEX_op_st_i64:
2322        return &rZ_r;
2323
2324    case INDEX_op_add_i32:
2325    case INDEX_op_add_i64:
2326    case INDEX_op_sub_i32:
2327    case INDEX_op_sub_i64:
2328    case INDEX_op_setcond_i32:
2329    case INDEX_op_setcond_i64:
2330        return &r_r_rA;
2331
2332    case INDEX_op_mul_i32:
2333    case INDEX_op_mul_i64:
2334    case INDEX_op_div_i32:
2335    case INDEX_op_div_i64:
2336    case INDEX_op_divu_i32:
2337    case INDEX_op_divu_i64:
2338    case INDEX_op_rem_i32:
2339    case INDEX_op_rem_i64:
2340    case INDEX_op_remu_i32:
2341    case INDEX_op_remu_i64:
2342    case INDEX_op_muluh_i64:
2343    case INDEX_op_mulsh_i64:
2344        return &r_r_r;
2345
2346    case INDEX_op_and_i32:
2347    case INDEX_op_and_i64:
2348    case INDEX_op_or_i32:
2349    case INDEX_op_or_i64:
2350    case INDEX_op_xor_i32:
2351    case INDEX_op_xor_i64:
2352    case INDEX_op_andc_i32:
2353    case INDEX_op_andc_i64:
2354    case INDEX_op_orc_i32:
2355    case INDEX_op_orc_i64:
2356    case INDEX_op_eqv_i32:
2357    case INDEX_op_eqv_i64:
2358        return &r_r_rL;
2359
2360    case INDEX_op_shl_i32:
2361    case INDEX_op_shr_i32:
2362    case INDEX_op_sar_i32:
2363    case INDEX_op_rotl_i32:
2364    case INDEX_op_rotr_i32:
2365    case INDEX_op_shl_i64:
2366    case INDEX_op_shr_i64:
2367    case INDEX_op_sar_i64:
2368    case INDEX_op_rotl_i64:
2369    case INDEX_op_rotr_i64:
2370        return &r_r_ri;
2371
2372    case INDEX_op_clz_i32:
2373    case INDEX_op_ctz_i32:
2374    case INDEX_op_clz_i64:
2375    case INDEX_op_ctz_i64:
2376        return &r_r_rAL;
2377
2378    case INDEX_op_brcond_i32:
2379    case INDEX_op_brcond_i64:
2380        return &r_rA;
2381
2382    case INDEX_op_movcond_i32:
2383    case INDEX_op_movcond_i64:
2384        return &movc;
2385
2386    case INDEX_op_qemu_ld_i32:
2387    case INDEX_op_qemu_ld_i64:
2388        return &r_l;
2389    case INDEX_op_qemu_st_i32:
2390    case INDEX_op_qemu_st_i64:
2391        return &lZ_l;
2392
2393    case INDEX_op_deposit_i32:
2394    case INDEX_op_deposit_i64:
2395        return &dep;
2396
2397    case INDEX_op_add2_i32:
2398    case INDEX_op_add2_i64:
2399    case INDEX_op_sub2_i32:
2400    case INDEX_op_sub2_i64:
2401        return &add2;
2402
2403    case INDEX_op_add_vec:
2404    case INDEX_op_sub_vec:
2405    case INDEX_op_mul_vec:
2406    case INDEX_op_and_vec:
2407    case INDEX_op_or_vec:
2408    case INDEX_op_xor_vec:
2409    case INDEX_op_andc_vec:
2410    case INDEX_op_orc_vec:
2411        return &w_w_w;
2412    case INDEX_op_not_vec:
2413    case INDEX_op_neg_vec:
2414    case INDEX_op_shli_vec:
2415    case INDEX_op_shri_vec:
2416    case INDEX_op_sari_vec:
2417        return &w_w;
2418    case INDEX_op_ld_vec:
2419    case INDEX_op_st_vec:
2420        return &w_r;
2421    case INDEX_op_dup_vec:
2422        return &w_wr;
2423    case INDEX_op_cmp_vec:
2424        return &w_w_wZ;
2425
2426    default:
2427        return NULL;
2428    }
2429}
2430
2431static void tcg_target_init(TCGContext *s)
2432{
2433    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2434    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2435    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2436    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2437
2438    tcg_target_call_clobber_regs = -1ull;
2439    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2440    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2441    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2442    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2443    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2444    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2445    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2446    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2447    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2448    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2449    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2450    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2451    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2452    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2453    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2454    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2455    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2456    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2457    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2458
2459    s->reserved_regs = 0;
2460    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2461    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2462    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2463    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2464    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2465}
2466
2467/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2468#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2469
2470#define FRAME_SIZE \
2471    ((PUSH_SIZE \
2472      + TCG_STATIC_CALL_ARGS_SIZE \
2473      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2474      + TCG_TARGET_STACK_ALIGN - 1) \
2475     & ~(TCG_TARGET_STACK_ALIGN - 1))
2476
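/* Illustration with values assumed rather than taken from this file:
   TCG_STATIC_CALL_ARGS_SIZE == 128, CPU_TEMP_BUF_NLONGS == 128 and
   TCG_TARGET_STACK_ALIGN == 16 give PUSH_SIZE == 96 and
   FRAME_SIZE == (96 + 128 + 1024 + 15) & ~15 == 1248, which satisfies
   both build-time checks below.  */
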
2477/* We're expecting a 2-byte uleb128 encoded value.  */
2478QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2479
2480/* We're expecting to use a single ADDI insn.  */
2481QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2482
2483static void tcg_target_qemu_prologue(TCGContext *s)
2484{
2485    TCGReg r;
2486
2487    /* Push (FP, LR) and allocate space for all saved registers.  */
2488    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2489                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2490
2491    /* Set up frame pointer for canonical unwinding.  */
2492    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2493
2494    /* Store callee-preserved regs x19..x28.  */
2495    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2496        int ofs = (r - TCG_REG_X19 + 2) * 8;
2497        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2498    }
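    /* Frame layout so far: (FP, LR) at [sp, #0], then (x19, x20) at
       [sp, #16] up through (x27, x28) at [sp, #80], matching the
       DebugFrame offsets at the end of this file.  */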
2499
2500    /* Make stack space for TCG locals.  */
2501    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2502                 FRAME_SIZE - PUSH_SIZE);
2503
2504    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2505    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2506                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2507
2508#if !defined(CONFIG_SOFTMMU)
2509    if (USE_GUEST_BASE) {
2510        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2511        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2512    }
2513#endif
2514
2515    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2516    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2517
2518    /*
2519     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2520     * and fall through to the rest of the epilogue.
2521     */
2522    s->code_gen_epilogue = s->code_ptr;
2523    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2524
2525    /* TB epilogue */
2526    tb_ret_addr = s->code_ptr;
2527
2528    /* Remove TCG locals stack space.  */
2529    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2530                 FRAME_SIZE - PUSH_SIZE);
2531
2532    /* Restore registers x19..x28.  */
2533    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2534        int ofs = (r - TCG_REG_X19 + 2) * 8;
2535        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2536    }
2537
2538    /* Pop (FP, LR), restore SP to previous frame.  */
2539    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2540                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2541    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2542}
2543
2544static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2545{
2546    int i;
2547    for (i = 0; i < count; ++i) {
2548        p[i] = NOP;
2549    }
2550}
2551
2552typedef struct {
2553    DebugFrameHeader h;
2554    uint8_t fde_def_cfa[4];
2555    uint8_t fde_reg_ofs[24];
2556} DebugFrame;
2557
2558#define ELF_HOST_MACHINE EM_AARCH64
2559
2560static const DebugFrame debug_frame = {
2561    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2562    .h.cie.id = -1,
2563    .h.cie.version = 1,
2564    .h.cie.code_align = 1,
2565    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2566    .h.cie.return_column = TCG_REG_LR,
2567
2568    /* Total FDE size does not include the "len" member.  */
2569    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2570
2571    .fde_def_cfa = {
2572        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2573        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2574        (FRAME_SIZE >> 7)
2575    },
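    /* E.g. for FRAME_SIZE == 1248 (see the illustration above) the two
       uleb128 bytes are (1248 & 0x7f) | 0x80 == 0xe0 and 1248 >> 7 == 0x09.  */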
2576    .fde_reg_ofs = {
2577        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2578        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2579        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2580        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2581        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2582        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2583        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2584        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2585        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2586        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
2587        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2588        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2589    }
2590};
2591
2592void tcg_register_jit(void *buf, size_t buf_size)
2593{
2594    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2595}
2596