qemu/tcg/aarch64/tcg-target.inc.c
/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-pool.inc.c"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *code_ptr = deposit32(*code_ptr, 0, 26, offset);
        return true;
    }
    return false;
}
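
/* Note that code_ptr and target are tcg_insn_unit pointers, so the
   offsets computed here and in reloc_pc19 below are counted in 32-bit
   instructions, matching the branch encodings: the 26-bit form reaches
   +/-128MB and the 19-bit form +/-1MB from the insn being patched.  */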

static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 19)) {
        *code_ptr = deposit32(*code_ptr, 5, 19, offset);
        return true;
    }
    return false;
}

static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffffu;
        break;
    case 'w': /* advsimd registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffff00000000ull;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 for helper args; better to avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'O': /* vector orr/bic immediate */
        ct->ct |= TCG_CT_CONST_ORRI;
        break;
    case 'N': /* vector orr/bic immediate, inverted */
        ct->ct |= TCG_CT_CONST_ANDI;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
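
/* For example, 0x123 (unshifted) and 0x123000 (LSL #12) are valid
   arithmetic immediates, while 0x123456 is not and must be loaded
   into a register first.  */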

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
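
/* Worked example of the bit trick above: val = 0x0ff0 has the form
   0..01..10..0; val & -val = 0x0010, and val + 0x0010 = 0x1000 is a
   power of 2, so we accept.  A value with two runs of ones, such as
   0x0f0f, sums to 0x0f10, which still has multiple bits set, so we
   correctly reject it.  */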

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
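
/* For example, v32 = 0x00aa00bb: clearing the byte at bit 16 leaves
   0x000000bb, a valid shifted immediate, so we return i = 4.  The
   caller then emits a MOVI of 0xbb with the returned (cmode, imm8)
   and an ORR of extract32(v32, 16, 8) = 0xaa with cmode 4.  */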

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
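
/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) pastes to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); naming an opcode
   from the wrong format group fails to produce a valid identifier.  */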

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}
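
/* Since SHIFT is restricted to 0, 16, 32 or 48, bits 5:4 of SHIFT are
   exactly the 2-bit HW field, and SHIFT << (21 - 4) places them at
   bits 22:21 of the insn; e.g. shift = 32 encodes HW = 2.  */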

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
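
/* Worked example: limm = 0x0ff0 gives h = 52, l = 4, hence r = 60 and
   c = 60 - 52 - 1 = 7, i.e. a run of 8 ones (imms = 7) rotated right
   by immr = 60, which DecodeBitMasks expands back to 0x0ff0.  */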

static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, tcg_target_long v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (v64 == dup_const(MO_8, v64)) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(rd >= 32);
        tcg_out_dupi_vec(s, type, rd, value);
        return;

    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = value - (intptr_t)s->code_ptr;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
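
/* For example, 0x12340000abcd has fewer than 32 bits set, so MOVZ is
   chosen: s0 = 0 extracts 0xabcd, s1 = 32 extracts 0x1234, t2 == 0,
   and we emit MOVZ rd, #0xabcd followed by MOVK rd, #0x1234, lsl #32. */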

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
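
/* For example, depositing an 8-bit field at lsb 8 of a 32-bit value
   gives a = (32 - 8) & 31 = 24 and b = 7, i.e. BFM wd, wn, #24, #7,
   which is the standard alias BFI wd, wn, #8, #8.  */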

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}
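
/* The two patched slots always form one aligned 64-bit little-endian
   store: either B + NOP when the destination is in direct branch range,
   or ADRP + ADDI building the destination in TCG_REG_TMP for the
   BR through TCG_REG_TMP emitted at the goto_tb jump site.  */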
1370
1371static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1372{
1373    if (!l->has_value) {
1374        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1375        tcg_out_insn(s, 3206, B, 0);
1376    } else {
1377        tcg_out_goto(s, l->u.value_ptr);
1378    }
1379}
1380
1381static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1382                           TCGArg b, bool b_const, TCGLabel *l)
1383{
1384    intptr_t offset;
1385    bool need_cmp;
1386
1387    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1388        need_cmp = false;
1389    } else {
1390        need_cmp = true;
1391        tcg_out_cmp(s, ext, a, b, b_const);
1392    }
1393
1394    if (!l->has_value) {
1395        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1396        offset = tcg_in32(s) >> 5;
1397    } else {
1398        offset = l->u.value_ptr - s->code_ptr;
1399        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1400    }
1401
1402    if (need_cmp) {
1403        tcg_out_insn(s, 3202, B_C, c, offset);
1404    } else if (c == TCG_COND_EQ) {
1405        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1406    } else {
1407        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1408    }
1409}
1410
1411static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1412{
1413    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1414}
1415
1416static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1417{
1418    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1419}
1420
1421static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1422{
1423    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1424}
1425
1426static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1427                               TCGReg rd, TCGReg rn)
1428{
1429    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1430    int bits = (8 << s_bits) - 1;
1431    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1432}
1433
1434static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1435                               TCGReg rd, TCGReg rn)
1436{
1437    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1438    int bits = (8 << s_bits) - 1;
1439    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1440}
1441
1442static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1443                            TCGReg rn, int64_t aimm)
1444{
1445    if (aimm >= 0) {
1446        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1447    } else {
1448        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1449    }
1450}
1451
1452static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1453                                   TCGReg rh, TCGReg al, TCGReg ah,
1454                                   tcg_target_long bl, tcg_target_long bh,
1455                                   bool const_bl, bool const_bh, bool sub)
1456{
1457    TCGReg orig_rl = rl;
1458    AArch64Insn insn;
1459
1460    if (rl == ah || (!const_bh && rl == bh)) {
1461        rl = TCG_REG_TMP;
1462    }
1463
1464    if (const_bl) {
1465        insn = I3401_ADDSI;
1466        if ((bl < 0) ^ sub) {
1467            insn = I3401_SUBSI;
1468            bl = -bl;
1469        }
1470        if (unlikely(al == TCG_REG_XZR)) {
1471            /* ??? We want to allow al to be zero for the benefit of
1472               negation via subtraction.  However, that leaves open the
1473               possibility of adding 0+const in the low part, and the
1474               immediate add instructions encode XSP not XZR.  Don't try
1475               anything more elaborate here than loading another zero.  */
1476            al = TCG_REG_TMP;
1477            tcg_out_movi(s, ext, al, 0);
1478        }
1479        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1480    } else {
1481        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1482    }
1483
1484    insn = I3503_ADC;
1485    if (const_bh) {
1486        /* Note that the only two constants we support are 0 and -1, and
1487           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1488        if ((bh != 0) ^ sub) {
1489            insn = I3503_SBC;
1490        }
1491        bh = TCG_REG_XZR;
1492    } else if (sub) {
1493        insn = I3503_SBC;
1494    }
1495    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1496
1497    tcg_out_mov(s, ext, orig_rl, rl);
1498}
1499
1500static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1501{
1502    static const uint32_t sync[] = {
1503        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1504        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1505        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1506        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1507        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1508    };
1509    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1510}
1511
1512static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1513                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1514{
1515    TCGReg a1 = a0;
1516    if (is_ctz) {
1517        a1 = TCG_REG_TMP;
1518        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1519    }
1520    if (const_b && b == (ext ? 64 : 32)) {
1521        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1522    } else {
1523        AArch64Insn sel = I3506_CSEL;
1524
1525        tcg_out_cmp(s, ext, a0, 0, 1);
1526        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1527
1528        if (const_b) {
1529            if (b == -1) {
1530                b = TCG_REG_XZR;
1531                sel = I3506_CSINV;
1532            } else if (b == 0) {
1533                b = TCG_REG_XZR;
1534            } else {
1535                tcg_out_movi(s, ext, d, b);
1536                b = d;
1537            }
1538        }
1539        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1540    }
1541}
1542
1543#ifdef CONFIG_SOFTMMU
1544#include "tcg-ldst.inc.c"
1545
1546/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1547 *                                     TCGMemOpIdx oi, uintptr_t ra)
1548 */
1549static void * const qemu_ld_helpers[16] = {
1550    [MO_UB]   = helper_ret_ldub_mmu,
1551    [MO_LEUW] = helper_le_lduw_mmu,
1552    [MO_LEUL] = helper_le_ldul_mmu,
1553    [MO_LEQ]  = helper_le_ldq_mmu,
1554    [MO_BEUW] = helper_be_lduw_mmu,
1555    [MO_BEUL] = helper_be_ldul_mmu,
1556    [MO_BEQ]  = helper_be_ldq_mmu,
1557};
1558
1559/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1560 *                                     uintxx_t val, TCGMemOpIdx oi,
1561 *                                     uintptr_t ra)
1562 */
1563static void * const qemu_st_helpers[16] = {
1564    [MO_UB]   = helper_ret_stb_mmu,
1565    [MO_LEUW] = helper_le_stw_mmu,
1566    [MO_LEUL] = helper_le_stl_mmu,
1567    [MO_LEQ]  = helper_le_stq_mmu,
1568    [MO_BEUW] = helper_be_stw_mmu,
1569    [MO_BEUL] = helper_be_stl_mmu,
1570    [MO_BEQ]  = helper_be_stq_mmu,
1571};
1572
1573static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1574{
1575    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1576    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1577    tcg_out_insn(s, 3406, ADR, rd, offset);
1578}
1579
1580static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1581{
1582    TCGMemOpIdx oi = lb->oi;
1583    TCGMemOp opc = get_memop(oi);
1584    TCGMemOp size = opc & MO_SIZE;
1585
1586    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1587        return false;
1588    }
1589
1590    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1591    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1592    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1593    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1594    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1595    if (opc & MO_SIGN) {
1596        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1597    } else {
1598        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1599    }
1600
1601    tcg_out_goto(s, lb->raddr);
1602    return true;
1603}
1604
1605static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1606{
1607    TCGMemOpIdx oi = lb->oi;
1608    TCGMemOp opc = get_memop(oi);
1609    TCGMemOp size = opc & MO_SIZE;
1610
1611    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1612        return false;
1613    }
1614
1615    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1616    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1617    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1618    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1619    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1620    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1621    tcg_out_goto(s, lb->raddr);
1622    return true;
1623}
1624
1625static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1626                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1627                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1628{
1629    TCGLabelQemuLdst *label = new_ldst_label(s);
1630
1631    label->is_ld = is_ld;
1632    label->oi = oi;
1633    label->type = ext;
1634    label->datalo_reg = data_reg;
1635    label->addrlo_reg = addr_reg;
1636    label->raddr = raddr;
1637    label->label_ptr[0] = label_ptr;
1638}
1639
1640/* We expect to use a 7-bit scaled negative offset from ENV.  */
1641QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1642QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1643
1644/* These offsets are built into the LDP below.  */
1645QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1646QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1647
1648/* Load and compare a TLB entry, emitting the conditional jump to the
1649   slow path for the failure case, which will be patched later when finalizing
1650   the slow path. Generated code returns the host addend in X1,
1651   clobbers X0, X2, X3 and TMP. */
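/* Schematically, the sequence emitted below is (for a 64-bit guest):
 *
 *     ldp  x0, x1, [env, #TLB_MASK_TABLE_OFS(mem_index)]
 *     and  x0, x0, addr, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *     add  x1, x1, x0
 *     ldr  x0, [x1, #addr_read or addr_write]
 *     ldr  x1, [x1, #addend]
 *     and  x3, addr, #(TARGET_PAGE_MASK | a_mask)
 *     cmp  x0, x3
 *     b.ne slow_path
 *
 * with an extra ADDI to form the last-byte address first when the
 * access is wider than its guaranteed alignment.  */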
1652static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1653                             tcg_insn_unit **label_ptr, int mem_index,
1654                             bool is_read)
1655{
1656    unsigned a_bits = get_alignment_bits(opc);
1657    unsigned s_bits = opc & MO_SIZE;
1658    unsigned a_mask = (1u << a_bits) - 1;
1659    unsigned s_mask = (1u << s_bits) - 1;
1660    TCGReg x3;
1661    TCGType mask_type;
1662    uint64_t compare_mask;
1663
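    /* The TLB index is taken from address bits
       [TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS);
       the 32-bit form of the extract below suffices only when those
       bits all lie within the low 32 bits of the address.  */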
1664    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1665                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1666
1667    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1668    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1669                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1670
1671    /* Extract the TLB index from the address into X0.  */
1672    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1673                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1674                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1675
1676    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1677    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1678
1679    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1680    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1681               ? offsetof(CPUTLBEntry, addr_read)
1682               : offsetof(CPUTLBEntry, addr_write));
1683    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1684               offsetof(CPUTLBEntry, addend));
1685
1686    /* For aligned accesses, we check the first byte and include the alignment
1687       bits within the address.  For unaligned accesses, we check that we don't
1688       cross pages, using the address of the last byte of the access.  */
1689    if (a_bits >= s_bits) {
1690        x3 = addr_reg;
1691    } else {
1692        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1693                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1694        x3 = TCG_REG_X3;
1695    }
1696    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1697
1698    /* Store the page mask part of the address into X3.  */
1699    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1700                     TCG_REG_X3, x3, compare_mask);
1701
1702    /* Perform the address comparison. */
1703    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1704
1705    /* If not equal, we jump to the slow path. */
1706    *label_ptr = s->code_ptr;
1707    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1708}
1709
1710#endif /* CONFIG_SOFTMMU */
1711
1712static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1713                                   TCGReg data_r, TCGReg addr_r,
1714                                   TCGType otype, TCGReg off_r)
1715{
1716    const TCGMemOp bswap = memop & MO_BSWAP;
1717
1718    switch (memop & MO_SSIZE) {
1719    case MO_UB:
1720        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1721        break;
1722    case MO_SB:
1723        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1724                       data_r, addr_r, otype, off_r);
1725        break;
1726    case MO_UW:
1727        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1728        if (bswap) {
1729            tcg_out_rev16(s, data_r, data_r);
1730        }
1731        break;
1732    case MO_SW:
1733        if (bswap) {
1734            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1735            tcg_out_rev16(s, data_r, data_r);
1736            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1737        } else {
1738            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1739                           data_r, addr_r, otype, off_r);
1740        }
1741        break;
1742    case MO_UL:
1743        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1744        if (bswap) {
1745            tcg_out_rev32(s, data_r, data_r);
1746        }
1747        break;
1748    case MO_SL:
1749        if (bswap) {
1750            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1751            tcg_out_rev32(s, data_r, data_r);
1752            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1753        } else {
1754            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1755        }
1756        break;
1757    case MO_Q:
1758        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1759        if (bswap) {
1760            tcg_out_rev64(s, data_r, data_r);
1761        }
1762        break;
1763    default:
1764        tcg_abort();
1765    }
1766}
1767
1768static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1769                                   TCGReg data_r, TCGReg addr_r,
1770                                   TCGType otype, TCGReg off_r)
1771{
1772    const TCGMemOp bswap = memop & MO_BSWAP;
1773
1774    switch (memop & MO_SIZE) {
1775    case MO_8:
1776        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1777        break;
1778    case MO_16:
1779        if (bswap && data_r != TCG_REG_XZR) {
1780            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1781            data_r = TCG_REG_TMP;
1782        }
1783        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1784        break;
1785    case MO_32:
1786        if (bswap && data_r != TCG_REG_XZR) {
1787            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1788            data_r = TCG_REG_TMP;
1789        }
1790        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1791        break;
1792    case MO_64:
1793        if (bswap && data_r != TCG_REG_XZR) {
1794            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1795            data_r = TCG_REG_TMP;
1796        }
1797        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1798        break;
1799    default:
1800        tcg_abort();
1801    }
1802}
1803
1804static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1805                            TCGMemOpIdx oi, TCGType ext)
1806{
1807    TCGMemOp memop = get_memop(oi);
1808    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1809#ifdef CONFIG_SOFTMMU
1810    unsigned mem_index = get_mmuidx(oi);
1811    tcg_insn_unit *label_ptr;
1812
1813    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
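    /* TLB hit: X1 now holds the host addend, so perform the access with
       the guest address as the (possibly zero-extended) index.  */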
1814    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1815                           TCG_REG_X1, otype, addr_reg);
1816    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1817                        s->code_ptr, label_ptr);
1818#else /* !CONFIG_SOFTMMU */
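    /* User-only: with a guest_base (or a 32-bit guest address needing
       zero-extension) the base slot holds TCG_REG_GUEST_BASE and the
       guest address goes in the index slot, where otype selects UXTW;
       otherwise the guest address is used directly as the base.  */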
1819    if (USE_GUEST_BASE) {
1820        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1821                               TCG_REG_GUEST_BASE, otype, addr_reg);
1822    } else {
1823        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1824                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1825    }
1826#endif /* CONFIG_SOFTMMU */
1827}
1828
1829static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1830                            TCGMemOpIdx oi)
1831{
1832    TCGMemOp memop = get_memop(oi);
1833    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1834#ifdef CONFIG_SOFTMMU
1835    unsigned mem_index = get_mmuidx(oi);
1836    tcg_insn_unit *label_ptr;
1837
1838    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1839    tcg_out_qemu_st_direct(s, memop, data_reg,
1840                           TCG_REG_X1, otype, addr_reg);
1841        add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1842                        data_reg, addr_reg, s->code_ptr, label_ptr);
1843#else /* !CONFIG_SOFTMMU */
1844    if (USE_GUEST_BASE) {
1845        tcg_out_qemu_st_direct(s, memop, data_reg,
1846                               TCG_REG_GUEST_BASE, otype, addr_reg);
1847    } else {
1848        tcg_out_qemu_st_direct(s, memop, data_reg,
1849                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1850    }
1851#endif /* CONFIG_SOFTMMU */
1852}
1853
1854static tcg_insn_unit *tb_ret_addr;
1855
1856static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1857                       const TCGArg args[TCG_MAX_OP_ARGS],
1858                       const int const_args[TCG_MAX_OP_ARGS])
1859{
1860    /* 99% of the time, we can signal the use of extension registers
1861       by looking to see if the opcode handles 64-bit data.  */
1862    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1863
1864    /* Hoist the loads of the most common arguments.  */
1865    TCGArg a0 = args[0];
1866    TCGArg a1 = args[1];
1867    TCGArg a2 = args[2];
1868    int c2 = const_args[2];
1869
1870    /* Some operands are defined with "rZ" constraint, a register or
1871       the zero register.  A constant matched by "rZ" can only be zero,
       so we need not actually test args[I] == 0.  */
1872#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1873
1874    switch (opc) {
1875    case INDEX_op_exit_tb:
1876        /* Reuse the zeroing that exists for goto_ptr.  */
1877        if (a0 == 0) {
1878            tcg_out_goto_long(s, s->code_gen_epilogue);
1879        } else {
1880            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1881            tcg_out_goto_long(s, tb_ret_addr);
1882        }
1883        break;
1884
1885    case INDEX_op_goto_tb:
1886        if (s->tb_jmp_insn_offset != NULL) {
1887            /* TCG_TARGET_HAS_direct_jump */
1888            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1889               write can be used to patch the target address. */
1890            if ((uintptr_t)s->code_ptr & 7) {
1891                tcg_out32(s, NOP);
1892            }
1893            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1894            /* actual branch destination will be patched by
1895               tb_target_set_jmp_target later. */
1896            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1897            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1898        } else {
1899            /* !TCG_TARGET_HAS_direct_jump */
1900            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1901            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1902            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1903        }
1904        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1905        set_jmp_reset_offset(s, a0);
1906        break;
1907
1908    case INDEX_op_goto_ptr:
1909        tcg_out_insn(s, 3207, BR, a0);
1910        break;
1911
1912    case INDEX_op_br:
1913        tcg_out_goto_label(s, arg_label(a0));
1914        break;
1915
1916    case INDEX_op_ld8u_i32:
1917    case INDEX_op_ld8u_i64:
1918        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1919        break;
1920    case INDEX_op_ld8s_i32:
1921        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1922        break;
1923    case INDEX_op_ld8s_i64:
1924        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1925        break;
1926    case INDEX_op_ld16u_i32:
1927    case INDEX_op_ld16u_i64:
1928        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1929        break;
1930    case INDEX_op_ld16s_i32:
1931        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1932        break;
1933    case INDEX_op_ld16s_i64:
1934        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1935        break;
1936    case INDEX_op_ld_i32:
1937    case INDEX_op_ld32u_i64:
1938        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1939        break;
1940    case INDEX_op_ld32s_i64:
1941        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1942        break;
1943    case INDEX_op_ld_i64:
1944        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1945        break;
1946
1947    case INDEX_op_st8_i32:
1948    case INDEX_op_st8_i64:
1949        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1950        break;
1951    case INDEX_op_st16_i32:
1952    case INDEX_op_st16_i64:
1953        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1954        break;
1955    case INDEX_op_st_i32:
1956    case INDEX_op_st32_i64:
1957        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1958        break;
1959    case INDEX_op_st_i64:
1960        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1961        break;
1962
1963    case INDEX_op_add_i32:
1964        a2 = (int32_t)a2;
1965        /* FALLTHRU */
1966    case INDEX_op_add_i64:
1967        if (c2) {
1968            tcg_out_addsubi(s, ext, a0, a1, a2);
1969        } else {
1970            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1971        }
1972        break;
1973
1974    case INDEX_op_sub_i32:
1975        a2 = (int32_t)a2;
1976        /* FALLTHRU */
1977    case INDEX_op_sub_i64:
1978        if (c2) {
1979            tcg_out_addsubi(s, ext, a0, a1, -a2);
1980        } else {
1981            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1982        }
1983        break;
1984
1985    case INDEX_op_neg_i64:
1986    case INDEX_op_neg_i32:
1987        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1988        break;
1989
1990    case INDEX_op_and_i32:
1991        a2 = (int32_t)a2;
1992        /* FALLTHRU */
1993    case INDEX_op_and_i64:
1994        if (c2) {
1995            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1996        } else {
1997            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1998        }
1999        break;
2000
2001    case INDEX_op_andc_i32:
2002        a2 = (int32_t)a2;
2003        /* FALLTHRU */
2004    case INDEX_op_andc_i64:
2005        if (c2) {
2006            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2007        } else {
2008            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2009        }
2010        break;
2011
2012    case INDEX_op_or_i32:
2013        a2 = (int32_t)a2;
2014        /* FALLTHRU */
2015    case INDEX_op_or_i64:
2016        if (c2) {
2017            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2018        } else {
2019            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2020        }
2021        break;
2022
2023    case INDEX_op_orc_i32:
2024        a2 = (int32_t)a2;
2025        /* FALLTHRU */
2026    case INDEX_op_orc_i64:
2027        if (c2) {
2028            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2029        } else {
2030            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2031        }
2032        break;
2033
2034    case INDEX_op_xor_i32:
2035        a2 = (int32_t)a2;
2036        /* FALLTHRU */
2037    case INDEX_op_xor_i64:
2038        if (c2) {
2039            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2040        } else {
2041            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2042        }
2043        break;
2044
2045    case INDEX_op_eqv_i32:
2046        a2 = (int32_t)a2;
2047        /* FALLTHRU */
2048    case INDEX_op_eqv_i64:
2049        if (c2) {
2050            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2051        } else {
2052            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2053        }
2054        break;
2055
2056    case INDEX_op_not_i64:
2057    case INDEX_op_not_i32:
2058        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2059        break;
2060
2061    case INDEX_op_mul_i64:
2062    case INDEX_op_mul_i32:
2063        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2064        break;
2065
2066    case INDEX_op_div_i64:
2067    case INDEX_op_div_i32:
2068        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2069        break;
2070    case INDEX_op_divu_i64:
2071    case INDEX_op_divu_i32:
2072        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2073        break;
2074
2075    case INDEX_op_rem_i64:
2076    case INDEX_op_rem_i32:
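        /* No hardware remainder: compute a1 - (a1 / a2) * a2 using
           a division followed by multiply-subtract.  */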
2077        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2078        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2079        break;
2080    case INDEX_op_remu_i64:
2081    case INDEX_op_remu_i32:
2082        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2083        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2084        break;
2085
2086    case INDEX_op_shl_i64:
2087    case INDEX_op_shl_i32:
2088        if (c2) {
2089            tcg_out_shl(s, ext, a0, a1, a2);
2090        } else {
2091            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2092        }
2093        break;
2094
2095    case INDEX_op_shr_i64:
2096    case INDEX_op_shr_i32:
2097        if (c2) {
2098            tcg_out_shr(s, ext, a0, a1, a2);
2099        } else {
2100            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2101        }
2102        break;
2103
2104    case INDEX_op_sar_i64:
2105    case INDEX_op_sar_i32:
2106        if (c2) {
2107            tcg_out_sar(s, ext, a0, a1, a2);
2108        } else {
2109            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2110        }
2111        break;
2112
2113    case INDEX_op_rotr_i64:
2114    case INDEX_op_rotr_i32:
2115        if (c2) {
2116            tcg_out_rotr(s, ext, a0, a1, a2);
2117        } else {
2118            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2119        }
2120        break;
2121
2122    case INDEX_op_rotl_i64:
2123    case INDEX_op_rotl_i32:
2124        if (c2) {
2125            tcg_out_rotl(s, ext, a0, a1, a2);
2126        } else {
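            /* No rotate-left instruction: negate the count (modulo the
               register width) and rotate right by that amount.  */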
2127            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2128            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2129        }
2130        break;
2131
2132    case INDEX_op_clz_i64:
2133    case INDEX_op_clz_i32:
2134        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2135        break;
2136    case INDEX_op_ctz_i64:
2137    case INDEX_op_ctz_i32:
2138        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2139        break;
2140
2141    case INDEX_op_brcond_i32:
2142        a1 = (int32_t)a1;
2143        /* FALLTHRU */
2144    case INDEX_op_brcond_i64:
2145        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2146        break;
2147
2148    case INDEX_op_setcond_i32:
2149        a2 = (int32_t)a2;
2150        /* FALLTHRU */
2151    case INDEX_op_setcond_i64:
2152        tcg_out_cmp(s, ext, a1, a2, c2);
2153        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2154        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2155                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2156        break;
2157
2158    case INDEX_op_movcond_i32:
2159        a2 = (int32_t)a2;
2160        /* FALLTHRU */
2161    case INDEX_op_movcond_i64:
2162        tcg_out_cmp(s, ext, a1, a2, c2);
2163        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2164        break;
2165
2166    case INDEX_op_qemu_ld_i32:
2167    case INDEX_op_qemu_ld_i64:
2168        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2169        break;
2170    case INDEX_op_qemu_st_i32:
2171    case INDEX_op_qemu_st_i64:
2172        tcg_out_qemu_st(s, REG0(0), a1, a2);
2173        break;
2174
2175    case INDEX_op_bswap64_i64:
2176        tcg_out_rev64(s, a0, a1);
2177        break;
2178    case INDEX_op_bswap32_i64:
2179    case INDEX_op_bswap32_i32:
2180        tcg_out_rev32(s, a0, a1);
2181        break;
2182    case INDEX_op_bswap16_i64:
2183    case INDEX_op_bswap16_i32:
2184        tcg_out_rev16(s, a0, a1);
2185        break;
2186
2187    case INDEX_op_ext8s_i64:
2188    case INDEX_op_ext8s_i32:
2189        tcg_out_sxt(s, ext, MO_8, a0, a1);
2190        break;
2191    case INDEX_op_ext16s_i64:
2192    case INDEX_op_ext16s_i32:
2193        tcg_out_sxt(s, ext, MO_16, a0, a1);
2194        break;
2195    case INDEX_op_ext_i32_i64:
2196    case INDEX_op_ext32s_i64:
2197        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2198        break;
2199    case INDEX_op_ext8u_i64:
2200    case INDEX_op_ext8u_i32:
2201        tcg_out_uxt(s, MO_8, a0, a1);
2202        break;
2203    case INDEX_op_ext16u_i64:
2204    case INDEX_op_ext16u_i32:
2205        tcg_out_uxt(s, MO_16, a0, a1);
2206        break;
2207    case INDEX_op_extu_i32_i64:
2208    case INDEX_op_ext32u_i64:
2209        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2210        break;
2211
2212    case INDEX_op_deposit_i64:
2213    case INDEX_op_deposit_i32:
2214        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2215        break;
2216
2217    case INDEX_op_extract_i64:
2218    case INDEX_op_extract_i32:
2219        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2220        break;
2221
2222    case INDEX_op_sextract_i64:
2223    case INDEX_op_sextract_i32:
2224        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2225        break;
2226
2227    case INDEX_op_extract2_i64:
2228    case INDEX_op_extract2_i32:
2229        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2230        break;
2231
2232    case INDEX_op_add2_i32:
2233        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2234                        (int32_t)args[4], args[5], const_args[4],
2235                        const_args[5], false);
2236        break;
2237    case INDEX_op_add2_i64:
2238        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2239                        args[5], const_args[4], const_args[5], false);
2240        break;
2241    case INDEX_op_sub2_i32:
2242        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2243                        (int32_t)args[4], args[5], const_args[4],
2244                        const_args[5], true);
2245        break;
2246    case INDEX_op_sub2_i64:
2247        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2248                        args[5], const_args[4], const_args[5], true);
2249        break;
2250
2251    case INDEX_op_muluh_i64:
2252        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2253        break;
2254    case INDEX_op_mulsh_i64:
2255        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2256        break;
2257
2258    case INDEX_op_mb:
2259        tcg_out_mb(s, a0);
2260        break;
2261
2262    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2263    case INDEX_op_mov_i64:
2264    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2265    case INDEX_op_movi_i64:
2266    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2267    default:
2268        g_assert_not_reached();
2269    }
2270
2271#undef REG0
2272}
2273
2274static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2275                           unsigned vecl, unsigned vece,
2276                           const TCGArg *args, const int *const_args)
2277{
2278    static const AArch64Insn cmp_insn[16] = {
2279        [TCG_COND_EQ] = I3616_CMEQ,
2280        [TCG_COND_GT] = I3616_CMGT,
2281        [TCG_COND_GE] = I3616_CMGE,
2282        [TCG_COND_GTU] = I3616_CMHI,
2283        [TCG_COND_GEU] = I3616_CMHS,
2284    };
2285    static const AArch64Insn cmp0_insn[16] = {
2286        [TCG_COND_EQ] = I3617_CMEQ0,
2287        [TCG_COND_GT] = I3617_CMGT0,
2288        [TCG_COND_GE] = I3617_CMGE0,
2289        [TCG_COND_LT] = I3617_CMLT0,
2290        [TCG_COND_LE] = I3617_CMLE0,
2291    };
2292
2293    TCGType type = vecl + TCG_TYPE_V64;
2294    unsigned is_q = vecl;
2295    TCGArg a0, a1, a2, a3;
2296    int cmode, imm8;
2297
2298    a0 = args[0];
2299    a1 = args[1];
2300    a2 = args[2];
2301
2302    switch (opc) {
2303    case INDEX_op_ld_vec:
2304        tcg_out_ld(s, type, a0, a1, a2);
2305        break;
2306    case INDEX_op_st_vec:
2307        tcg_out_st(s, type, a0, a1, a2);
2308        break;
2309    case INDEX_op_dupm_vec:
2310        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2311        break;
2312    case INDEX_op_add_vec:
2313        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2314        break;
2315    case INDEX_op_sub_vec:
2316        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2317        break;
2318    case INDEX_op_mul_vec:
2319        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2320        break;
2321    case INDEX_op_neg_vec:
2322        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2323        break;
2324    case INDEX_op_abs_vec:
2325        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2326        break;
2327    case INDEX_op_and_vec:
2328        if (const_args[2]) {
2329            is_shimm1632(~a2, &cmode, &imm8);
2330            if (a0 == a1) {
2331                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2332                return;
2333            }
2334            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2335            a2 = a0;
2336        }
2337        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2338        break;
2339    case INDEX_op_or_vec:
2340        if (const_args[2]) {
2341            is_shimm1632(a2, &cmode, &imm8);
2342            if (a0 == a1) {
2343                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2344                return;
2345            }
2346            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2347            a2 = a0;
2348        }
2349        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2350        break;
2351    case INDEX_op_andc_vec:
2352        if (const_args[2]) {
2353            is_shimm1632(a2, &cmode, &imm8);
2354            if (a0 == a1) {
2355                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2356                return;
2357            }
2358            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2359            a2 = a0;
2360        }
2361        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2362        break;
2363    case INDEX_op_orc_vec:
2364        if (const_args[2]) {
2365            is_shimm1632(~a2, &cmode, &imm8);
2366            if (a0 == a1) {
2367                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2368                return;
2369            }
2370            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2371            a2 = a0;
2372        }
2373        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2374        break;
2375    case INDEX_op_xor_vec:
2376        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2377        break;
2378    case INDEX_op_ssadd_vec:
2379        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2380        break;
2381    case INDEX_op_sssub_vec:
2382        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2383        break;
2384    case INDEX_op_usadd_vec:
2385        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2386        break;
2387    case INDEX_op_ussub_vec:
2388        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2389        break;
2390    case INDEX_op_smax_vec:
2391        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2392        break;
2393    case INDEX_op_smin_vec:
2394        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2395        break;
2396    case INDEX_op_umax_vec:
2397        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2398        break;
2399    case INDEX_op_umin_vec:
2400        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2401        break;
2402    case INDEX_op_not_vec:
2403        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2404        break;
2405    case INDEX_op_shli_vec:
2406        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2407        break;
2408    case INDEX_op_shri_vec:
2409        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2410        break;
2411    case INDEX_op_sari_vec:
2412        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2413        break;
2414    case INDEX_op_shlv_vec:
2415        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2416        break;
2417    case INDEX_op_aa64_sshl_vec:
2418        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2419        break;
2420    case INDEX_op_cmp_vec:
2421        {
2422            TCGCond cond = args[3];
2423            AArch64Insn insn;
2424
2425            if (cond == TCG_COND_NE) {
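                /* A constant operand can only be zero here (the "wZ"
                   constraint), so NE is "test against itself"; for a
                   register operand, invert the result of CMEQ.  */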
2426                if (const_args[2]) {
2427                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2428                } else {
2429                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2430                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2431                }
2432            } else {
2433                if (const_args[2]) {
2434                    insn = cmp0_insn[cond];
2435                    if (insn) {
2436                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2437                        break;
2438                    }
2439                    tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2440                    a2 = TCG_VEC_TMP;
2441                }
2442                insn = cmp_insn[cond];
2443                if (insn == 0) {
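                    /* No direct LT/LE/LTU/LEU encodings: swap the
                       operands and use the swapped condition.  */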
2444                    TCGArg t;
2445                    t = a1, a1 = a2, a2 = t;
2446                    cond = tcg_swap_cond(cond);
2447                    insn = cmp_insn[cond];
2448                    tcg_debug_assert(insn != 0);
2449                }
2450                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2451            }
2452        }
2453        break;
2454
2455    case INDEX_op_bitsel_vec:
2456        a3 = args[3];
2457        if (a0 == a3) {
2458            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2459        } else if (a0 == a2) {
2460            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2461        } else {
2462            if (a0 != a1) {
2463                tcg_out_mov(s, type, a0, a1);
2464            }
2465            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2466        }
2467        break;
2468
2469    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2470    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
2471    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2472    default:
2473        g_assert_not_reached();
2474    }
2475}
2476
2477int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2478{
2479    switch (opc) {
2480    case INDEX_op_add_vec:
2481    case INDEX_op_sub_vec:
2482    case INDEX_op_and_vec:
2483    case INDEX_op_or_vec:
2484    case INDEX_op_xor_vec:
2485    case INDEX_op_andc_vec:
2486    case INDEX_op_orc_vec:
2487    case INDEX_op_neg_vec:
2488    case INDEX_op_abs_vec:
2489    case INDEX_op_not_vec:
2490    case INDEX_op_cmp_vec:
2491    case INDEX_op_shli_vec:
2492    case INDEX_op_shri_vec:
2493    case INDEX_op_sari_vec:
2494    case INDEX_op_ssadd_vec:
2495    case INDEX_op_sssub_vec:
2496    case INDEX_op_usadd_vec:
2497    case INDEX_op_ussub_vec:
2498    case INDEX_op_shlv_vec:
2499    case INDEX_op_bitsel_vec:
2500        return 1;
2501    case INDEX_op_shrv_vec:
2502    case INDEX_op_sarv_vec:
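        /* Expandable: tcg_expand_vec_op implements these as negated
           left shifts (see below).  */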
2503        return -1;
2504    case INDEX_op_mul_vec:
2505    case INDEX_op_smax_vec:
2506    case INDEX_op_smin_vec:
2507    case INDEX_op_umax_vec:
2508    case INDEX_op_umin_vec:
2509        return vece < MO_64;
2510
2511    default:
2512        return 0;
2513    }
2514}
2515
2516void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2517                       TCGArg a0, ...)
2518{
2519    va_list va;
2520    TCGv_vec v0, v1, v2, t1;
2521
2522    va_start(va, a0);
2523    v0 = temp_tcgv_vec(arg_temp(a0));
2524    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2525    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2526
2527    switch (opc) {
2528    case INDEX_op_shrv_vec:
2529    case INDEX_op_sarv_vec:
2530        /* Right shifts are negative left shifts for AArch64.  */
2531        t1 = tcg_temp_new_vec(type);
2532        tcg_gen_neg_vec(vece, t1, v2);
2533        opc = (opc == INDEX_op_shrv_vec
2534               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2535        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2536                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2537        tcg_temp_free_vec(t1);
2538        break;
2539
2540    default:
2541        g_assert_not_reached();
2542    }
2543
2544    va_end(va);
2545}
2546
2547static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2548{
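    /* Constraint letters used below: "r" general register, "w" vector
       register, "l" qemu_ld/st address register, "Z" integer zero, "A"
       arithmetic (add/sub) immediate, "L" logical immediate, "M" minus
       one, "O" vector logical immediate, "N" inverted vector logical
       immediate, "i" any immediate, "0" matches output operand 0.  */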
2549    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2550    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2551    static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2552    static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2553    static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2554    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2555    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2556    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2557    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2558    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2559    static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2560    static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2561    static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2562    static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2563    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2564    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2565    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2566    static const TCGTargetOpDef r_r_rAL
2567        = { .args_ct_str = { "r", "r", "rAL" } };
2568    static const TCGTargetOpDef dep
2569        = { .args_ct_str = { "r", "0", "rZ" } };
2570    static const TCGTargetOpDef ext2
2571        = { .args_ct_str = { "r", "rZ", "rZ" } };
2572    static const TCGTargetOpDef movc
2573        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2574    static const TCGTargetOpDef add2
2575        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2576    static const TCGTargetOpDef w_w_w_w
2577        = { .args_ct_str = { "w", "w", "w", "w" } };
2578
2579    switch (op) {
2580    case INDEX_op_goto_ptr:
2581        return &r;
2582
2583    case INDEX_op_ld8u_i32:
2584    case INDEX_op_ld8s_i32:
2585    case INDEX_op_ld16u_i32:
2586    case INDEX_op_ld16s_i32:
2587    case INDEX_op_ld_i32:
2588    case INDEX_op_ld8u_i64:
2589    case INDEX_op_ld8s_i64:
2590    case INDEX_op_ld16u_i64:
2591    case INDEX_op_ld16s_i64:
2592    case INDEX_op_ld32u_i64:
2593    case INDEX_op_ld32s_i64:
2594    case INDEX_op_ld_i64:
2595    case INDEX_op_neg_i32:
2596    case INDEX_op_neg_i64:
2597    case INDEX_op_not_i32:
2598    case INDEX_op_not_i64:
2599    case INDEX_op_bswap16_i32:
2600    case INDEX_op_bswap32_i32:
2601    case INDEX_op_bswap16_i64:
2602    case INDEX_op_bswap32_i64:
2603    case INDEX_op_bswap64_i64:
2604    case INDEX_op_ext8s_i32:
2605    case INDEX_op_ext16s_i32:
2606    case INDEX_op_ext8u_i32:
2607    case INDEX_op_ext16u_i32:
2608    case INDEX_op_ext8s_i64:
2609    case INDEX_op_ext16s_i64:
2610    case INDEX_op_ext32s_i64:
2611    case INDEX_op_ext8u_i64:
2612    case INDEX_op_ext16u_i64:
2613    case INDEX_op_ext32u_i64:
2614    case INDEX_op_ext_i32_i64:
2615    case INDEX_op_extu_i32_i64:
2616    case INDEX_op_extract_i32:
2617    case INDEX_op_extract_i64:
2618    case INDEX_op_sextract_i32:
2619    case INDEX_op_sextract_i64:
2620        return &r_r;
2621
2622    case INDEX_op_st8_i32:
2623    case INDEX_op_st16_i32:
2624    case INDEX_op_st_i32:
2625    case INDEX_op_st8_i64:
2626    case INDEX_op_st16_i64:
2627    case INDEX_op_st32_i64:
2628    case INDEX_op_st_i64:
2629        return &rZ_r;
2630
2631    case INDEX_op_add_i32:
2632    case INDEX_op_add_i64:
2633    case INDEX_op_sub_i32:
2634    case INDEX_op_sub_i64:
2635    case INDEX_op_setcond_i32:
2636    case INDEX_op_setcond_i64:
2637        return &r_r_rA;
2638
2639    case INDEX_op_mul_i32:
2640    case INDEX_op_mul_i64:
2641    case INDEX_op_div_i32:
2642    case INDEX_op_div_i64:
2643    case INDEX_op_divu_i32:
2644    case INDEX_op_divu_i64:
2645    case INDEX_op_rem_i32:
2646    case INDEX_op_rem_i64:
2647    case INDEX_op_remu_i32:
2648    case INDEX_op_remu_i64:
2649    case INDEX_op_muluh_i64:
2650    case INDEX_op_mulsh_i64:
2651        return &r_r_r;
2652
2653    case INDEX_op_and_i32:
2654    case INDEX_op_and_i64:
2655    case INDEX_op_or_i32:
2656    case INDEX_op_or_i64:
2657    case INDEX_op_xor_i32:
2658    case INDEX_op_xor_i64:
2659    case INDEX_op_andc_i32:
2660    case INDEX_op_andc_i64:
2661    case INDEX_op_orc_i32:
2662    case INDEX_op_orc_i64:
2663    case INDEX_op_eqv_i32:
2664    case INDEX_op_eqv_i64:
2665        return &r_r_rL;
2666
2667    case INDEX_op_shl_i32:
2668    case INDEX_op_shr_i32:
2669    case INDEX_op_sar_i32:
2670    case INDEX_op_rotl_i32:
2671    case INDEX_op_rotr_i32:
2672    case INDEX_op_shl_i64:
2673    case INDEX_op_shr_i64:
2674    case INDEX_op_sar_i64:
2675    case INDEX_op_rotl_i64:
2676    case INDEX_op_rotr_i64:
2677        return &r_r_ri;
2678
2679    case INDEX_op_clz_i32:
2680    case INDEX_op_ctz_i32:
2681    case INDEX_op_clz_i64:
2682    case INDEX_op_ctz_i64:
2683        return &r_r_rAL;
2684
2685    case INDEX_op_brcond_i32:
2686    case INDEX_op_brcond_i64:
2687        return &r_rA;
2688
2689    case INDEX_op_movcond_i32:
2690    case INDEX_op_movcond_i64:
2691        return &movc;
2692
2693    case INDEX_op_qemu_ld_i32:
2694    case INDEX_op_qemu_ld_i64:
2695        return &r_l;
2696    case INDEX_op_qemu_st_i32:
2697    case INDEX_op_qemu_st_i64:
2698        return &lZ_l;
2699
2700    case INDEX_op_deposit_i32:
2701    case INDEX_op_deposit_i64:
2702        return &dep;
2703
2704    case INDEX_op_extract2_i32:
2705    case INDEX_op_extract2_i64:
2706        return &ext2;
2707
2708    case INDEX_op_add2_i32:
2709    case INDEX_op_add2_i64:
2710    case INDEX_op_sub2_i32:
2711    case INDEX_op_sub2_i64:
2712        return &add2;
2713
2714    case INDEX_op_add_vec:
2715    case INDEX_op_sub_vec:
2716    case INDEX_op_mul_vec:
2717    case INDEX_op_xor_vec:
2718    case INDEX_op_ssadd_vec:
2719    case INDEX_op_sssub_vec:
2720    case INDEX_op_usadd_vec:
2721    case INDEX_op_ussub_vec:
2722    case INDEX_op_smax_vec:
2723    case INDEX_op_smin_vec:
2724    case INDEX_op_umax_vec:
2725    case INDEX_op_umin_vec:
2726    case INDEX_op_shlv_vec:
2727    case INDEX_op_shrv_vec:
2728    case INDEX_op_sarv_vec:
2729    case INDEX_op_aa64_sshl_vec:
2730        return &w_w_w;
2731    case INDEX_op_not_vec:
2732    case INDEX_op_neg_vec:
2733    case INDEX_op_abs_vec:
2734    case INDEX_op_shli_vec:
2735    case INDEX_op_shri_vec:
2736    case INDEX_op_sari_vec:
2737        return &w_w;
2738    case INDEX_op_ld_vec:
2739    case INDEX_op_st_vec:
2740    case INDEX_op_dupm_vec:
2741        return &w_r;
2742    case INDEX_op_dup_vec:
2743        return &w_wr;
2744    case INDEX_op_or_vec:
2745    case INDEX_op_andc_vec:
2746        return &w_w_wO;
2747    case INDEX_op_and_vec:
2748    case INDEX_op_orc_vec:
2749        return &w_w_wN;
2750    case INDEX_op_cmp_vec:
2751        return &w_w_wZ;
2752    case INDEX_op_bitsel_vec:
2753        return &w_w_w_w;
2754
2755    default:
2756        return NULL;
2757    }
2758}
2759
2760static void tcg_target_init(TCGContext *s)
2761{
2762    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2763    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2764    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2765    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2766
2767    tcg_target_call_clobber_regs = -1ull;
2768    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2769    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2770    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2771    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2772    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2773    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2774    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2775    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2776    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2777    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2778    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2779    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2780    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2781    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2782    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2783    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2784    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2785    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2786    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2787
2788    s->reserved_regs = 0;
2789    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2790    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2791    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2792    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2793    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2794}
2795
2796/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2797#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2798
2799#define FRAME_SIZE \
2800    ((PUSH_SIZE \
2801      + TCG_STATIC_CALL_ARGS_SIZE \
2802      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2803      + TCG_TARGET_STACK_ALIGN - 1) \
2804     & ~(TCG_TARGET_STACK_ALIGN - 1))
2805
2806/* We're expecting a 2-byte uleb128-encoded value.  */
2807QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2808
2809/* We're expecting to use a single ADDI insn.  */
2810QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2811
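/* Resulting frame layout, from the final SP upward:
 *     TCG_STATIC_CALL_ARGS_SIZE             outgoing helper arguments
 *     CPU_TEMP_BUF_NLONGS * sizeof(long)    TCG temporaries
 *     (padding to TCG_TARGET_STACK_ALIGN)
 *     FP, LR                                from the initial STP
 *     x19/x20 .. x27/x28                    callee-saved pairs
 */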
2812static void tcg_target_qemu_prologue(TCGContext *s)
2813{
2814    TCGReg r;
2815
2816    /* Push (FP, LR) and allocate space for all saved registers.  */
2817    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2818                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2819
2820    /* Set up frame pointer for canonical unwinding.  */
2821    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2822
2823    /* Store callee-preserved regs x19..x28.  */
2824    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2825        int ofs = (r - TCG_REG_X19 + 2) * 8;
2826        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2827    }
2828
2829    /* Make stack space for TCG locals.  */
2830    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2831                 FRAME_SIZE - PUSH_SIZE);
2832
2833    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2834    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2835                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2836
2837#if !defined(CONFIG_SOFTMMU)
2838    if (USE_GUEST_BASE) {
2839        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2840        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2841    }
2842#endif
2843
2844    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2845    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2846
2847    /*
2848     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2849     * and fall through to the rest of the epilogue.
2850     */
2851    s->code_gen_epilogue = s->code_ptr;
2852    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2853
2854    /* TB epilogue */
2855    tb_ret_addr = s->code_ptr;
2856
2857    /* Remove TCG locals stack space.  */
2858    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2859                 FRAME_SIZE - PUSH_SIZE);
2860
2861    /* Restore registers x19..x28.  */
2862    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2863        int ofs = (r - TCG_REG_X19 + 2) * 8;
2864        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2865    }
2866
2867    /* Pop (FP, LR), restore SP to previous frame.  */
2868    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2869                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2870    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2871}
2872
2873static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2874{
2875    int i;
2876    for (i = 0; i < count; ++i) {
2877        p[i] = NOP;
2878    }
2879}
2880
2881typedef struct {
2882    DebugFrameHeader h;
2883    uint8_t fde_def_cfa[4];
2884    uint8_t fde_reg_ofs[24];
2885} DebugFrame;
2886
2887#define ELF_HOST_MACHINE EM_AARCH64
2888
2889static const DebugFrame debug_frame = {
2890    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2891    .h.cie.id = -1,
2892    .h.cie.version = 1,
2893    .h.cie.code_align = 1,
2894    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2895    .h.cie.return_column = TCG_REG_LR,
2896
2897    /* Total FDE size does not include the "len" member.  */
2898    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2899
2900    .fde_def_cfa = {
2901        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2902        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2903        (FRAME_SIZE >> 7)
2904    },
2905    .fde_reg_ofs = {
2906        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2907        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2908        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2909        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2910        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2911        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2912        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2913        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2914        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2915        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
2916        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2917        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2918    }
2919};
2920
2921void tcg_register_jit(void *buf, size_t buf_size)
2922{
2923    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2924}
2925