/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-pool.inc.c"
#include "qemu/bitops.h"

/* We're going to reuse TCGType when setting the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
  31    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *code_ptr = deposit32(*code_ptr, 0, 26, offset);
        return true;
    }
    return false;
}

static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 19)) {
        *code_ptr = deposit32(*code_ptr, 5, 19, offset);
        return true;
    }
    return false;
}
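
/* Both relocations are expressed in units of 32-bit insns, matching the
   word-scaled branch offsets in the encodings.  As an illustration, the
   signed 26-bit field handled by reloc_pc26 spans +-2^25 insns, i.e.
   +-128MB of code, while the 19-bit field of reloc_pc19 spans +-1MB.  */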

static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffffu;
        break;
    case 'w': /* advsimd registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffff00000000ull;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are needed for the helper args, so it is
           better to avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
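
/* For example, 0x123 and 0x123000 (the latter via the implicit LSL 12)
   are valid arithmetic immediates, while 0x123456 is not and must first
   be materialized in a register.  */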

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
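
/* A worked example of the closing trick above: for val = 0x0ff0,
   val & -val is 0x0010 and the sum is 0x1000, whose & with 0x0fff is 0,
   so the single run of ones is accepted.  For val = 0x6c (two runs),
   the sum is 0x70 and 0x70 & 0x6f is nonzero, so it is rejected.  */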

/* Match a constant that is valid for vectors.  */
static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
{
    int i;

    *op = 0;
    /* Match replication across 8 bits.  */
    if (v64 == dup_const(MO_8, v64)) {
        *cmode = 0xe;
        *imm8 = v64 & 0xff;
        return true;
    }
    /* Match replication across 16 bits.  */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (v16 == (v16 & 0xff)) {
            *cmode = 0x8;
            *imm8 = v16 & 0xff;
            return true;
        } else if (v16 == (v16 & 0xff00)) {
            *cmode = 0xa;
            *imm8 = v16 >> 8;
            return true;
        }
    }
    /* Match replication across 32 bits.  */
    if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;

        if (v32 == (v32 & 0xff)) {
            *cmode = 0x0;
            *imm8 = v32 & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff00)) {
            *cmode = 0x2;
            *imm8 = (v32 >> 8) & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff0000)) {
            *cmode = 0x4;
            *imm8 = (v32 >> 16) & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff000000)) {
            *cmode = 0x6;
            *imm8 = v32 >> 24;
            return true;
        } else if ((v32 & 0xffff00ff) == 0xff) {
            *cmode = 0xc;
            *imm8 = (v32 >> 8) & 0xff;
            return true;
        } else if ((v32 & 0xff00ffff) == 0xffff) {
            *cmode = 0xd;
            *imm8 = (v32 >> 16) & 0xff;
            return true;
        }
        /* Match forms of a float32.  */
        if (extract32(v32, 0, 19) == 0
            && (extract32(v32, 25, 6) == 0x20
                || extract32(v32, 25, 6) == 0x1f)) {
            *cmode = 0xf;
            *imm8 = (extract32(v32, 31, 1) << 7)
                  | (extract32(v32, 25, 1) << 6)
                  | extract32(v32, 19, 6);
            return true;
        }
    }
    /* Match forms of a float64.  */
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *op = 1;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    /* Match bytes of 0x00 and 0xff.  */
    for (i = 0; i < 64; i += 8) {
        uint64_t byte = extract64(v64, i, 8);
        if (byte != 0 && byte != 0xff) {
            break;
        }
    }
    if (i == 64) {
        *cmode = 0xe;
        *op = 1;
        *imm8 = (extract64(v64, 0, 1) << 0)
              | (extract64(v64, 8, 1) << 1)
              | (extract64(v64, 16, 1) << 2)
              | (extract64(v64, 24, 1) << 3)
              | (extract64(v64, 32, 1) << 4)
              | (extract64(v64, 40, 1) << 5)
              | (extract64(v64, 48, 1) << 6)
              | (extract64(v64, 56, 1) << 7);
        return true;
    }
    return false;
}
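
/* Two illustrative cases: a byte-replicated constant such as
   0xabababababababab matches the MO_8 test (cmode 0xe, op 0, imm8 0xab),
   while a value built only of 0x00/0xff bytes that fails the earlier
   replication tests falls through to the final loop and is encoded with
   cmode 0xe, op 1, and one imm8 bit per 0xff byte.  */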

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* Load register (literal), for loading values from a pc-relative
       offset (e.g. from the constant pool).  */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
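
/* For example, tcg_out_insn(s, 3401, ADDI, ...) pastes together
   tcg_out_insn_3401(s, I3401_ADDI, ...); naming an opcode from the wrong
   format group produces an undeclared I<fmt>_<op> enumerator and the
   build fails, which is the "typecheck" referred to above.  */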

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasions when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
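
/* Worked example: for limm = 0xff00 and ext == TCG_TYPE_I32 we get
   h = 48 and l = 8, hence r = 56 and c = 7; masked to 32 bits that is
   immr = 24, imms = 7, i.e. eight consecutive ones rotated right by 24,
   which DecodeBitMasks expands back into 0x0000ff00.  */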

static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, uint64_t v64)
{
    int op, cmode, imm8;

    if (is_fimm(v64, &op, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
    } else if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(rd >= 32);
        tcg_out_dupi_vec(s, type, rd, value);
        return;

    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = value - (intptr_t)s->code_ptr;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
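
/* As an illustration of the MOVZ/MOVK path above, 0x12340000abcd is
   emitted as MOVZ rd, #0xabcd followed by MOVK rd, #0x1234, lsl #32,
   while anything needing three or more moves is dumped into the
   constant pool instead.  */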

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding.  */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
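
/* For instance, an 8-byte load at offset 0x1008 takes the scaled form
   (0x1008 >> 3 == 0x201 <= 0xfff), offset -8 takes the unscaled form,
   and an offset like 0x123456 falls back to movi plus register offset.  */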

static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}
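
/* E.g. a 32-bit LSL #3 becomes UBFM Wd, Wn, #29, #28, which is exactly
   how the architecture defines the LSL-immediate alias.  */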

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
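
/* E.g. depositing an 8-bit field at lsb 8 of a 32-bit value gives
   BFM Wd, Wn, #24, #7, the encoding behind the BFI Wd, Wn, #8, #8
   alias.  */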

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}
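
/* Note that both forms above are exactly two insns (8 bytes), so the
   single atomic_set can retarget the jump while other threads may be
   executing it concurrently: they observe either the old pair or the
   new pair, never a mix.  */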

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}
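
/* For example, a double-word subtraction with register operands is
   emitted as SUBS rl, al, bl followed by SBC rh, ah, bh, the borrow
   propagating between the halves through the C flag.  */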

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
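
/* This table works because DMB_ISH's CRm nibble is the 0b1000 base:
   OR-ing in DMB_LD and/or DMB_ST selects CRm = 0b1001 (ISHLD),
   0b1010 (ISHST), or 0b1011 (the full inner-shareable ISH barrier).
   So a store-store fence emits DMB ISHST, and any ordering that
   involves a load emits DMB ISHLD.  */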

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
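
/* AArch64 has no ctz insn, so ctz is computed as CLZ(RBIT(x)).  The
   trailing CSEL substitutes the caller's value for a zero input; when
   that value is exactly the operation width (b == 64 or 32), CLZ
   already returns it naturally and the bare fast path above is used.  */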

#ifdef CONFIG_SOFTMMU
#include "tcg-ldst.inc.c"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    tcg_debug_assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
    tcg_debug_assert(ok);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
    tcg_debug_assert(ok);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_goto(s, lb->raddr);
}

static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* We expect tlb_mask to be before tlb_table.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
                  offsetof(CPUArchState, tlb_mask));

/* We expect to use a 24-bit unsigned offset from ENV.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
                  > 0xffffff);

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when
   finalizing the slow path.  Generated code returns the host addend in
   X1, and clobbers X0, X2, X3 and TMP.  */
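/* Roughly, the fast path emitted below is (a sketch for orientation;
   the exact insns depend on the offsets and flags computed below):
       ldr  x0, [env, #tlb_mask(mem_index)]
       ldr  x1, [env, #tlb_table(mem_index)]
       and  x0, x0, addr, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
       add  x1, x1, x0
       ldr  x0, [x1, #addr_read or addr_write]
       ldr  x1, [x1, #addend]
       and  x3, addr(+alignment bias), #page|align mask
       cmp  x0, x3
       b.ne slow_path  */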
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]);
    int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]);
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
    TCGType mask_type;
    uint64_t compare_mask;

    if (table_ofs > 0xfff) {
        int table_hi = table_ofs & ~0xfff;
        int mask_hi = mask_ofs & ~0xfff;

        table_base = TCG_REG_X1;
        if (mask_hi == table_hi) {
            mask_base = table_base;
        } else if (mask_hi) {
            mask_base = TCG_REG_X0;
            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
                         mask_base, TCG_AREG0, mask_hi);
        }
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
                     table_base, TCG_AREG0, table_hi);
        mask_ofs -= mask_hi;
        table_ofs -= table_hi;
    }

    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
    tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
1543
1544#endif /* CONFIG_SOFTMMU */
1545
1546static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1547                                   TCGReg data_r, TCGReg addr_r,
1548                                   TCGType otype, TCGReg off_r)
1549{
1550    const TCGMemOp bswap = memop & MO_BSWAP;
1551
1552    switch (memop & MO_SSIZE) {
1553    case MO_UB:
1554        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1555        break;
1556    case MO_SB:
1557        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1558                       data_r, addr_r, otype, off_r);
1559        break;
1560    case MO_UW:
1561        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1562        if (bswap) {
1563            tcg_out_rev16(s, data_r, data_r);
1564        }
1565        break;
1566    case MO_SW:
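            /* There is no single byte-swapped sign-extending load:
               load zero-extended, byte-swap, then sign-extend.  */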
1567        if (bswap) {
1568            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1569            tcg_out_rev16(s, data_r, data_r);
1570            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1571        } else {
1572            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1573                           data_r, addr_r, otype, off_r);
1574        }
1575        break;
1576    case MO_UL:
1577        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1578        if (bswap) {
1579            tcg_out_rev32(s, data_r, data_r);
1580        }
1581        break;
1582    case MO_SL:
1583        if (bswap) {
1584            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1585            tcg_out_rev32(s, data_r, data_r);
1586            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1587        } else {
1588            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1589        }
1590        break;
1591    case MO_Q:
1592        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1593        if (bswap) {
1594            tcg_out_rev64(s, data_r, data_r);
1595        }
1596        break;
1597    default:
1598        tcg_abort();
1599    }
1600}
1601
1602static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1603                                   TCGReg data_r, TCGReg addr_r,
1604                                   TCGType otype, TCGReg off_r)
1605{
1606    const TCGMemOp bswap = memop & MO_BSWAP;
1607
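        /* Storing XZR stores zero and bswap(0) == 0, so the REV into
           the temporary is skipped for the zero register.  */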
1608    switch (memop & MO_SIZE) {
1609    case MO_8:
1610        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1611        break;
1612    case MO_16:
1613        if (bswap && data_r != TCG_REG_XZR) {
1614            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1615            data_r = TCG_REG_TMP;
1616        }
1617        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1618        break;
1619    case MO_32:
1620        if (bswap && data_r != TCG_REG_XZR) {
1621            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1622            data_r = TCG_REG_TMP;
1623        }
1624        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1625        break;
1626    case MO_64:
1627        if (bswap && data_r != TCG_REG_XZR) {
1628            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1629            data_r = TCG_REG_TMP;
1630        }
1631        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1632        break;
1633    default:
1634        tcg_abort();
1635    }
1636}
1637
1638static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1639                            TCGMemOpIdx oi, TCGType ext)
1640{
1641    TCGMemOp memop = get_memop(oi);
1642    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1643#ifdef CONFIG_SOFTMMU
1644    unsigned mem_index = get_mmuidx(oi);
1645    tcg_insn_unit *label_ptr;
1646
1647    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1648    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1649                           TCG_REG_X1, otype, addr_reg);
1650    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1651                        s->code_ptr, label_ptr);
1652#else /* !CONFIG_SOFTMMU */
1653    if (USE_GUEST_BASE) {
1654        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1655                               TCG_REG_GUEST_BASE, otype, addr_reg);
1656    } else {
1657        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1658                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1659    }
1660#endif /* CONFIG_SOFTMMU */
1661}
1662
1663static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1664                            TCGMemOpIdx oi)
1665{
1666    TCGMemOp memop = get_memop(oi);
1667    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1668#ifdef CONFIG_SOFTMMU
1669    unsigned mem_index = get_mmuidx(oi);
1670    tcg_insn_unit *label_ptr;
1671
1672    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1673    tcg_out_qemu_st_direct(s, memop, data_reg,
1674                           TCG_REG_X1, otype, addr_reg);
1675    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1676                        data_reg, addr_reg, s->code_ptr, label_ptr);
1677#else /* !CONFIG_SOFTMMU */
1678    if (USE_GUEST_BASE) {
1679        tcg_out_qemu_st_direct(s, memop, data_reg,
1680                               TCG_REG_GUEST_BASE, otype, addr_reg);
1681    } else {
1682        tcg_out_qemu_st_direct(s, memop, data_reg,
1683                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1684    }
1685#endif /* CONFIG_SOFTMMU */
1686}
1687
1688static tcg_insn_unit *tb_ret_addr;
1689
1690static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1691                       const TCGArg args[TCG_MAX_OP_ARGS],
1692                       const int const_args[TCG_MAX_OP_ARGS])
1693{
1694    /* 99% of the time, we can signal the use of extension registers
1695       by looking to see if the opcode handles 64-bit data.  */
1696    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1697
1698    /* Hoist the loads of the most common arguments.  */
1699    TCGArg a0 = args[0];
1700    TCGArg a1 = args[1];
1701    TCGArg a2 = args[2];
1702    int c2 = const_args[2];
1703
1704    /* Some operands use the "rZ" constraint: a register or the constant
1705       zero.  The constraint admits only 0, so const_args[I] alone suffices.  */
1706#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1707
1708    switch (opc) {
1709    case INDEX_op_exit_tb:
1710        /* Reuse the zeroing that exists for goto_ptr.  */
1711        if (a0 == 0) {
1712            tcg_out_goto_long(s, s->code_gen_epilogue);
1713        } else {
1714            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1715            tcg_out_goto_long(s, tb_ret_addr);
1716        }
1717        break;
1718
1719    case INDEX_op_goto_tb:
1720        if (s->tb_jmp_insn_offset != NULL) {
1721            /* TCG_TARGET_HAS_direct_jump */
1722            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1723               write can be used to patch the target address. */
1724            if ((uintptr_t)s->code_ptr & 7) {
1725                tcg_out32(s, NOP);
1726            }
1727            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1728            /* The actual branch destination will be patched by
1729               tb_target_set_jmp_target later.  */
1730            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1731            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1732        } else {
1733            /* !TCG_TARGET_HAS_direct_jump */
1734            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1735            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1736            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1737        }
1738        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1739        set_jmp_reset_offset(s, a0);
1740        break;
1741
1742    case INDEX_op_goto_ptr:
1743        tcg_out_insn(s, 3207, BR, a0);
1744        break;
1745
1746    case INDEX_op_br:
1747        tcg_out_goto_label(s, arg_label(a0));
1748        break;
1749
1750    case INDEX_op_ld8u_i32:
1751    case INDEX_op_ld8u_i64:
1752        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1753        break;
1754    case INDEX_op_ld8s_i32:
1755        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1756        break;
1757    case INDEX_op_ld8s_i64:
1758        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1759        break;
1760    case INDEX_op_ld16u_i32:
1761    case INDEX_op_ld16u_i64:
1762        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1763        break;
1764    case INDEX_op_ld16s_i32:
1765        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1766        break;
1767    case INDEX_op_ld16s_i64:
1768        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1769        break;
1770    case INDEX_op_ld_i32:
1771    case INDEX_op_ld32u_i64:
1772        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1773        break;
1774    case INDEX_op_ld32s_i64:
1775        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1776        break;
1777    case INDEX_op_ld_i64:
1778        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1779        break;
1780
1781    case INDEX_op_st8_i32:
1782    case INDEX_op_st8_i64:
1783        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1784        break;
1785    case INDEX_op_st16_i32:
1786    case INDEX_op_st16_i64:
1787        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1788        break;
1789    case INDEX_op_st_i32:
1790    case INDEX_op_st32_i64:
1791        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1792        break;
1793    case INDEX_op_st_i64:
1794        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1795        break;
1796
1797    case INDEX_op_add_i32:
1798        a2 = (int32_t)a2;
1799        /* FALLTHRU */
1800    case INDEX_op_add_i64:
1801        if (c2) {
1802            tcg_out_addsubi(s, ext, a0, a1, a2);
1803        } else {
1804            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1805        }
1806        break;
1807
1808    case INDEX_op_sub_i32:
1809        a2 = (int32_t)a2;
1810        /* FALLTHRU */
1811    case INDEX_op_sub_i64:
1812        if (c2) {
1813            tcg_out_addsubi(s, ext, a0, a1, -a2);
1814        } else {
1815            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1816        }
1817        break;
1818
1819    case INDEX_op_neg_i64:
1820    case INDEX_op_neg_i32:
1821        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1822        break;
1823
1824    case INDEX_op_and_i32:
1825        a2 = (int32_t)a2;
1826        /* FALLTHRU */
1827    case INDEX_op_and_i64:
1828        if (c2) {
1829            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1830        } else {
1831            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1832        }
1833        break;
1834
1835    case INDEX_op_andc_i32:
1836        a2 = (int32_t)a2;
1837        /* FALLTHRU */
1838    case INDEX_op_andc_i64:
1839        if (c2) {
1840            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1841        } else {
1842            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1843        }
1844        break;
1845
1846    case INDEX_op_or_i32:
1847        a2 = (int32_t)a2;
1848        /* FALLTHRU */
1849    case INDEX_op_or_i64:
1850        if (c2) {
1851            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1852        } else {
1853            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1854        }
1855        break;
1856
1857    case INDEX_op_orc_i32:
1858        a2 = (int32_t)a2;
1859        /* FALLTHRU */
1860    case INDEX_op_orc_i64:
1861        if (c2) {
1862            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1863        } else {
1864            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1865        }
1866        break;
1867
1868    case INDEX_op_xor_i32:
1869        a2 = (int32_t)a2;
1870        /* FALLTHRU */
1871    case INDEX_op_xor_i64:
1872        if (c2) {
1873            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1874        } else {
1875            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1876        }
1877        break;
1878
1879    case INDEX_op_eqv_i32:
1880        a2 = (int32_t)a2;
1881        /* FALLTHRU */
1882    case INDEX_op_eqv_i64:
1883        if (c2) {
1884            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1885        } else {
1886            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1887        }
1888        break;
1889
1890    case INDEX_op_not_i64:
1891    case INDEX_op_not_i32:
1892        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1893        break;
1894
1895    case INDEX_op_mul_i64:
1896    case INDEX_op_mul_i32:
1897        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1898        break;
1899
1900    case INDEX_op_div_i64:
1901    case INDEX_op_div_i32:
1902        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1903        break;
1904    case INDEX_op_divu_i64:
1905    case INDEX_op_divu_i32:
1906        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1907        break;
1908
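        /* AArch64 has no remainder insn: divide, then compute
           a1 - (a1 / a2) * a2 with MSUB.  */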
1909    case INDEX_op_rem_i64:
1910    case INDEX_op_rem_i32:
1911        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1912        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1913        break;
1914    case INDEX_op_remu_i64:
1915    case INDEX_op_remu_i32:
1916        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1917        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1918        break;
1919
1920    case INDEX_op_shl_i64:
1921    case INDEX_op_shl_i32:
1922        if (c2) {
1923            tcg_out_shl(s, ext, a0, a1, a2);
1924        } else {
1925            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1926        }
1927        break;
1928
1929    case INDEX_op_shr_i64:
1930    case INDEX_op_shr_i32:
1931        if (c2) {
1932            tcg_out_shr(s, ext, a0, a1, a2);
1933        } else {
1934            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1935        }
1936        break;
1937
1938    case INDEX_op_sar_i64:
1939    case INDEX_op_sar_i32:
1940        if (c2) {
1941            tcg_out_sar(s, ext, a0, a1, a2);
1942        } else {
1943            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1944        }
1945        break;
1946
1947    case INDEX_op_rotr_i64:
1948    case INDEX_op_rotr_i32:
1949        if (c2) {
1950            tcg_out_rotr(s, ext, a0, a1, a2);
1951        } else {
1952            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1953        }
1954        break;
1955
1956    case INDEX_op_rotl_i64:
1957    case INDEX_op_rotl_i32:
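            /* AArch64 has only rotate-right; rotl(x, r) == rotr(x, -r)
               with the count taken modulo the operand width.  */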
1958        if (c2) {
1959            tcg_out_rotl(s, ext, a0, a1, a2);
1960        } else {
1961            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1962            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1963        }
1964        break;
1965
1966    case INDEX_op_clz_i64:
1967    case INDEX_op_clz_i32:
1968        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1969        break;
1970    case INDEX_op_ctz_i64:
1971    case INDEX_op_ctz_i32:
1972        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1973        break;
1974
1975    case INDEX_op_brcond_i32:
1976        a1 = (int32_t)a1;
1977        /* FALLTHRU */
1978    case INDEX_op_brcond_i64:
1979        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1980        break;
1981
1982    case INDEX_op_setcond_i32:
1983        a2 = (int32_t)a2;
1984        /* FALLTHRU */
1985    case INDEX_op_setcond_i64:
1986        tcg_out_cmp(s, ext, a1, a2, c2);
1987        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
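            /* The 0/1 result fits in 32 bits, and writing Wd zeroes the
               high half, so TCG_TYPE_I32 is correct even for setcond_i64.  */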
1988        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1989                     TCG_REG_XZR, tcg_invert_cond(args[3]));
1990        break;
1991
1992    case INDEX_op_movcond_i32:
1993        a2 = (int32_t)a2;
1994        /* FALLTHRU */
1995    case INDEX_op_movcond_i64:
1996        tcg_out_cmp(s, ext, a1, a2, c2);
1997        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1998        break;
1999
2000    case INDEX_op_qemu_ld_i32:
2001    case INDEX_op_qemu_ld_i64:
2002        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2003        break;
2004    case INDEX_op_qemu_st_i32:
2005    case INDEX_op_qemu_st_i64:
2006        tcg_out_qemu_st(s, REG0(0), a1, a2);
2007        break;
2008
2009    case INDEX_op_bswap64_i64:
2010        tcg_out_rev64(s, a0, a1);
2011        break;
2012    case INDEX_op_bswap32_i64:
2013    case INDEX_op_bswap32_i32:
2014        tcg_out_rev32(s, a0, a1);
2015        break;
2016    case INDEX_op_bswap16_i64:
2017    case INDEX_op_bswap16_i32:
2018        tcg_out_rev16(s, a0, a1);
2019        break;
2020
2021    case INDEX_op_ext8s_i64:
2022    case INDEX_op_ext8s_i32:
2023        tcg_out_sxt(s, ext, MO_8, a0, a1);
2024        break;
2025    case INDEX_op_ext16s_i64:
2026    case INDEX_op_ext16s_i32:
2027        tcg_out_sxt(s, ext, MO_16, a0, a1);
2028        break;
2029    case INDEX_op_ext_i32_i64:
2030    case INDEX_op_ext32s_i64:
2031        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2032        break;
2033    case INDEX_op_ext8u_i64:
2034    case INDEX_op_ext8u_i32:
2035        tcg_out_uxt(s, MO_8, a0, a1);
2036        break;
2037    case INDEX_op_ext16u_i64:
2038    case INDEX_op_ext16u_i32:
2039        tcg_out_uxt(s, MO_16, a0, a1);
2040        break;
2041    case INDEX_op_extu_i32_i64:
2042    case INDEX_op_ext32u_i64:
2043        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2044        break;
2045
2046    case INDEX_op_deposit_i64:
2047    case INDEX_op_deposit_i32:
2048        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2049        break;
2050
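        /* UBFM/SBFM take the first and last bit positions of the field,
           hence <pos, pos + len - 1>.  */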
2051    case INDEX_op_extract_i64:
2052    case INDEX_op_extract_i32:
2053        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2054        break;
2055
2056    case INDEX_op_sextract_i64:
2057    case INDEX_op_sextract_i32:
2058        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2059        break;
2060
2061    case INDEX_op_add2_i32:
2062        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2063                        (int32_t)args[4], args[5], const_args[4],
2064                        const_args[5], false);
2065        break;
2066    case INDEX_op_add2_i64:
2067        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2068                        args[5], const_args[4], const_args[5], false);
2069        break;
2070    case INDEX_op_sub2_i32:
2071        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2072                        (int32_t)args[4], args[5], const_args[4],
2073                        const_args[5], true);
2074        break;
2075    case INDEX_op_sub2_i64:
2076        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2077                        args[5], const_args[4], const_args[5], true);
2078        break;
2079
2080    case INDEX_op_muluh_i64:
2081        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2082        break;
2083    case INDEX_op_mulsh_i64:
2084        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2085        break;
2086
2087    case INDEX_op_mb:
2088        tcg_out_mb(s, a0);
2089        break;
2090
2091    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2092    case INDEX_op_mov_i64:
2093    case INDEX_op_mov_vec:
2094    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2095    case INDEX_op_movi_i64:
2096    case INDEX_op_dupi_vec:
2097    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2098    default:
2099        g_assert_not_reached();
2100    }
2101
2102#undef REG0
2103}
2104
2105static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2106                           unsigned vecl, unsigned vece,
2107                           const TCGArg *args, const int *const_args)
2108{
2109    static const AArch64Insn cmp_insn[16] = {
2110        [TCG_COND_EQ] = I3616_CMEQ,
2111        [TCG_COND_GT] = I3616_CMGT,
2112        [TCG_COND_GE] = I3616_CMGE,
2113        [TCG_COND_GTU] = I3616_CMHI,
2114        [TCG_COND_GEU] = I3616_CMHS,
2115    };
2116    static const AArch64Insn cmp0_insn[16] = {
2117        [TCG_COND_EQ] = I3617_CMEQ0,
2118        [TCG_COND_GT] = I3617_CMGT0,
2119        [TCG_COND_GE] = I3617_CMGE0,
2120        [TCG_COND_LT] = I3617_CMLT0,
2121        [TCG_COND_LE] = I3617_CMLE0,
2122    };
2123
2124    TCGType type = vecl + TCG_TYPE_V64;
2125    unsigned is_q = vecl;
2126    TCGArg a0, a1, a2;
2127
2128    a0 = args[0];
2129    a1 = args[1];
2130    a2 = args[2];
2131
2132    switch (opc) {
2133    case INDEX_op_ld_vec:
2134        tcg_out_ld(s, type, a0, a1, a2);
2135        break;
2136    case INDEX_op_st_vec:
2137        tcg_out_st(s, type, a0, a1, a2);
2138        break;
2139    case INDEX_op_add_vec:
2140        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2141        break;
2142    case INDEX_op_sub_vec:
2143        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2144        break;
2145    case INDEX_op_mul_vec:
2146        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2147        break;
2148    case INDEX_op_neg_vec:
2149        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2150        break;
2151    case INDEX_op_and_vec:
2152        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2153        break;
2154    case INDEX_op_or_vec:
2155        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2156        break;
2157    case INDEX_op_xor_vec:
2158        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2159        break;
2160    case INDEX_op_andc_vec:
2161        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2162        break;
2163    case INDEX_op_orc_vec:
2164        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2165        break;
2166    case INDEX_op_ssadd_vec:
2167        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2168        break;
2169    case INDEX_op_sssub_vec:
2170        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2171        break;
2172    case INDEX_op_usadd_vec:
2173        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2174        break;
2175    case INDEX_op_ussub_vec:
2176        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2177        break;
2178    case INDEX_op_smax_vec:
2179        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2180        break;
2181    case INDEX_op_smin_vec:
2182        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2183        break;
2184    case INDEX_op_umax_vec:
2185        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2186        break;
2187    case INDEX_op_umin_vec:
2188        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2189        break;
2190    case INDEX_op_not_vec:
2191        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2192        break;
2193    case INDEX_op_dup_vec:
2194        tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
2195        break;
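        /* AdvSIMD shift-by-immediate folds the element size (8 << vece
           bits) into the immh:immb field: SHL encodes esize + shift,
           USHR/SSHR encode 2 * esize - shift.  */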
2196    case INDEX_op_shli_vec:
2197        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2198        break;
2199    case INDEX_op_shri_vec:
2200        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2201        break;
2202    case INDEX_op_sari_vec:
2203        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2204        break;
2205    case INDEX_op_cmp_vec:
2206        {
2207            TCGCond cond = args[3];
2208            AArch64Insn insn;
2209
2210            if (cond == TCG_COND_NE) {
2211                if (const_args[2]) {
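                    /* NE against the constant zero (all "wZ" allows):
                       CMTST sets each lane to -1 iff (a1 & a1) != 0,
                       i.e. iff the lane is nonzero.  */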
2212                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2213                } else {
2214                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2215                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2216                }
2217            } else {
2218                if (const_args[2]) {
2219                    insn = cmp0_insn[cond];
2220                    if (insn) {
2221                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2222                        break;
2223                    }
2224                    tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2225                    a2 = TCG_VEC_TMP;
2226                }
2227                insn = cmp_insn[cond];
2228                if (insn == 0) {
2229                    TCGArg t;
2230                    t = a1, a1 = a2, a2 = t;
2231                    cond = tcg_swap_cond(cond);
2232                    insn = cmp_insn[cond];
2233                    tcg_debug_assert(insn != 0);
2234                }
2235                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2236            }
2237        }
2238        break;
2239    default:
2240        g_assert_not_reached();
2241    }
2242}
2243
2244int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2245{
2246    switch (opc) {
2247    case INDEX_op_add_vec:
2248    case INDEX_op_sub_vec:
2249    case INDEX_op_and_vec:
2250    case INDEX_op_or_vec:
2251    case INDEX_op_xor_vec:
2252    case INDEX_op_andc_vec:
2253    case INDEX_op_orc_vec:
2254    case INDEX_op_neg_vec:
2255    case INDEX_op_not_vec:
2256    case INDEX_op_cmp_vec:
2257    case INDEX_op_shli_vec:
2258    case INDEX_op_shri_vec:
2259    case INDEX_op_sari_vec:
2260    case INDEX_op_ssadd_vec:
2261    case INDEX_op_sssub_vec:
2262    case INDEX_op_usadd_vec:
2263    case INDEX_op_ussub_vec:
2264    case INDEX_op_smax_vec:
2265    case INDEX_op_smin_vec:
2266    case INDEX_op_umax_vec:
2267    case INDEX_op_umin_vec:
2268        return 1;
2269    case INDEX_op_mul_vec:
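            /* AdvSIMD has no integer multiply with 64-bit elements.  */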
2270        return vece < MO_64;
2271
2272    default:
2273        return 0;
2274    }
2275}
2276
2277void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2278                       TCGArg a0, ...)
2279{
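    /* Nothing to expand: tcg_can_emit_vec_op above never returns -1,
       so no vector op is implemented via expansion.  */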
2280}
2281
2282static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2283{
2284    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2285    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2286    static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2287    static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2288    static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2289    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2290    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2291    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2292    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2293    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2294    static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2295    static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2296    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2297    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2298    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2299    static const TCGTargetOpDef r_r_rAL
2300        = { .args_ct_str = { "r", "r", "rAL" } };
2301    static const TCGTargetOpDef dep
2302        = { .args_ct_str = { "r", "0", "rZ" } };
2303    static const TCGTargetOpDef movc
2304        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2305    static const TCGTargetOpDef add2
2306        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2307
2308    switch (op) {
2309    case INDEX_op_goto_ptr:
2310        return &r;
2311
2312    case INDEX_op_ld8u_i32:
2313    case INDEX_op_ld8s_i32:
2314    case INDEX_op_ld16u_i32:
2315    case INDEX_op_ld16s_i32:
2316    case INDEX_op_ld_i32:
2317    case INDEX_op_ld8u_i64:
2318    case INDEX_op_ld8s_i64:
2319    case INDEX_op_ld16u_i64:
2320    case INDEX_op_ld16s_i64:
2321    case INDEX_op_ld32u_i64:
2322    case INDEX_op_ld32s_i64:
2323    case INDEX_op_ld_i64:
2324    case INDEX_op_neg_i32:
2325    case INDEX_op_neg_i64:
2326    case INDEX_op_not_i32:
2327    case INDEX_op_not_i64:
2328    case INDEX_op_bswap16_i32:
2329    case INDEX_op_bswap32_i32:
2330    case INDEX_op_bswap16_i64:
2331    case INDEX_op_bswap32_i64:
2332    case INDEX_op_bswap64_i64:
2333    case INDEX_op_ext8s_i32:
2334    case INDEX_op_ext16s_i32:
2335    case INDEX_op_ext8u_i32:
2336    case INDEX_op_ext16u_i32:
2337    case INDEX_op_ext8s_i64:
2338    case INDEX_op_ext16s_i64:
2339    case INDEX_op_ext32s_i64:
2340    case INDEX_op_ext8u_i64:
2341    case INDEX_op_ext16u_i64:
2342    case INDEX_op_ext32u_i64:
2343    case INDEX_op_ext_i32_i64:
2344    case INDEX_op_extu_i32_i64:
2345    case INDEX_op_extract_i32:
2346    case INDEX_op_extract_i64:
2347    case INDEX_op_sextract_i32:
2348    case INDEX_op_sextract_i64:
2349        return &r_r;
2350
2351    case INDEX_op_st8_i32:
2352    case INDEX_op_st16_i32:
2353    case INDEX_op_st_i32:
2354    case INDEX_op_st8_i64:
2355    case INDEX_op_st16_i64:
2356    case INDEX_op_st32_i64:
2357    case INDEX_op_st_i64:
2358        return &rZ_r;
2359
2360    case INDEX_op_add_i32:
2361    case INDEX_op_add_i64:
2362    case INDEX_op_sub_i32:
2363    case INDEX_op_sub_i64:
2364    case INDEX_op_setcond_i32:
2365    case INDEX_op_setcond_i64:
2366        return &r_r_rA;
2367
2368    case INDEX_op_mul_i32:
2369    case INDEX_op_mul_i64:
2370    case INDEX_op_div_i32:
2371    case INDEX_op_div_i64:
2372    case INDEX_op_divu_i32:
2373    case INDEX_op_divu_i64:
2374    case INDEX_op_rem_i32:
2375    case INDEX_op_rem_i64:
2376    case INDEX_op_remu_i32:
2377    case INDEX_op_remu_i64:
2378    case INDEX_op_muluh_i64:
2379    case INDEX_op_mulsh_i64:
2380        return &r_r_r;
2381
2382    case INDEX_op_and_i32:
2383    case INDEX_op_and_i64:
2384    case INDEX_op_or_i32:
2385    case INDEX_op_or_i64:
2386    case INDEX_op_xor_i32:
2387    case INDEX_op_xor_i64:
2388    case INDEX_op_andc_i32:
2389    case INDEX_op_andc_i64:
2390    case INDEX_op_orc_i32:
2391    case INDEX_op_orc_i64:
2392    case INDEX_op_eqv_i32:
2393    case INDEX_op_eqv_i64:
2394        return &r_r_rL;
2395
2396    case INDEX_op_shl_i32:
2397    case INDEX_op_shr_i32:
2398    case INDEX_op_sar_i32:
2399    case INDEX_op_rotl_i32:
2400    case INDEX_op_rotr_i32:
2401    case INDEX_op_shl_i64:
2402    case INDEX_op_shr_i64:
2403    case INDEX_op_sar_i64:
2404    case INDEX_op_rotl_i64:
2405    case INDEX_op_rotr_i64:
2406        return &r_r_ri;
2407
2408    case INDEX_op_clz_i32:
2409    case INDEX_op_ctz_i32:
2410    case INDEX_op_clz_i64:
2411    case INDEX_op_ctz_i64:
2412        return &r_r_rAL;
2413
2414    case INDEX_op_brcond_i32:
2415    case INDEX_op_brcond_i64:
2416        return &r_rA;
2417
2418    case INDEX_op_movcond_i32:
2419    case INDEX_op_movcond_i64:
2420        return &movc;
2421
2422    case INDEX_op_qemu_ld_i32:
2423    case INDEX_op_qemu_ld_i64:
2424        return &r_l;
2425    case INDEX_op_qemu_st_i32:
2426    case INDEX_op_qemu_st_i64:
2427        return &lZ_l;
2428
2429    case INDEX_op_deposit_i32:
2430    case INDEX_op_deposit_i64:
2431        return &dep;
2432
2433    case INDEX_op_add2_i32:
2434    case INDEX_op_add2_i64:
2435    case INDEX_op_sub2_i32:
2436    case INDEX_op_sub2_i64:
2437        return &add2;
2438
2439    case INDEX_op_add_vec:
2440    case INDEX_op_sub_vec:
2441    case INDEX_op_mul_vec:
2442    case INDEX_op_and_vec:
2443    case INDEX_op_or_vec:
2444    case INDEX_op_xor_vec:
2445    case INDEX_op_andc_vec:
2446    case INDEX_op_orc_vec:
2447    case INDEX_op_ssadd_vec:
2448    case INDEX_op_sssub_vec:
2449    case INDEX_op_usadd_vec:
2450    case INDEX_op_ussub_vec:
2451    case INDEX_op_smax_vec:
2452    case INDEX_op_smin_vec:
2453    case INDEX_op_umax_vec:
2454    case INDEX_op_umin_vec:
2455        return &w_w_w;
2456    case INDEX_op_not_vec:
2457    case INDEX_op_neg_vec:
2458    case INDEX_op_shli_vec:
2459    case INDEX_op_shri_vec:
2460    case INDEX_op_sari_vec:
2461        return &w_w;
2462    case INDEX_op_ld_vec:
2463    case INDEX_op_st_vec:
2464        return &w_r;
2465    case INDEX_op_dup_vec:
2466        return &w_wr;
2467    case INDEX_op_cmp_vec:
2468        return &w_w_wZ;
2469
2470    default:
2471        return NULL;
2472    }
2473}
2474
2475static void tcg_target_init(TCGContext *s)
2476{
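    /* Regs 0..31 are the general registers, 32..63 the vector regs.  */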
2477    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2478    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2479    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2480    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2481
2482    tcg_target_call_clobber_regs = -1ull;
2483    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2484    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2485    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2486    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2487    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2488    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2489    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2490    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2491    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2492    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2493    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2494    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2495    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2496    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2497    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2498    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2499    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2500    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2501    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2502
2503    s->reserved_regs = 0;
2504    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2505    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2506    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2507    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2508    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2509}
2510
2511/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2512#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2513
2514#define FRAME_SIZE \
2515    ((PUSH_SIZE \
2516      + TCG_STATIC_CALL_ARGS_SIZE \
2517      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2518      + TCG_TARGET_STACK_ALIGN - 1) \
2519     & ~(TCG_TARGET_STACK_ALIGN - 1))
2520
2521/* We're expecting a 2 byte uleb128 encoded value.  */
2522QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
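/* (uleb128 packs 7 bits per byte, so two bytes cover values < 1 << 14.)  */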
2523
2524/* We're expecting to use a single ADDI insn.  */
2525QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2526
2527static void tcg_target_qemu_prologue(TCGContext *s)
2528{
2529    TCGReg r;
2530
2531    /* Push (FP, LR) and allocate space for all saved registers.  */
2532    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2533                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2534
2535    /* Set up frame pointer for canonical unwinding.  */
2536    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2537
2538    /* Store callee-preserved regs x19..x28.  */
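        /* Slot 0 holds the (FP, LR) pair, so pair offsets start at 16.  */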
2539    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2540        int ofs = (r - TCG_REG_X19 + 2) * 8;
2541        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2542    }
2543
2544    /* Make stack space for TCG locals.  */
2545    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2546                 FRAME_SIZE - PUSH_SIZE);
2547
2548    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2549    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2550                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2551
2552#if !defined(CONFIG_SOFTMMU)
2553    if (USE_GUEST_BASE) {
2554        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2555        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2556    }
2557#endif
2558
2559    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2560    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2561
2562    /*
2563     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2564     * and fall through to the rest of the epilogue.
2565     */
2566    s->code_gen_epilogue = s->code_ptr;
2567    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2568
2569    /* TB epilogue */
2570    tb_ret_addr = s->code_ptr;
2571
2572    /* Remove TCG locals stack space.  */
2573    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2574                 FRAME_SIZE - PUSH_SIZE);
2575
2576    /* Restore registers x19..x28.  */
2577    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2578        int ofs = (r - TCG_REG_X19 + 2) * 8;
2579        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2580    }
2581
2582    /* Pop (FP, LR), restore SP to previous frame.  */
2583    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2584                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2585    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2586}
2587
2588static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2589{
2590    int i;
2591    for (i = 0; i < count; ++i) {
2592        p[i] = NOP;
2593    }
2594}
2595
2596typedef struct {
2597    DebugFrameHeader h;
2598    uint8_t fde_def_cfa[4];
2599    uint8_t fde_reg_ofs[24];
2600} DebugFrame;
2601
2602#define ELF_HOST_MACHINE EM_AARCH64
2603
2604static const DebugFrame debug_frame = {
2605    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2606    .h.cie.id = -1,
2607    .h.cie.version = 1,
2608    .h.cie.code_align = 1,
2609    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2610    .h.cie.return_column = TCG_REG_LR,
2611
2612    /* Total FDE size does not include the "len" member.  */
2613    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2614
2615    .fde_def_cfa = {
2616        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2617        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2618        (FRAME_SIZE >> 7)
2619    },
2620    .fde_reg_ofs = {
2621        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2622        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2623        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2624        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2625        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2626        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2627        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2628        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2629        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2630        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
2631        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2632        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2633    }
2634};
2635
2636void tcg_register_jit(void *buf, size_t buf_size)
2637{
2638    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2639}
2640