qemu/tcg/aarch64/tcg-target.c
   1/*
   2 * Initial TCG Implementation for aarch64
   3 *
   4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
   5 * Written by Claudio Fontana
   6 *
   7 * This work is licensed under the terms of the GNU GPL, version 2 or
   8 * (at your option) any later version.
   9 *
  10 * See the COPYING file in the top-level directory for details.
  11 */
  12
  13#include "qemu/bitops.h"
  14
  15#ifndef NDEBUG
  16static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
  17    "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
  18    "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
  19    "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
  20    "%x24", "%x25", "%x26", "%x27", "%x28",
  21    "%fp", /* frame pointer */
  22    "%lr", /* link register */
  23    "%sp",  /* stack pointer */
  24};
  25#endif /* NDEBUG */
  26
  27#ifdef TARGET_WORDS_BIGENDIAN
  28 #define TCG_LDST_BSWAP 1
  29#else
  30 #define TCG_LDST_BSWAP 0
  31#endif
  32
  33static const int tcg_target_reg_alloc_order[] = {
  34    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
  35    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
  36    TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
  37
  38    TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
  39    TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
  40    TCG_REG_X16, TCG_REG_X17,
  41
  42    TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */
  43
  44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
  45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
  46
  47    TCG_REG_X8, /* will not use, see tcg_target_init */
  48};
  49
  50static const int tcg_target_call_iarg_regs[8] = {
  51    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
  52    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
  53};
  54static const int tcg_target_call_oarg_regs[1] = {
  55    TCG_REG_X0
  56};
  57
  58#define TCG_REG_TMP TCG_REG_X8
  59
  60#ifndef CONFIG_SOFTMMU
  61# if defined(CONFIG_USE_GUEST_BASE)
  62# define TCG_REG_GUEST_BASE TCG_REG_X28
  63# else
  64# define TCG_REG_GUEST_BASE TCG_REG_XZR
  65# endif
  66#endif
  67
  68static inline void reloc_pc26(void *code_ptr, tcg_target_long target)
  69{
  70    tcg_target_long offset; uint32_t insn;
  71    offset = (target - (tcg_target_long)code_ptr) / 4;
  72    /* read instruction, mask away previous PC_REL26 parameter contents,
  73       set the proper offset, then write back the instruction. */
  74    insn = *(uint32_t *)code_ptr;
  75    insn = deposit32(insn, 0, 26, offset);
  76    *(uint32_t *)code_ptr = insn;
  77}
  78
  79static inline void reloc_pc19(void *code_ptr, tcg_target_long target)
  80{
  81    tcg_target_long offset; uint32_t insn;
  82    offset = (target - (tcg_target_long)code_ptr) / 4;
  83    /* read instruction, mask away previous PC_REL19 parameter contents,
  84       set the proper offset, then write back the instruction. */
  85    insn = *(uint32_t *)code_ptr;
  86    insn = deposit32(insn, 5, 19, offset);
  87    *(uint32_t *)code_ptr = insn;
  88}
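
/* Editor's illustration, not part of the original backend: a minimal sketch of
   how the relocation helpers above are used.  A branch that must jump 0x100
   bytes forward gets the word offset 0x100 / 4 = 0x40 deposited into bits
   [25:0]; deposit32() leaves the opcode bits untouched.  The function name
   below is hypothetical. */
static inline void reloc_pc26_example(void *branch_insn)
{
    /* retarget the B/BL instruction at branch_insn to branch_insn + 0x100 */
    reloc_pc26(branch_insn, (tcg_target_long)branch_insn + 0x100);
}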
  89
  90static inline void patch_reloc(uint8_t *code_ptr, int type,
  91                               tcg_target_long value, tcg_target_long addend)
  92{
  93    value += addend;
  94
  95    switch (type) {
  96    case R_AARCH64_JUMP26:
  97    case R_AARCH64_CALL26:
  98        reloc_pc26(code_ptr, value);
  99        break;
 100    case R_AARCH64_CONDBR19:
 101        reloc_pc19(code_ptr, value);
 102        break;
 103
 104    default:
 105        tcg_abort();
 106    }
 107}
 108
 109/* parse target specific constraints */
 110static int target_parse_constraint(TCGArgConstraint *ct,
 111                                   const char **pct_str)
 112{
 113    const char *ct_str = *pct_str;
 114
 115    switch (ct_str[0]) {
 116    case 'r':
 117        ct->ct |= TCG_CT_REG;
 118        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
 119        break;
 120    case 'l': /* qemu_ld / qemu_st address, data_reg */
 121        ct->ct |= TCG_CT_REG;
 122        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
 123#ifdef CONFIG_SOFTMMU
 124        /* x0 and x1 will be overwritten when reading the tlb entry;
 125           x2 and x3 are used for the helper arguments, so avoid them too. */
 126        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
 127        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
 128        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
 129        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
 130#endif
 131        break;
 132    default:
 133        return -1;
 134    }
 135
 136    ct_str++;
 137    *pct_str = ct_str;
 138    return 0;
 139}
 140
 141static inline int tcg_target_const_match(tcg_target_long val,
 142                                         const TCGArgConstraint *arg_ct)
 143{
 144    int ct = arg_ct->ct;
 145
 146    if (ct & TCG_CT_CONST) {
 147        return 1;
 148    }
 149
 150    return 0;
 151}
 152
 153enum aarch64_cond_code {
 154    COND_EQ = 0x0,
 155    COND_NE = 0x1,
 156    COND_CS = 0x2,     /* Unsigned greater or equal */
 157    COND_HS = COND_CS, /* ALIAS greater or equal */
 158    COND_CC = 0x3,     /* Unsigned less than */
 159    COND_LO = COND_CC, /* ALIAS Lower */
 160    COND_MI = 0x4,     /* Negative */
 161    COND_PL = 0x5,     /* Zero or greater */
 162    COND_VS = 0x6,     /* Overflow */
 163    COND_VC = 0x7,     /* No overflow */
 164    COND_HI = 0x8,     /* Unsigned greater than */
 165    COND_LS = 0x9,     /* Unsigned less or equal */
 166    COND_GE = 0xa,
 167    COND_LT = 0xb,
 168    COND_GT = 0xc,
 169    COND_LE = 0xd,
 170    COND_AL = 0xe,
 171    COND_NV = 0xf, /* behaves like COND_AL here */
 172};
 173
 174static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
 175    [TCG_COND_EQ] = COND_EQ,
 176    [TCG_COND_NE] = COND_NE,
 177    [TCG_COND_LT] = COND_LT,
 178    [TCG_COND_GE] = COND_GE,
 179    [TCG_COND_LE] = COND_LE,
 180    [TCG_COND_GT] = COND_GT,
 181    /* unsigned */
 182    [TCG_COND_LTU] = COND_LO,
 183    [TCG_COND_GTU] = COND_HI,
 184    [TCG_COND_GEU] = COND_HS,
 185    [TCG_COND_LEU] = COND_LS,
 186};
 187
 188/* opcodes for LDR / STR instructions with base + simm9 addressing */
 189enum aarch64_ldst_op_data { /* size of the data moved */
 190    LDST_8 = 0x38,
 191    LDST_16 = 0x78,
 192    LDST_32 = 0xb8,
 193    LDST_64 = 0xf8,
 194};
 195enum aarch64_ldst_op_type { /* type of operation */
 196    LDST_ST = 0x0,    /* store */
 197    LDST_LD = 0x4,    /* load */
 198    LDST_LD_S_X = 0x8,  /* load and sign-extend into Xt */
 199    LDST_LD_S_W = 0xc,  /* load and sign-extend into Wt */
 200};
 201
 202enum aarch64_arith_opc {
 203    ARITH_AND = 0x0a,
 204    ARITH_ADD = 0x0b,
 205    ARITH_OR = 0x2a,
 206    ARITH_ADDS = 0x2b,
 207    ARITH_XOR = 0x4a,
 208    ARITH_SUB = 0x4b,
 209    ARITH_ANDS = 0x6a,
 210    ARITH_SUBS = 0x6b,
 211};
 212
 213enum aarch64_srr_opc {
 214    SRR_SHL = 0x0,
 215    SRR_SHR = 0x4,
 216    SRR_SAR = 0x8,
 217    SRR_ROR = 0xc
 218};
 219
 220static inline enum aarch64_ldst_op_data
 221aarch64_ldst_get_data(TCGOpcode tcg_op)
 222{
 223    switch (tcg_op) {
 224    case INDEX_op_ld8u_i32:
 225    case INDEX_op_ld8s_i32:
 226    case INDEX_op_ld8u_i64:
 227    case INDEX_op_ld8s_i64:
 228    case INDEX_op_st8_i32:
 229    case INDEX_op_st8_i64:
 230        return LDST_8;
 231
 232    case INDEX_op_ld16u_i32:
 233    case INDEX_op_ld16s_i32:
 234    case INDEX_op_ld16u_i64:
 235    case INDEX_op_ld16s_i64:
 236    case INDEX_op_st16_i32:
 237    case INDEX_op_st16_i64:
 238        return LDST_16;
 239
 240    case INDEX_op_ld_i32:
 241    case INDEX_op_st_i32:
 242    case INDEX_op_ld32u_i64:
 243    case INDEX_op_ld32s_i64:
 244    case INDEX_op_st32_i64:
 245        return LDST_32;
 246
 247    case INDEX_op_ld_i64:
 248    case INDEX_op_st_i64:
 249        return LDST_64;
 250
 251    default:
 252        tcg_abort();
 253    }
 254}
 255
 256static inline enum aarch64_ldst_op_type
 257aarch64_ldst_get_type(TCGOpcode tcg_op)
 258{
 259    switch (tcg_op) {
 260    case INDEX_op_st8_i32:
 261    case INDEX_op_st16_i32:
 262    case INDEX_op_st8_i64:
 263    case INDEX_op_st16_i64:
 264    case INDEX_op_st_i32:
 265    case INDEX_op_st32_i64:
 266    case INDEX_op_st_i64:
 267        return LDST_ST;
 268
 269    case INDEX_op_ld8u_i32:
 270    case INDEX_op_ld16u_i32:
 271    case INDEX_op_ld8u_i64:
 272    case INDEX_op_ld16u_i64:
 273    case INDEX_op_ld_i32:
 274    case INDEX_op_ld32u_i64:
 275    case INDEX_op_ld_i64:
 276        return LDST_LD;
 277
 278    case INDEX_op_ld8s_i32:
 279    case INDEX_op_ld16s_i32:
 280        return LDST_LD_S_W;
 281
 282    case INDEX_op_ld8s_i64:
 283    case INDEX_op_ld16s_i64:
 284    case INDEX_op_ld32s_i64:
 285        return LDST_LD_S_X;
 286
 287    default:
 288        tcg_abort();
 289    }
 290}
 291
 292static inline uint32_t tcg_in32(TCGContext *s)
 293{
 294    uint32_t v = *(uint32_t *)s->code_ptr;
 295    return v;
 296}
 297
 298static inline void tcg_out_ldst_9(TCGContext *s,
 299                                  enum aarch64_ldst_op_data op_data,
 300                                  enum aarch64_ldst_op_type op_type,
 301                                  TCGReg rd, TCGReg rn, tcg_target_long offset)
 302{
 303    /* use LDUR-style addressing: base register plus 9-bit signed unscaled offset */
 304    unsigned int mod, off;
 305
 306    if (offset < 0) {
 307        off = (256 + offset);
 308        mod = 0x1;
 309    } else {
 310        off = offset;
 311        mod = 0x0;
 312    }
 313
 314    mod |= op_type;
 315    tcg_out32(s, op_data << 24 | mod << 20 | off << 12 | rn << 5 | rd);
 316}
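
/* Editor's illustration (hypothetical example, not in the original): using the
   enums above, a sign-extending 32-bit load with a small negative offset, i.e.
   "ldursw x0, [x19, #-16]", can be emitted as follows. */
static inline void tcg_out_ldst_9_example(TCGContext *s)
{
    /* LDST_32 selects the 32-bit data size, LDST_LD_S_X sign-extends into Xt */
    tcg_out_ldst_9(s, LDST_32, LDST_LD_S_X, TCG_REG_X0, TCG_REG_X19, -16);
}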
 317
 318/* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
 319static inline void tcg_out_ldst_12(TCGContext *s,
 320                                   enum aarch64_ldst_op_data op_data,
 321                                   enum aarch64_ldst_op_type op_type,
 322                                   TCGReg rd, TCGReg rn,
 323                                   tcg_target_ulong scaled_uimm)
 324{
 325    tcg_out32(s, (op_data | 1) << 24
 326              | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
 327}
 328
 329static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src)
 330{
 331    /* register to register move using MOV (shifted register with no shift) */
 332    /* using MOV, alias of ORR Wd, WZR, Wm: 0x2a0003e0 | (src << 16) | rd */
 333    unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
 334    tcg_out32(s, base | src << 16 | rd);
 335}
 336
 337static inline void tcg_out_movi_aux(TCGContext *s,
 338                                    TCGReg rd, uint64_t value)
 339{
 340    uint32_t half, base, shift, movk = 0;
 341    /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
 342    /* using MOVZ 0x52800000 | extended reg.. */
 343    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
 344    /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
 345       first MOVZ with the half-word immediate skipping the zeros, with a shift
 346       (LSL) equal to this number. Then morph all next instructions into MOVKs.
 347       Zero the processed half-word in the value, continue until empty.
 348       We build the final result 16bits at a time with up to 4 instructions,
 349       but do not emit instructions for 16bit zero holes. */
 350    do {
 351        shift = ctz64(value) & (63 & -16);
 352        half = (value >> shift) & 0xffff;
 353        tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
 354        movk = 0x20000000; /* morph next MOVZs into MOVKs */
 355        value &= ~(0xffffUL << shift);
 356    } while (value);
 357}
 358
 359static inline void tcg_out_movi(TCGContext *s, TCGType type,
 360                                TCGReg rd, tcg_target_long value)
 361{
 362    if (type == TCG_TYPE_I64) {
 363        tcg_out_movi_aux(s, rd, value);
 364    } else {
 365        tcg_out_movi_aux(s, rd, value & 0xffffffff);
 366    }
 367}
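
/* Editor's worked example (not part of the original): loading the constant
   0x0000123400005678 with the MOVZ/MOVK construction above emits only two
   instructions, because the all-zero half-words are skipped:
       movz x0, #0x5678
       movk x0, #0x1234, lsl #32
   The function name below is hypothetical. */
static inline void tcg_out_movi_example(TCGContext *s)
{
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, 0x0000123400005678ULL);
}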
 368
 369static inline void tcg_out_ldst_r(TCGContext *s,
 370                                  enum aarch64_ldst_op_data op_data,
 371                                  enum aarch64_ldst_op_type op_type,
 372                                  TCGReg rd, TCGReg base, TCGReg regoff)
 373{
 374    /* load/store between register and memory using base + 64bit register offset */
 375    /* e.g. STR Wt, [Xn, Xm]: 0xb8206800 | (regoff << 16) | (base << 5) | rd */
 376    /* the 0x6000 is the "option" field: LSL #0, i.e. no extend */
 377    tcg_out32(s, 0x00206800
 378              | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
 379}
 380
 381/* handle the general load/store case: pick the best addressing form for the offset */
 382static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
 383                                enum aarch64_ldst_op_type type,
 384                                TCGReg rd, TCGReg rn, tcg_target_long offset)
 385{
 386    if (offset >= -256 && offset < 256) {
 387        tcg_out_ldst_9(s, data, type, rd, rn, offset);
 388        return;
 389    }
 390
 391    if (offset >= 256) {
 392        /* if the offset is naturally aligned and in range,
 393           then we can use the scaled uimm12 encoding */
 394        unsigned int s_bits = data >> 6;
 395        if (!(offset & ((1 << s_bits) - 1))) {
 396            tcg_target_ulong scaled_uimm = offset >> s_bits;
 397            if (scaled_uimm <= 0xfff) {
 398                tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
 399                return;
 400            }
 401        }
 402    }
 403
 404    /* worst-case scenario, move offset to temp register, use reg offset */
 405    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
 406    tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
 407}
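
/* Editor's illustration (hypothetical, not in the original): the three
   addressing forms tcg_out_ldst() falls back through, shown for a 64-bit
   load from x1. */
static inline void tcg_out_ldst_example(TCGContext *s)
{
    /* fits in simm9: ldur x0, [x1, #-8] */
    tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X0, TCG_REG_X1, -8);
    /* 8-byte aligned and 0x4000 >> 3 <= 0xfff: ldr x0, [x1, #0x4000] */
    tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X0, TCG_REG_X1, 0x4000);
    /* neither form fits: materialize the offset in TMP (x8), then ldr x0, [x1, x8] */
    tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X0, TCG_REG_X1, 0x123457);
}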
 408
 409/* mov alias implemented with add immediate, useful to move to/from SP */
 410static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn)
 411{
 412    /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
 413    unsigned int base = ext ? 0x91000000 : 0x11000000;
 414    tcg_out32(s, base | rn << 5 | rd);
 415}
 416
 417static inline void tcg_out_mov(TCGContext *s,
 418                               TCGType type, TCGReg ret, TCGReg arg)
 419{
 420    if (ret != arg) {
 421        tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg);
 422    }
 423}
 424
 425static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
 426                              TCGReg arg1, tcg_target_long arg2)
 427{
 428    tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
 429                 arg, arg1, arg2);
 430}
 431
 432static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
 433                              TCGReg arg1, tcg_target_long arg2)
 434{
 435    tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
 436                 arg, arg1, arg2);
 437}
 438
 439static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
 440                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm,
 441                                 int shift_imm)
 442{
 443    /* Using shifted register arithmetic operations */
 444    /* if extended register operation (64bit) just OR with 0x80 << 24 */
 445    unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24;
 446    if (shift_imm == 0) {
 447        shift = 0;
 448    } else if (shift_imm > 0) {
 449        shift = shift_imm << 10 | 1 << 22;
 450    } else /* (shift_imm < 0) */ {
 451        shift = (-shift_imm) << 10;
 452    }
 453    tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
 454}
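
/* Editor's note (illustrative, not in the original): shift_imm uses a signed
   convention above: a positive value selects LSR by that amount, a negative
   value selects LSL by its magnitude, and 0 means no shift.  The softmmu TLB
   lookup below relies on the LSL form.  Hypothetical examples: */
static inline void tcg_out_arith_example(TCGContext *s)
{
    /* add x2, x2, x0, lsl #5 */
    tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2, TCG_REG_X0, -5);
    /* sub w3, w3, w4, lsr #1 */
    tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_X3, TCG_REG_X3, TCG_REG_X4, 1);
}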
 455
 456static inline void tcg_out_mul(TCGContext *s, int ext,
 457                               TCGReg rd, TCGReg rn, TCGReg rm)
 458{
 459    /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
 460    unsigned int base = ext ? 0x9b007c00 : 0x1b007c00;
 461    tcg_out32(s, base | rm << 16 | rn << 5 | rd);
 462}
 463
 464static inline void tcg_out_shiftrot_reg(TCGContext *s,
 465                                        enum aarch64_srr_opc opc, int ext,
 466                                        TCGReg rd, TCGReg rn, TCGReg rm)
 467{
 468    /* using 2-source data processing instructions 0x1ac02000 */
 469    unsigned int base = ext ? 0x9ac02000 : 0x1ac02000;
 470    tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
 471}
 472
 473static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
 474                                unsigned int a, unsigned int b)
 475{
 476    /* Using UBFM 0x53000000 Wd, Wn, a, b */
 477    unsigned int base = ext ? 0xd3400000 : 0x53000000;
 478    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
 479}
 480
 481static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
 482                                unsigned int a, unsigned int b)
 483{
 484    /* Using SBFM 0x13000000 Wd, Wn, a, b */
 485    unsigned int base = ext ? 0x93400000 : 0x13000000;
 486    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
 487}
 488
 489static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd,
 490                                TCGReg rn, TCGReg rm, unsigned int a)
 491{
 492    /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
 493    unsigned int base = ext ? 0x93c00000 : 0x13800000;
 494    tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
 495}
 496
 497static inline void tcg_out_shl(TCGContext *s, int ext,
 498                               TCGReg rd, TCGReg rn, unsigned int m)
 499{
 500    int bits, max;
 501    bits = ext ? 64 : 32;
 502    max = bits - 1;
 503    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
 504}
 505
 506static inline void tcg_out_shr(TCGContext *s, int ext,
 507                               TCGReg rd, TCGReg rn, unsigned int m)
 508{
 509    int max = ext ? 63 : 31;
 510    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
 511}
 512
 513static inline void tcg_out_sar(TCGContext *s, int ext,
 514                               TCGReg rd, TCGReg rn, unsigned int m)
 515{
 516    int max = ext ? 63 : 31;
 517    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
 518}
 519
 520static inline void tcg_out_rotr(TCGContext *s, int ext,
 521                                TCGReg rd, TCGReg rn, unsigned int m)
 522{
 523    int max = ext ? 63 : 31;
 524    tcg_out_extr(s, ext, rd, rn, rn, m & max);
 525}
 526
 527static inline void tcg_out_rotl(TCGContext *s, int ext,
 528                                TCGReg rd, TCGReg rn, unsigned int m)
 529{
 530    int bits, max;
 531    bits = ext ? 64 : 32;
 532    max = bits - 1;
 533    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
 534}
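
/* Editor's illustration (hypothetical, not in the original): a rotate-left by
   a constant becomes EXTR with the complementary amount, e.g. a 32-bit rotate
   left by 8 is "extr w0, w1, w1, #24". */
static inline void tcg_out_rotl_example(TCGContext *s)
{
    tcg_out_rotl(s, 0, TCG_REG_X0, TCG_REG_X1, 8);  /* extr w0, w1, w1, #24 */
}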
 535
 536static inline void tcg_out_cmp(TCGContext *s, int ext, TCGReg rn, TCGReg rm,
 537                               int shift_imm)
 538{
 539    /* Using CMP alias SUBS wzr, Wn, Wm */
 540    tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
 541}
 542
 543static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c)
 544{
 545    /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
 546    unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
 547    tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
 548}
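
/* Editor's note (illustrative, not in the original): CSET Wd, cond is an alias
   of CSINC Wd, WZR, WZR, invert(cond), which is why the helper above passes
   the inverted TCG condition.  A hypothetical setcond on equality: */
static inline void tcg_out_cset_example(TCGContext *s)
{
    tcg_out_cset(s, 0, TCG_REG_X0, TCG_COND_EQ);  /* cset w0, eq */
}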
 549
 550static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
 551{
 552    tcg_target_long offset;
 553    offset = (target - (tcg_target_long)s->code_ptr) / 4;
 554
 555    if (offset < -0x02000000 || offset >= 0x02000000) {
 556        /* out of 26bit range */
 557        tcg_abort();
 558    }
 559
 560    tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
 561}
 562
 563static inline void tcg_out_goto_noaddr(TCGContext *s)
 564{
 565    /* We take care not to clobber an already-patched branch target: the
 566       offset bits are read back from the buffer and preserved. This ensures
 567       that caches and memory are kept coherent during retranslation.
 568       Mask away possible garbage in the high bits for the first translation,
 569       while keeping the offset bits for retranslation. */
 570    uint32_t insn;
 571    insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
 572    tcg_out32(s, insn);
 573}
 574
 575static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
 576{
 577    /* see comments in tcg_out_goto_noaddr */
 578    uint32_t insn;
 579    insn = tcg_in32(s) & (0x07ffff << 5);
 580    insn |= 0x54000000 | tcg_cond_to_aarch64[c];
 581    tcg_out32(s, insn);
 582}
 583
 584static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c,
 585                                     tcg_target_long target)
 586{
 587    tcg_target_long offset;
 588    offset = (target - (tcg_target_long)s->code_ptr) / 4;
 589
 590    if (offset < -0x40000 || offset >= 0x40000) {
 591        /* out of 19bit range */
 592        tcg_abort();
 593    }
 594
 595    offset &= 0x7ffff;
 596    tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
 597}
 598
 599static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
 600{
 601    tcg_out32(s, 0xd63f0000 | reg << 5);
 602}
 603
 604static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
 605{
 606    tcg_out32(s, 0xd61f0000 | reg << 5);
 607}
 608
 609static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
 610{
 611    tcg_target_long offset;
 612
 613    offset = (target - (tcg_target_long)s->code_ptr) / 4;
 614
 615    if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit range */
 616        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
 617        tcg_out_callr(s, TCG_REG_TMP);
 618    } else {
 619        tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
 620    }
 621}
 622
 623/* encode a logical immediate: the user parameter M (the number of contiguous
 624   set bits in the pattern) maps to the instruction field S = M - 1 */
 625static inline unsigned int
 626aarch64_limm(unsigned int m, unsigned int r)
 627{
 628    assert(m > 0);
 629    return r << 16 | (m - 1) << 10;
 630}
 631
 632/* test a register against an immediate bit pattern made of
 633   M set bits rotated right by R.
 634   Examples:
 635   to test a 32/64 reg against 0x00000007, pass M = 3,  R = 0.
 636   to test a 32/64 reg against 0x000000ff, pass M = 8,  R = 0.
 637   to test a 32bit reg against 0xff000000, pass M = 8,  R = 8.
 638   to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
 639 */
 640static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn,
 641                               unsigned int m, unsigned int r)
 642{
 643    /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
 644    unsigned int base = ext ? 0xf240001f : 0x7200001f;
 645    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
 646}
 647
 648/* AND a register with a bit pattern, similar to TST, but without setting flags */
 649static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
 650                                unsigned int m, unsigned int r)
 651{
 652    /* using AND 0x12000000 */
 653    unsigned int base = ext ? 0x92400000 : 0x12000000;
 654    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
 655}
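
/* Editor's illustration (hypothetical, not in the original): with the (M, R)
   scheme described above, masking a register down to its low byte, i.e.
   "and x0, x1, #0xff", uses M = 8 set bits and rotation R = 0. */
static inline void tcg_out_andi_example(TCGContext *s)
{
    tcg_out_andi(s, 1, TCG_REG_X0, TCG_REG_X1, 8, 0);  /* and x0, x1, #0xff */
}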
 656
 657static inline void tcg_out_ret(TCGContext *s)
 658{
 659    /* emit RET { LR } */
 660    tcg_out32(s, 0xd65f03c0);
 661}
 662
 663void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 664{
 665    tcg_target_long target, offset;
 666    target = (tcg_target_long)addr;
 667    offset = (target - (tcg_target_long)jmp_addr) / 4;
 668
 669    if (offset < -0x02000000 || offset >= 0x02000000) {
 670        /* out of 26bit range */
 671        tcg_abort();
 672    }
 673
 674    patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
 675    flush_icache_range(jmp_addr, jmp_addr + 4);
 676}
 677
 678static inline void tcg_out_goto_label(TCGContext *s, int label_index)
 679{
 680    TCGLabel *l = &s->labels[label_index];
 681
 682    if (!l->has_value) {
 683        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
 684        tcg_out_goto_noaddr(s);
 685    } else {
 686        tcg_out_goto(s, l->u.value);
 687    }
 688}
 689
 690static inline void tcg_out_goto_label_cond(TCGContext *s,
 691                                           TCGCond c, int label_index)
 692{
 693    TCGLabel *l = &s->labels[label_index];
 694
 695    if (!l->has_value) {
 696        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0);
 697        tcg_out_goto_cond_noaddr(s, c);
 698    } else {
 699        tcg_out_goto_cond(s, c, l->u.value);
 700    }
 701}
 702
 703static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
 704{
 705    /* using REV 0x5ac00800 */
 706    unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
 707    tcg_out32(s, base | rm << 5 | rd);
 708}
 709
 710static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
 711{
 712    /* using REV16 0x5ac00400 */
 713    unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
 714    tcg_out32(s, base | rm << 5 | rd);
 715}
 716
 717static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits,
 718                               TCGReg rd, TCGReg rn)
 719{
 720    /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00
 721       of SBFM Xd, Xn, #0, #7|15|31 */
 722    int bits = 8 * (1 << s_bits) - 1;
 723    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
 724}
 725
 726static inline void tcg_out_uxt(TCGContext *s, int s_bits,
 727                               TCGReg rd, TCGReg rn)
 728{
 729    /* using ALIASes UXTB 0x53001c00, UXTH 0x53003c00
 730       of UBFM Wd, Wn, #0, #7|15 */
 731    int bits = 8 * (1 << s_bits) - 1;
 732    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
 733}
 734
 735static inline void tcg_out_addi(TCGContext *s, int ext,
 736                                TCGReg rd, TCGReg rn, unsigned int aimm)
 737{
 738    /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
 739    /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
 740    unsigned int base = ext ? 0x91000000 : 0x11000000;
 741
 742    if (aimm <= 0xfff) {
 743        aimm <<= 10;
 744    } else {
 745        /* only a shift left by 12 is available; assert on values we cannot represent */
 746        assert(!(aimm & 0xfff));
 747        assert(aimm <= 0xfff000);
 748        base |= 1 << 22; /* apply LSL 12 */
 749        aimm >>= 2;
 750    }
 751
 752    tcg_out32(s, base | aimm | (rn << 5) | rd);
 753}
 754
 755static inline void tcg_out_subi(TCGContext *s, int ext,
 756                                TCGReg rd, TCGReg rn, unsigned int aimm)
 757{
 758    /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
 759    /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
 760    unsigned int base = ext ? 0xd1000000 : 0x51000000;
 761
 762    if (aimm <= 0xfff) {
 763        aimm <<= 10;
 764    } else {
 765        /* only a shift left by 12 is available; assert on values we cannot represent */
 766        assert(!(aimm & 0xfff));
 767        assert(aimm <= 0xfff000);
 768        base |= 1 << 22; /* apply LSL 12 */
 769        aimm >>= 2;
 770    }
 771
 772    tcg_out32(s, base | aimm | (rn << 5) | rd);
 773}
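
/* Editor's illustration (hypothetical, not in the original): immediates above
   0xfff must be 4KB-aligned so the LSL #12 form can carry them; larger or
   unaligned values have to be split by the caller.  The softmmu TLB code
   below does exactly that with the two halves of tlb_offset. */
static inline void tcg_out_addi_example(TCGContext *s)
{
    tcg_out_addi(s, 1, TCG_REG_X2, TCG_REG_X2, 0x12000); /* add x2, x2, #0x12, lsl #12 */
    tcg_out_addi(s, 1, TCG_REG_X2, TCG_REG_X2, 0x345);   /* add x2, x2, #0x345 */
}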
 774
 775static inline void tcg_out_nop(TCGContext *s)
 776{
 777    tcg_out32(s, 0xd503201f);
 778}
 779
 780#ifdef CONFIG_SOFTMMU
 781#include "exec/softmmu_defs.h"
 782
 783/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
 784   int mmu_idx) */
 785static const void * const qemu_ld_helpers[4] = {
 786    helper_ldb_mmu,
 787    helper_ldw_mmu,
 788    helper_ldl_mmu,
 789    helper_ldq_mmu,
 790};
 791
 792/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
 793   uintxx_t val, int mmu_idx) */
 794static const void * const qemu_st_helpers[4] = {
 795    helper_stb_mmu,
 796    helper_stw_mmu,
 797    helper_stl_mmu,
 798    helper_stq_mmu,
 799};
 800
 801static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 802{
 803    reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
 804    tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
 805    tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
 806    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
 807    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
 808                 (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
 809    tcg_out_callr(s, TCG_REG_TMP);
 810    if (lb->opc & 0x04) {
 811        tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0);
 812    } else {
 813        tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0);
 814    }
 815
 816    tcg_out_goto(s, (tcg_target_long)lb->raddr);
 817}
 818
 819static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 820{
 821    reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
 822
 823    tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
 824    tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
 825    tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
 826    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
 827    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
 828                 (tcg_target_long)qemu_st_helpers[lb->opc & 3]);
 829    tcg_out_callr(s, TCG_REG_TMP);
 830
 831    tcg_out_nop(s);
 832    tcg_out_goto(s, (tcg_target_long)lb->raddr);
 833}
 834
 835void tcg_out_tb_finalize(TCGContext *s)
 836{
 837    int i;
 838    for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
 839        TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i];
 840        if (label->is_ld) {
 841            tcg_out_qemu_ld_slow_path(s, label);
 842        } else {
 843            tcg_out_qemu_st_slow_path(s, label);
 844        }
 845    }
 846}
 847
 848static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
 849                                TCGReg data_reg, TCGReg addr_reg,
 850                                int mem_index,
 851                                uint8_t *raddr, uint8_t *label_ptr)
 852{
 853    int idx;
 854    TCGLabelQemuLdst *label;
 855
 856    if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
 857        tcg_abort();
 858    }
 859
 860    idx = s->nb_qemu_ldst_labels++;
 861    label = &s->qemu_ldst_labels[idx];
 862    label->is_ld = is_ld;
 863    label->opc = opc;
 864    label->datalo_reg = data_reg;
 865    label->addrlo_reg = addr_reg;
 866    label->mem_index = mem_index;
 867    label->raddr = raddr;
 868    label->label_ptr[0] = label_ptr;
 869}
 870
 871/* Load and compare a TLB entry, emitting the conditional jump to the
 872   slow path for the failure case, which will be patched later when finalizing
 873   the slow path. Generated code returns the host addend in X1,
 874   clobbers X0,X2,X3,TMP. */
 875static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
 876            int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
 877{
 878    TCGReg base = TCG_AREG0;
 879    int tlb_offset = is_read ?
 880        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
 881        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
 882    /* Extract the TLB index from the address into X0.
 883       X0<CPU_TLB_BITS:0> =
 884       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
 885    tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
 886                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
 887    /* Store the page mask part of the address and the low s_bits into X3.
 888       Later this allows checking for equality and alignment at the same time.
 889       X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
 890    tcg_out_andi(s, (TARGET_LONG_BITS == 64), TCG_REG_X3, addr_reg,
 891                 (TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
 892                 (TARGET_LONG_BITS - TARGET_PAGE_BITS));
 893    /* Add any "high bits" from the tlb offset to the env address into X2,
 894       to take advantage of the LSL12 form of the addi instruction.
 895       X2 = env + (tlb_offset & 0xfff000) */
 896    tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
 897    /* Merge the tlb index contribution into X2.
 898       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
 899    tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2,
 900                  TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
 901    /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
 902       X0 = load [X2 + (tlb_offset & 0x000fff)] */
 903    tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
 904                 LDST_LD, TCG_REG_X0, TCG_REG_X2,
 905                 (tlb_offset & 0xfff));
 906    /* Load the tlb addend. Do that early to avoid stalling.
 907       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
 908    tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
 909                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
 910                 (is_read ? offsetof(CPUTLBEntry, addr_read)
 911                  : offsetof(CPUTLBEntry, addr_write)));
 912    /* Perform the address comparison. */
 913    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
 914    *label_ptr = s->code_ptr;
 915    /* If not equal, we jump to the slow path. */
 916    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
 917}
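
/* Editor's sketch (illustrative, not in the original): for a 64-bit guest the
   code generated by tcg_out_tlb_read() is roughly:
       ubfm x0, x_addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
       and  x3, x_addr, #(page mask | low s_bits)
       add  x2, x_env, #(tlb_offset & 0xfff000)
       add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr  x0, [x2, #(tlb_offset & 0xfff)]         ; tlb comparator
       ldr  x1, [x2, #(... + offsetof addend)]      ; host addend
       cmp  x0, x3
       b.ne slow_path                               ; patched at finalize time
   where x_addr and x_env stand for addr_reg and TCG_AREG0. */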
 918
 919#endif /* CONFIG_SOFTMMU */
 920
 921static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
 922                                   TCGReg addr_r, TCGReg off_r)
 923{
 924    switch (opc) {
 925    case 0:
 926        tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
 927        break;
 928    case 0 | 4:
 929        tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
 930        break;
 931    case 1:
 932        tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
 933        if (TCG_LDST_BSWAP) {
 934            tcg_out_rev16(s, 0, data_r, data_r);
 935        }
 936        break;
 937    case 1 | 4:
 938        if (TCG_LDST_BSWAP) {
 939            tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
 940            tcg_out_rev16(s, 0, data_r, data_r);
 941            tcg_out_sxt(s, 1, 1, data_r, data_r);
 942        } else {
 943            tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
 944        }
 945        break;
 946    case 2:
 947        tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
 948        if (TCG_LDST_BSWAP) {
 949            tcg_out_rev(s, 0, data_r, data_r);
 950        }
 951        break;
 952    case 2 | 4:
 953        if (TCG_LDST_BSWAP) {
 954            tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
 955            tcg_out_rev(s, 0, data_r, data_r);
 956            tcg_out_sxt(s, 1, 2, data_r, data_r);
 957        } else {
 958            tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
 959        }
 960        break;
 961    case 3:
 962        tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
 963        if (TCG_LDST_BSWAP) {
 964            tcg_out_rev(s, 1, data_r, data_r);
 965        }
 966        break;
 967    default:
 968        tcg_abort();
 969    }
 970}
 971
 972static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
 973                                   TCGReg addr_r, TCGReg off_r)
 974{
 975    switch (opc) {
 976    case 0:
 977        tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
 978        break;
 979    case 1:
 980        if (TCG_LDST_BSWAP) {
 981            tcg_out_rev16(s, 0, TCG_REG_TMP, data_r);
 982            tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
 983        } else {
 984            tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
 985        }
 986        break;
 987    case 2:
 988        if (TCG_LDST_BSWAP) {
 989            tcg_out_rev(s, 0, TCG_REG_TMP, data_r);
 990            tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
 991        } else {
 992            tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
 993        }
 994        break;
 995    case 3:
 996        if (TCG_LDST_BSWAP) {
 997            tcg_out_rev(s, 1, TCG_REG_TMP, data_r);
 998            tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
 999        } else {
1000            tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
1001        }
1002        break;
1003    default:
1004        tcg_abort();
1005    }
1006}
1007
1008static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
1009{
1010    TCGReg addr_reg, data_reg;
1011#ifdef CONFIG_SOFTMMU
1012    int mem_index, s_bits;
1013    uint8_t *label_ptr;
1014#endif
1015    data_reg = args[0];
1016    addr_reg = args[1];
1017
1018#ifdef CONFIG_SOFTMMU
1019    mem_index = args[2];
1020    s_bits = opc & 3;
1021    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1022    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1023    add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
1024                        mem_index, s->code_ptr, label_ptr);
1025#else /* !CONFIG_SOFTMMU */
1026    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
1027                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1028#endif /* CONFIG_SOFTMMU */
1029}
1030
1031static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
1032{
1033    TCGReg addr_reg, data_reg;
1034#ifdef CONFIG_SOFTMMU
1035    int mem_index, s_bits;
1036    uint8_t *label_ptr;
1037#endif
1038    data_reg = args[0];
1039    addr_reg = args[1];
1040
1041#ifdef CONFIG_SOFTMMU
1042    mem_index = args[2];
1043    s_bits = opc & 3;
1044
1045    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1046    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1047    add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
1048                        mem_index, s->code_ptr, label_ptr);
1049#else /* !CONFIG_SOFTMMU */
1050    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
1051                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1052#endif /* CONFIG_SOFTMMU */
1053}
1054
1055static uint8_t *tb_ret_addr;
1056
1057/* callee stack use example:
1058   stp     x29, x30, [sp,#-32]!
1059   mov     x29, sp
1060   stp     x1, x2, [sp,#16]
1061   ...
1062   ldp     x1, x2, [sp,#16]
1063   ldp     x29, x30, [sp],#32
1064   ret
1065*/
1066
1067/* push r1 and r2, and allocate stack space for a total of alloc_n
1068   elements (1 element = 16 bytes); alloc_n must be between 1 and 31. */
1069static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
1070                                     TCGReg r1, TCGReg r2, int alloc_n)
1071{
1072    /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
1073       | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
1074    assert(alloc_n > 0 && alloc_n < 0x20);
1075    alloc_n = (-alloc_n) & 0x3f;
1076    tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1077}
1078
1079/* dealloc stack space for a total of alloc_n elements and pop r1, r2.  */
1080static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
1081                                    TCGReg r1, TCGReg r2, int alloc_n)
1082{
1083    /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
1084       | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
1085    assert(alloc_n > 0 && alloc_n < 0x20);
1086    tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1087}
1088
1089static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
1090                                      TCGReg r1, TCGReg r2, int idx)
1091{
1092    /* using register pair offset simm7 STP 0x29000000 | (ext)
1093       | idx << 16 | r2 << 10 | addr << 5 | r1 */
1094    assert(idx > 0 && idx < 0x20);
1095    tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1096}
1097
1098static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
1099                                     TCGReg r1, TCGReg r2, int idx)
1100{
1101    /* using register pair offset simm7 LDP 0x29400000 | (ext)
1102       | idx << 16 | r2 << 10 | addr << 5 | r1 */
1103    assert(idx > 0 && idx < 0x20);
1104    tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1105}
1106
1107static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1108                       const TCGArg *args, const int *const_args)
1109{
1110    /* ext will be set in the switch below, which will fall through to the
1111       common code. It triggers the use of extended regs where appropriate. */
1112    int ext = 0;
1113
1114    switch (opc) {
1115    case INDEX_op_exit_tb:
1116        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
1117        tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
1118        break;
1119
1120    case INDEX_op_goto_tb:
1121#ifndef USE_DIRECT_JUMP
1122#error "USE_DIRECT_JUMP required for aarch64"
1123#endif
1124        assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1125        s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1126        /* actual branch destination will be patched by
1127           aarch64_tb_set_jmp_target later, beware retranslation. */
1128        tcg_out_goto_noaddr(s);
1129        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1130        break;
1131
1132    case INDEX_op_call:
1133        if (const_args[0]) {
1134            tcg_out_call(s, args[0]);
1135        } else {
1136            tcg_out_callr(s, args[0]);
1137        }
1138        break;
1139
1140    case INDEX_op_br:
1141        tcg_out_goto_label(s, args[0]);
1142        break;
1143
1144    case INDEX_op_ld_i32:
1145    case INDEX_op_ld_i64:
1146    case INDEX_op_st_i32:
1147    case INDEX_op_st_i64:
1148    case INDEX_op_ld8u_i32:
1149    case INDEX_op_ld8s_i32:
1150    case INDEX_op_ld16u_i32:
1151    case INDEX_op_ld16s_i32:
1152    case INDEX_op_ld8u_i64:
1153    case INDEX_op_ld8s_i64:
1154    case INDEX_op_ld16u_i64:
1155    case INDEX_op_ld16s_i64:
1156    case INDEX_op_ld32u_i64:
1157    case INDEX_op_ld32s_i64:
1158    case INDEX_op_st8_i32:
1159    case INDEX_op_st8_i64:
1160    case INDEX_op_st16_i32:
1161    case INDEX_op_st16_i64:
1162    case INDEX_op_st32_i64:
1163        tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
1164                     args[0], args[1], args[2]);
1165        break;
1166
1167    case INDEX_op_mov_i64:
1168        ext = 1; /* fall through */
1169    case INDEX_op_mov_i32:
1170        tcg_out_movr(s, ext, args[0], args[1]);
1171        break;
1172
1173    case INDEX_op_movi_i64:
1174        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1175        break;
1176    case INDEX_op_movi_i32:
1177        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1178        break;
1179
1180    case INDEX_op_add_i64:
1181        ext = 1; /* fall through */
1182    case INDEX_op_add_i32:
1183        tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
1184        break;
1185
1186    case INDEX_op_sub_i64:
1187        ext = 1; /* fall through */
1188    case INDEX_op_sub_i32:
1189        tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
1190        break;
1191
1192    case INDEX_op_and_i64:
1193        ext = 1; /* fall through */
1194    case INDEX_op_and_i32:
1195        tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
1196        break;
1197
1198    case INDEX_op_or_i64:
1199        ext = 1; /* fall through */
1200    case INDEX_op_or_i32:
1201        tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
1202        break;
1203
1204    case INDEX_op_xor_i64:
1205        ext = 1; /* fall through */
1206    case INDEX_op_xor_i32:
1207        tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
1208        break;
1209
1210    case INDEX_op_mul_i64:
1211        ext = 1; /* fall through */
1212    case INDEX_op_mul_i32:
1213        tcg_out_mul(s, ext, args[0], args[1], args[2]);
1214        break;
1215
1216    case INDEX_op_shl_i64:
1217        ext = 1; /* fall through */
1218    case INDEX_op_shl_i32:
1219        if (const_args[2]) {    /* LSL / UBFM Wd, Wn, (32 - m) */
1220            tcg_out_shl(s, ext, args[0], args[1], args[2]);
1221        } else {                /* LSL / LSLV */
1222            tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
1223        }
1224        break;
1225
1226    case INDEX_op_shr_i64:
1227        ext = 1; /* fall through */
1228    case INDEX_op_shr_i32:
1229        if (const_args[2]) {    /* LSR / UBFM Wd, Wn, m, 31 */
1230            tcg_out_shr(s, ext, args[0], args[1], args[2]);
1231        } else {                /* LSR / LSRV */
1232            tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
1233        }
1234        break;
1235
1236    case INDEX_op_sar_i64:
1237        ext = 1; /* fall through */
1238    case INDEX_op_sar_i32:
1239        if (const_args[2]) {    /* ASR / SBFM Wd, Wn, m, 31 */
1240            tcg_out_sar(s, ext, args[0], args[1], args[2]);
1241        } else {                /* ASR / ASRV */
1242            tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
1243        }
1244        break;
1245
1246    case INDEX_op_rotr_i64:
1247        ext = 1; /* fall through */
1248    case INDEX_op_rotr_i32:
1249        if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, m */
1250            tcg_out_rotr(s, ext, args[0], args[1], args[2]);
1251        } else {                /* ROR / RORV */
1252            tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
1253        }
1254        break;
1255
1256    case INDEX_op_rotl_i64:
1257        ext = 1; /* fall through */
1258    case INDEX_op_rotl_i32:     /* same as rotate right by (32 - m) */
1259        if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, 32 - m */
1260            tcg_out_rotl(s, ext, args[0], args[1], args[2]);
1261        } else {
1262            tcg_out_arith(s, ARITH_SUB, 0,
1263                          TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
1264            tcg_out_shiftrot_reg(s, SRR_ROR, ext,
1265                                 args[0], args[1], TCG_REG_TMP);
1266        }
1267        break;
1268
1269    case INDEX_op_brcond_i64:
1270        ext = 1; /* fall through */
1271    case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
1272        tcg_out_cmp(s, ext, args[0], args[1], 0);
1273        tcg_out_goto_label_cond(s, args[2], args[3]);
1274        break;
1275
1276    case INDEX_op_setcond_i64:
1277        ext = 1; /* fall through */
1278    case INDEX_op_setcond_i32:
1279        tcg_out_cmp(s, ext, args[1], args[2], 0);
1280        tcg_out_cset(s, 0, args[0], args[3]);
1281        break;
1282
1283    case INDEX_op_qemu_ld8u:
1284        tcg_out_qemu_ld(s, args, 0 | 0);
1285        break;
1286    case INDEX_op_qemu_ld8s:
1287        tcg_out_qemu_ld(s, args, 4 | 0);
1288        break;
1289    case INDEX_op_qemu_ld16u:
1290        tcg_out_qemu_ld(s, args, 0 | 1);
1291        break;
1292    case INDEX_op_qemu_ld16s:
1293        tcg_out_qemu_ld(s, args, 4 | 1);
1294        break;
1295    case INDEX_op_qemu_ld32u:
1296        tcg_out_qemu_ld(s, args, 0 | 2);
1297        break;
1298    case INDEX_op_qemu_ld32s:
1299        tcg_out_qemu_ld(s, args, 4 | 2);
1300        break;
1301    case INDEX_op_qemu_ld32:
1302        tcg_out_qemu_ld(s, args, 0 | 2);
1303        break;
1304    case INDEX_op_qemu_ld64:
1305        tcg_out_qemu_ld(s, args, 0 | 3);
1306        break;
1307    case INDEX_op_qemu_st8:
1308        tcg_out_qemu_st(s, args, 0);
1309        break;
1310    case INDEX_op_qemu_st16:
1311        tcg_out_qemu_st(s, args, 1);
1312        break;
1313    case INDEX_op_qemu_st32:
1314        tcg_out_qemu_st(s, args, 2);
1315        break;
1316    case INDEX_op_qemu_st64:
1317        tcg_out_qemu_st(s, args, 3);
1318        break;
1319
1320    case INDEX_op_bswap64_i64:
1321        ext = 1; /* fall through */
1322    case INDEX_op_bswap32_i64:
1323    case INDEX_op_bswap32_i32:
1324        tcg_out_rev(s, ext, args[0], args[1]);
1325        break;
1326    case INDEX_op_bswap16_i64:
1327    case INDEX_op_bswap16_i32:
1328        tcg_out_rev16(s, 0, args[0], args[1]);
1329        break;
1330
1331    case INDEX_op_ext8s_i64:
1332        ext = 1; /* fall through */
1333    case INDEX_op_ext8s_i32:
1334        tcg_out_sxt(s, ext, 0, args[0], args[1]);
1335        break;
1336    case INDEX_op_ext16s_i64:
1337        ext = 1; /* fall through */
1338    case INDEX_op_ext16s_i32:
1339        tcg_out_sxt(s, ext, 1, args[0], args[1]);
1340        break;
1341    case INDEX_op_ext32s_i64:
1342        tcg_out_sxt(s, 1, 2, args[0], args[1]);
1343        break;
1344    case INDEX_op_ext8u_i64:
1345    case INDEX_op_ext8u_i32:
1346        tcg_out_uxt(s, 0, args[0], args[1]);
1347        break;
1348    case INDEX_op_ext16u_i64:
1349    case INDEX_op_ext16u_i32:
1350        tcg_out_uxt(s, 1, args[0], args[1]);
1351        break;
1352    case INDEX_op_ext32u_i64:
1353        tcg_out_movr(s, 0, args[0], args[1]);
1354        break;
1355
1356    default:
1357        tcg_abort(); /* opcode not implemented */
1358    }
1359}
1360
1361static const TCGTargetOpDef aarch64_op_defs[] = {
1362    { INDEX_op_exit_tb, { } },
1363    { INDEX_op_goto_tb, { } },
1364    { INDEX_op_call, { "ri" } },
1365    { INDEX_op_br, { } },
1366
1367    { INDEX_op_mov_i32, { "r", "r" } },
1368    { INDEX_op_mov_i64, { "r", "r" } },
1369
1370    { INDEX_op_movi_i32, { "r" } },
1371    { INDEX_op_movi_i64, { "r" } },
1372
1373    { INDEX_op_ld8u_i32, { "r", "r" } },
1374    { INDEX_op_ld8s_i32, { "r", "r" } },
1375    { INDEX_op_ld16u_i32, { "r", "r" } },
1376    { INDEX_op_ld16s_i32, { "r", "r" } },
1377    { INDEX_op_ld_i32, { "r", "r" } },
1378    { INDEX_op_ld8u_i64, { "r", "r" } },
1379    { INDEX_op_ld8s_i64, { "r", "r" } },
1380    { INDEX_op_ld16u_i64, { "r", "r" } },
1381    { INDEX_op_ld16s_i64, { "r", "r" } },
1382    { INDEX_op_ld32u_i64, { "r", "r" } },
1383    { INDEX_op_ld32s_i64, { "r", "r" } },
1384    { INDEX_op_ld_i64, { "r", "r" } },
1385
1386    { INDEX_op_st8_i32, { "r", "r" } },
1387    { INDEX_op_st16_i32, { "r", "r" } },
1388    { INDEX_op_st_i32, { "r", "r" } },
1389    { INDEX_op_st8_i64, { "r", "r" } },
1390    { INDEX_op_st16_i64, { "r", "r" } },
1391    { INDEX_op_st32_i64, { "r", "r" } },
1392    { INDEX_op_st_i64, { "r", "r" } },
1393
1394    { INDEX_op_add_i32, { "r", "r", "r" } },
1395    { INDEX_op_add_i64, { "r", "r", "r" } },
1396    { INDEX_op_sub_i32, { "r", "r", "r" } },
1397    { INDEX_op_sub_i64, { "r", "r", "r" } },
1398    { INDEX_op_mul_i32, { "r", "r", "r" } },
1399    { INDEX_op_mul_i64, { "r", "r", "r" } },
1400    { INDEX_op_and_i32, { "r", "r", "r" } },
1401    { INDEX_op_and_i64, { "r", "r", "r" } },
1402    { INDEX_op_or_i32, { "r", "r", "r" } },
1403    { INDEX_op_or_i64, { "r", "r", "r" } },
1404    { INDEX_op_xor_i32, { "r", "r", "r" } },
1405    { INDEX_op_xor_i64, { "r", "r", "r" } },
1406
1407    { INDEX_op_shl_i32, { "r", "r", "ri" } },
1408    { INDEX_op_shr_i32, { "r", "r", "ri" } },
1409    { INDEX_op_sar_i32, { "r", "r", "ri" } },
1410    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1411    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1412    { INDEX_op_shl_i64, { "r", "r", "ri" } },
1413    { INDEX_op_shr_i64, { "r", "r", "ri" } },
1414    { INDEX_op_sar_i64, { "r", "r", "ri" } },
1415    { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1416    { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1417
1418    { INDEX_op_brcond_i32, { "r", "r" } },
1419    { INDEX_op_setcond_i32, { "r", "r", "r" } },
1420    { INDEX_op_brcond_i64, { "r", "r" } },
1421    { INDEX_op_setcond_i64, { "r", "r", "r" } },
1422
1423    { INDEX_op_qemu_ld8u, { "r", "l" } },
1424    { INDEX_op_qemu_ld8s, { "r", "l" } },
1425    { INDEX_op_qemu_ld16u, { "r", "l" } },
1426    { INDEX_op_qemu_ld16s, { "r", "l" } },
1427    { INDEX_op_qemu_ld32u, { "r", "l" } },
1428    { INDEX_op_qemu_ld32s, { "r", "l" } },
1429
1430    { INDEX_op_qemu_ld32, { "r", "l" } },
1431    { INDEX_op_qemu_ld64, { "r", "l" } },
1432
1433    { INDEX_op_qemu_st8, { "l", "l" } },
1434    { INDEX_op_qemu_st16, { "l", "l" } },
1435    { INDEX_op_qemu_st32, { "l", "l" } },
1436    { INDEX_op_qemu_st64, { "l", "l" } },
1437
1438    { INDEX_op_bswap16_i32, { "r", "r" } },
1439    { INDEX_op_bswap32_i32, { "r", "r" } },
1440    { INDEX_op_bswap16_i64, { "r", "r" } },
1441    { INDEX_op_bswap32_i64, { "r", "r" } },
1442    { INDEX_op_bswap64_i64, { "r", "r" } },
1443
1444    { INDEX_op_ext8s_i32, { "r", "r" } },
1445    { INDEX_op_ext16s_i32, { "r", "r" } },
1446    { INDEX_op_ext8u_i32, { "r", "r" } },
1447    { INDEX_op_ext16u_i32, { "r", "r" } },
1448
1449    { INDEX_op_ext8s_i64, { "r", "r" } },
1450    { INDEX_op_ext16s_i64, { "r", "r" } },
1451    { INDEX_op_ext32s_i64, { "r", "r" } },
1452    { INDEX_op_ext8u_i64, { "r", "r" } },
1453    { INDEX_op_ext16u_i64, { "r", "r" } },
1454    { INDEX_op_ext32u_i64, { "r", "r" } },
1455
1456    { -1 },
1457};
1458
1459static void tcg_target_init(TCGContext *s)
1460{
1461#if !defined(CONFIG_USER_ONLY)
1462    /* fail safe */
1463    if ((1ULL << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
1464        tcg_abort();
1465    }
1466#endif
1467    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1468    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1469
1470    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1471                     (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1472                     (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1473                     (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1474                     (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1475                     (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1476                     (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1477                     (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1478                     (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1479                     (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1480                     (1 << TCG_REG_X18));
1481
1482    tcg_regset_clear(s->reserved_regs);
1483    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1484    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1485    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1486    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1487
1488    tcg_add_target_add_op_defs(aarch64_op_defs);
1489}
1490
1491static void tcg_target_qemu_prologue(TCGContext *s)
1492{
1493    /* NB: frame sizes are in 16 byte stack units! */
1494    int frame_size_callee_saved, frame_size_tcg_locals;
1495    TCGReg r;
1496
1497    /* save pairs             (FP, LR) and (X19, X20) .. (X27, X28) */
1498    frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
1499
1500    /* frame size requirement for TCG local variables */
1501    frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
1502        + CPU_TEMP_BUF_NLONGS * sizeof(long)
1503        + (TCG_TARGET_STACK_ALIGN - 1);
1504    frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
1505    frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
1506
1507    /* push (FP, LR) and update sp */
1508    tcg_out_push_pair(s, TCG_REG_SP,
1509                      TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1510
1511    /* FP -> callee_saved */
1512    tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);
1513
1514    /* store callee-preserved regs x19..x28 using FP -> callee_saved */
1515    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1516        int idx = (r - TCG_REG_X19) / 2 + 1;
1517        tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
1518    }
1519
1520    /* make stack space for TCG locals */
1521    tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
1522                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1523    /* inform TCG about how to find TCG locals with register, offset, size */
1524    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1525                  CPU_TEMP_BUF_NLONGS * sizeof(long));
1526
1527#if defined(CONFIG_USE_GUEST_BASE)
1528    if (GUEST_BASE) {
1529        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1530        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1531    }
1532#endif
1533
1534    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1535    tcg_out_gotor(s, tcg_target_call_iarg_regs[1]);
1536
1537    tb_ret_addr = s->code_ptr;
1538
1539    /* remove TCG locals stack space */
1540    tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
1541                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1542
1543    /* restore registers x19..x28.
1544       FP must be preserved, so it still points to callee_saved area */
1545    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1546        int idx = (r - TCG_REG_X19) / 2 + 1;
1547        tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
1548    }
1549
1550    /* pop (FP, LR), restore SP to previous frame, return */
1551    tcg_out_pop_pair(s, TCG_REG_SP,
1552                     TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1553    tcg_out_ret(s);
1554}
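
/* Editor's illustration (assumption, not part of the original): with the ten
   callee-saved registers x19..x28 plus the (FP, LR) pair,
   frame_size_callee_saved is 6 units of 16 bytes, so the prologue above
   expands to roughly:
       stp x29, x30, [sp, #-96]!
       mov x29, sp
       stp x19, x20, [x29, #16]
       ...
       stp x27, x28, [x29, #80]
       sub sp, sp, #(frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN)
   and the epilogue at tb_ret_addr mirrors it with add/ldp/ret. */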
1555