qemu/tcg/aarch64/tcg-target.c
/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-be-ldst.h"
#include "qemu/bitops.h"

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
    "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
    "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
    "%x24", "%x25", "%x26", "%x27", "%x28",
    "%fp", /* frame pointer */
    "%lr", /* link register */
    "%sp",  /* stack pointer */
};
#endif /* NDEBUG */

#ifdef TARGET_WORDS_BIGENDIAN
 #define TCG_LDST_BSWAP 1
#else
 #define TCG_LDST_BSWAP 0
#endif
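
/* Note: this backend assumes a little-endian host, so for a big-endian
   guest (TARGET_WORDS_BIGENDIAN) every qemu_ld is followed by a byte
   swap and every qemu_st is preceded by one; TCG_LDST_BSWAP gates those
   swaps in the qemu_ld/st paths below. */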

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */

    TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
    TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    TCG_REG_X8, /* will not use, see tcg_target_init */
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X8

#ifndef CONFIG_SOFTMMU
# if defined(CONFIG_USE_GUEST_BASE)
# define TCG_REG_GUEST_BASE TCG_REG_X28
# else
# define TCG_REG_GUEST_BASE TCG_REG_XZR
# endif
#endif

static inline void reloc_pc26(void *code_ptr, tcg_target_long target)
{
    tcg_target_long offset; uint32_t insn;
    offset = (target - (tcg_target_long)code_ptr) / 4;
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    insn = *(uint32_t *)code_ptr;
    insn = deposit32(insn, 0, 26, offset);
    *(uint32_t *)code_ptr = insn;
}
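
/* Worked example: patching a B at code_ptr 0x1000 to target 0x2000 gives
   offset (0x2000 - 0x1000) / 4 = 0x400, deposited into the 26-bit imm26
   field at bits [25:0] of the branch instruction. */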

static inline void reloc_pc19(void *code_ptr, tcg_target_long target)
{
    tcg_target_long offset; uint32_t insn;
    offset = (target - (tcg_target_long)code_ptr) / 4;
    /* read instruction, mask away previous PC_REL19 parameter contents,
       set the proper offset, then write back the instruction. */
    insn = *(uint32_t *)code_ptr;
    insn = deposit32(insn, 5, 19, offset);
    *(uint32_t *)code_ptr = insn;
}

static inline void patch_reloc(uint8_t *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    value += addend;

    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        reloc_pc26(code_ptr, value);
        break;
    case R_AARCH64_CONDBR19:
        reloc_pc19(code_ptr, value);
        break;

    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
                                   const char **pct_str)
{
    const char *ct_str = *pct_str;

    switch (ct_str[0]) {
    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are used for the helper args, so better to
           avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    default:
        return -1;
    }

    ct_str++;
    *pct_str = ct_str;
    return 0;
}

static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* alias: unsigned greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* alias: unsigned lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

/* opcodes for LDR / STR instructions with base + simm9 addressing */
enum aarch64_ldst_op_data { /* size of the data moved */
    LDST_8 = 0x38,
    LDST_16 = 0x78,
    LDST_32 = 0xb8,
    LDST_64 = 0xf8,
};
enum aarch64_ldst_op_type { /* type of operation */
    LDST_ST = 0x0,    /* store */
    LDST_LD = 0x4,    /* load */
    LDST_LD_S_X = 0x8,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 0xc,  /* load and sign-extend into Wt */
};
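
/* These enum values are the raw instruction-field bits: tcg_out_ldst_9
   below ORs op_data << 24 into the size and fixed bits at [31:24] and
   op_type << 20 into the opc field at [23:22]; for instance
   LDST_32 << 24 | LDST_LD << 20 = 0xb8400000, the LDUR Wt template. */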

enum aarch64_arith_opc {
    ARITH_AND = 0x0a,
    ARITH_ADD = 0x0b,
    ARITH_OR = 0x2a,
    ARITH_ADDS = 0x2b,
    ARITH_XOR = 0x4a,
    ARITH_SUB = 0x4b,
    ARITH_ANDS = 0x6a,
    ARITH_SUBS = 0x6b,
};

enum aarch64_srr_opc {
    SRR_SHL = 0x0,
    SRR_SHR = 0x4,
    SRR_SAR = 0x8,
    SRR_ROR = 0xc
};

static inline enum aarch64_ldst_op_data
aarch64_ldst_get_data(TCGOpcode tcg_op)
{
    switch (tcg_op) {
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        return LDST_8;

    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        return LDST_16;

    case INDEX_op_ld_i32:
    case INDEX_op_st_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_st32_i64:
        return LDST_32;

    case INDEX_op_ld_i64:
    case INDEX_op_st_i64:
        return LDST_64;

    default:
        tcg_abort();
    }
}

static inline enum aarch64_ldst_op_type
aarch64_ldst_get_type(TCGOpcode tcg_op)
{
    switch (tcg_op) {
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return LDST_ST;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld_i64:
        return LDST_LD;

    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16s_i32:
        return LDST_LD_S_W;

    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32s_i64:
        return LDST_LD_S_X;

    default:
        tcg_abort();
    }
}

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

static inline void tcg_out_ldst_9(TCGContext *s,
                                  enum aarch64_ldst_op_data op_data,
                                  enum aarch64_ldst_op_type op_type,
                                  TCGReg rd, TCGReg rn, tcg_target_long offset)
{
    /* use LDUR/STUR: base register plus 9-bit signed unscaled offset */
    unsigned int mod, off;

    if (offset < 0) {
        off = (256 + offset);
        mod = 0x1;
    } else {
        off = offset;
        mod = 0x0;
    }

    mod |= op_type;
    tcg_out32(s, op_data << 24 | mod << 20 | off << 12 | rn << 5 | rd);
}
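
/* For instance, tcg_out_ldst_9(s, LDST_32, LDST_LD, TCG_REG_X0,
   TCG_REG_X1, -8) encodes off = 256 - 8 = 0xf8 with the sign bit carried
   in mod, producing 0xb8000000 | 0x5 << 20 | 0xf8 << 12 | 1 << 5 | 0
   = 0xb85f8020, i.e. LDUR W0, [X1, #-8]. */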

/* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
static inline void tcg_out_ldst_12(TCGContext *s,
                                   enum aarch64_ldst_op_data op_data,
                                   enum aarch64_ldst_op_type op_type,
                                   TCGReg rd, TCGReg rn,
                                   tcg_target_ulong scaled_uimm)
{
    tcg_out32(s, (op_data | 1) << 24
              | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
}

static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src)
{
    /* register to register move using MOV (shifted register with no shift),
       an alias of ORR Wd, WZR, Wm: 0x2a0003e0 | src << 16 | rd */
    unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
    tcg_out32(s, base | src << 16 | rd);
}

static inline void tcg_out_movi_aux(TCGContext *s,
                                    TCGReg rd, uint64_t value)
{
    uint32_t half, base, shift, movk = 0;
    /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
    /* using MOVZ 0x52800000 | extended reg.. */
    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
    /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
       first MOVZ with the half-word immediate skipping the zeros, with a shift
       (LSL) equal to this number. Then morph all next instructions into MOVKs.
       Zero the processed half-word in the value, continue until empty.
       We build the final result 16bits at a time with up to 4 instructions,
       but do not emit instructions for 16bit zero holes. */
    do {
        shift = ctz64(value) & (63 & -16);
        half = (value >> shift) & 0xffff;
        tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
        movk = 0x20000000; /* morph next MOVZs into MOVKs */
        value &= ~(0xffffUL << shift);
    } while (value);
}
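
/* Example: value 0x0000123400005678 is built in two instructions,
   since the all-zero middle halfwords are skipped:
     MOVZ Xd, #0x5678            ; shift 0
     MOVK Xd, #0x1234, LSL #32   ; shift 32
 */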

static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg rd, tcg_target_long value)
{
    if (type == TCG_TYPE_I64) {
        tcg_out_movi_aux(s, rd, value);
    } else {
        tcg_out_movi_aux(s, rd, value & 0xffffffff);
    }
}

static inline void tcg_out_ldst_r(TCGContext *s,
                                  enum aarch64_ldst_op_data op_data,
                                  enum aarch64_ldst_op_type op_type,
                                  TCGReg rd, TCGReg base, TCGReg regoff)
{
    /* load/store between register and memory using base + 64-bit
       register offset */
    /* using e.g. STR Wt, [Xn, Xm] 0xb8206800|(regoff << 16)|(base << 5)|rd */
    /* in 0x6800, 0x6000 selects option LSL (no extend) and 0x0800 is the
       fixed bit of the register-offset form */
    tcg_out32(s, 0x00206800
              | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
}

/* Dispatch a load/store to the best available addressing form:
   9-bit signed unscaled, scaled unsigned 12-bit, or register offset. */
static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
                                enum aarch64_ldst_op_type type,
                                TCGReg rd, TCGReg rn, tcg_target_long offset)
{
    if (offset >= -256 && offset < 256) {
        tcg_out_ldst_9(s, data, type, rd, rn, offset);
        return;
    }

    if (offset >= 256) {
        /* if the offset is naturally aligned and in range,
           then we can use the scaled uimm12 encoding */
        unsigned int s_bits = data >> 6; /* LDST_8..LDST_64 -> log2(size) */
        if (!(offset & ((1 << s_bits) - 1))) {
            tcg_target_ulong scaled_uimm = offset >> s_bits;
            if (scaled_uimm <= 0xfff) {
                tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
                return;
            }
        }
    }

    /* worst-case scenario, move offset to temp register, use reg offset */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
}
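
/* E.g. a 32-bit load at offset 0x1008 misses the simm9 window but is
   4-byte aligned with 0x1008 >> 2 = 0x402 <= 0xfff, so it takes the
   scaled uimm12 form; an unaligned offset such as 0x1001 would instead
   be materialized in TCG_REG_TMP and use the register-offset form. */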

/* mov alias implemented with add immediate, useful to move to/from SP */
static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn)
{
    /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
    unsigned int base = ext ? 0x91000000 : 0x11000000;
    tcg_out32(s, base | rn << 5 | rd);
}

static inline void tcg_out_mov(TCGContext *s,
                               TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg);
    }
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
                 arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
                 arg, arg1, arg2);
}

static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm,
                                 int shift_imm)
{
    /* Using shifted register arithmetic operations; a positive shift_imm
       encodes LSR #shift_imm, a negative one encodes LSL #-shift_imm */
    /* if 64bit operation, just OR with 0x80 << 24 */
    unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24;
    if (shift_imm == 0) {
        shift = 0;
    } else if (shift_imm > 0) {
        shift = shift_imm << 10 | 1 << 22;
    } else /* (shift_imm < 0) */ {
        shift = (-shift_imm) << 10;
    }
    tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
}
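
/* Usage example from the softmmu path below:
   tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2, TCG_REG_X0,
                 -CPU_TLB_ENTRY_BITS)
   emits ADD X2, X2, X0, LSL #CPU_TLB_ENTRY_BITS, scaling the TLB index
   by the entry size while adding it. */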

static inline void tcg_out_mul(TCGContext *s, int ext,
                               TCGReg rd, TCGReg rn, TCGReg rm)
{
    /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
    unsigned int base = ext ? 0x9b007c00 : 0x1b007c00;
    tcg_out32(s, base | rm << 16 | rn << 5 | rd);
}

static inline void tcg_out_shiftrot_reg(TCGContext *s,
                                        enum aarch64_srr_opc opc, int ext,
                                        TCGReg rd, TCGReg rn, TCGReg rm)
{
    /* using 2-source data processing instructions 0x1ac02000 */
    unsigned int base = ext ? 0x9ac02000 : 0x1ac02000;
    tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
}

static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
                                unsigned int a, unsigned int b)
{
    /* Using UBFM 0x53000000 Wd, Wn, a, b */
    unsigned int base = ext ? 0xd3400000 : 0x53000000;
    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
}

static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
                                unsigned int a, unsigned int b)
{
    /* Using SBFM 0x13000000 Wd, Wn, a, b */
    unsigned int base = ext ? 0x93400000 : 0x13000000;
    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
}

static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
    unsigned int base = ext ? 0x93c00000 : 0x13800000;
    tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
}

static inline void tcg_out_shl(TCGContext *s, int ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits, max;
    bits = ext ? 64 : 32;
    max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}
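
/* The UBFM identity used here: LSL #m is UBFM Rd, Rn, #(bits - m),
   #(bits - 1 - m); e.g. a 32-bit shift left by 8 becomes
   UBFM Wd, Wn, #24, #23. */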

static inline void tcg_out_shr(TCGContext *s, int ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, int ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, int ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, int ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits, max;
    bits = ext ? 64 : 32;
    max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_cmp(TCGContext *s, int ext, TCGReg rn, TCGReg rm,
                               int shift_imm)
{
    /* Using CMP alias SUBS wzr, Wn, Wm */
    tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
}

static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c)
{
    /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
    unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
    tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
}
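
/* CSINC Rd, Rn, Rm, cond computes cond ? Rn : Rm + 1; with both sources
   fixed to XZR it yields cond ? 0 : 1, hence the inverted condition to
   set the register to 1 exactly when the TCG condition holds. */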

static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
{
    tcg_target_long offset;
    offset = (target - (tcg_target_long)s->code_ptr) / 4;

    if (offset < -0x02000000 || offset >= 0x02000000) {
        /* out of 26bit range */
        tcg_abort();
    }

    tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
{
    /* We pay attention here to not modify the branch target by
       reading from the buffer. This ensures that caches and memory are
       kept coherent during retranslation.
       Mask away possible garbage in the high bits for the first translation,
       while keeping the offset bits for retranslation. */
    uint32_t insn;
    insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
    tcg_out32(s, insn);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
    /* see comments in tcg_out_goto_noaddr */
    uint32_t insn;
    insn = tcg_in32(s) & (0x07ffff << 5);
    insn |= 0x54000000 | tcg_cond_to_aarch64[c];
    tcg_out32(s, insn);
}

static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c,
                                     tcg_target_long target)
{
    tcg_target_long offset;
    offset = (target - (tcg_target_long)s->code_ptr) / 4;

    if (offset < -0x40000 || offset >= 0x40000) {
        /* out of 19bit range */
        tcg_abort();
    }

    offset &= 0x7ffff;
    tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out32(s, 0xd63f0000 | reg << 5);
}

static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
{
    tcg_out32(s, 0xd61f0000 | reg << 5);
}

static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
{
    tcg_target_long offset;

    offset = (target - (tcg_target_long)s->code_ptr) / 4;

    if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
        tcg_out_callr(s, TCG_REG_TMP);
    } else {
        tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
    }
}

/* encode a logical immediate: a pattern of M set bits rotated right by R,
   mapping the user parameter M (pattern length in set bits) to S = M - 1 */
static inline unsigned int
aarch64_limm(unsigned int m, unsigned int r)
{
    assert(m > 0);
    return r << 16 | (m - 1) << 10;
}

/* test a register against an immediate bit pattern made of
   M set bits rotated right by R.
   Examples:
   to test a 32/64 reg against 0x00000007, pass M = 3,  R = 0.
   to test a 32/64 reg against 0x000000ff, pass M = 8,  R = 0.
   to test a 32bit reg against 0xff000000, pass M = 8,  R = 8.
   to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
 */
static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn,
                               unsigned int m, unsigned int r)
{
    /* using TST alias of ANDS XZR, Xn, #bimm64 0x7200001f */
    unsigned int base = ext ? 0xf240001f : 0x7200001f;
    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
}
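
/* Example: tcg_out_tst(s, 0, TCG_REG_X0, 8, 0) encodes immr = 0,
   imms = 7, i.e. TST W0, #0xff: 0x7200001f | 7 << 10 = 0x72001c1f. */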

/* AND a register with a bit pattern, as with TST, but without
   changing the flags */
static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
                                unsigned int m, unsigned int r)
{
    /* using AND 0x12000000 */
    unsigned int base = ext ? 0x92400000 : 0x12000000;
    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
}

static inline void tcg_out_ret(TCGContext *s)
{
    /* emit RET { LR } */
    tcg_out32(s, 0xd65f03c0);
}

void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
    tcg_target_long target, offset;
    target = (tcg_target_long)addr;
    offset = (target - (tcg_target_long)jmp_addr) / 4;

    if (offset < -0x02000000 || offset >= 0x02000000) {
        /* out of 26bit range */
        tcg_abort();
    }

    patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
    flush_icache_range(jmp_addr, jmp_addr + 4);
}

static inline void tcg_out_goto_label(TCGContext *s, int label_index)
{
    TCGLabel *l = &s->labels[label_index];

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
        tcg_out_goto_noaddr(s);
    } else {
        tcg_out_goto(s, l->u.value);
    }
}

static inline void tcg_out_goto_label_cond(TCGContext *s,
                                           TCGCond c, int label_index)
{
    TCGLabel *l = &s->labels[label_index];

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0);
        tcg_out_goto_cond_noaddr(s, c);
    } else {
        tcg_out_goto_cond(s, c, l->u.value);
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
{
    /* using REV 0x5ac00800 */
    unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
    tcg_out32(s, base | rm << 5 | rd);
}

static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
{
    /* using REV16 0x5ac00400 */
    unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
    tcg_out32(s, base | rm << 5 | rd);
}

static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00
       of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = 8 * (1 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, int s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* using ALIASes UXTB 0x53001c00, UXTH 0x53003c00
       of UBFM Wd, Wn, #0, #7|15 */
    int bits = 8 * (1 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}
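
/* There is deliberately no 32-bit (UXTW) case here: writes to a W
   register already zero-extend to 64 bits, so ext32u is implemented as
   a plain 32-bit register move in tcg_out_op below. */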

static inline void tcg_out_addi(TCGContext *s, int ext,
                                TCGReg rd, TCGReg rn, unsigned int aimm)
{
    /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
    /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
    unsigned int base = ext ? 0x91000000 : 0x11000000;

    if (aimm <= 0xfff) {
        aimm <<= 10;
    } else {
        /* the only other shift available is LSL #12; assert that the
           value is representable in that form */
        assert(!(aimm & 0xfff));
        assert(aimm <= 0xfff000);
        base |= 1 << 22; /* apply LSL 12 */
        aimm >>= 2;      /* i.e. (aimm >> 12) << 10 */
    }

    tcg_out32(s, base | aimm | (rn << 5) | rd);
}
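
/* E.g. aimm = 0x3000 takes the shifted form: base gains the LSL 12 bit
   and 0x3000 >> 2 places imm12 = 3 at bits [21:10], encoding
   ADD Rd, Rn, #3, LSL #12. */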

static inline void tcg_out_subi(TCGContext *s, int ext,
                                TCGReg rd, TCGReg rn, unsigned int aimm)
{
    /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
    /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
    unsigned int base = ext ? 0xd1000000 : 0x51000000;

    if (aimm <= 0xfff) {
        aimm <<= 10;
    } else {
        /* the only other shift available is LSL #12; assert that the
           value is representable in that form */
        assert(!(aimm & 0xfff));
        assert(aimm <= 0xfff000);
        base |= 1 << 22; /* apply LSL 12 */
        aimm >>= 2;      /* i.e. (aimm >> 12) << 10 */
    }

    tcg_out32(s, base | aimm | (rn << 5) | rd);
}

static inline void tcg_out_nop(TCGContext *s)
{
    tcg_out32(s, 0xd503201f);
}

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[4] = {
    helper_ret_ldub_mmu,
    helper_ret_lduw_mmu,
    helper_ret_ldul_mmu,
    helper_ret_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[4] = {
    helper_ret_stb_mmu,
    helper_ret_stw_mmu,
    helper_ret_stl_mmu,
    helper_ret_stq_mmu,
};

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
    tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
    tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (tcg_target_long)lb->raddr);
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
                 (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
    tcg_out_callr(s, TCG_REG_TMP);
    if (lb->opc & 0x04) {
        tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, (tcg_target_long)lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);

    tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
    tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (tcg_target_long)lb->raddr);
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
                 (tcg_target_long)qemu_st_helpers[lb->opc & 3]);
    tcg_out_callr(s, TCG_REG_TMP);

    tcg_out_nop(s);
    tcg_out_goto(s, (tcg_target_long)lb->raddr);
}

static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
                                TCGReg data_reg, TCGReg addr_reg,
                                int mem_index,
                                uint8_t *raddr, uint8_t *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
            int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
{
    TCGReg base = TCG_AREG0;
    int tlb_offset = is_read ?
        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
    /* Extract the TLB index from the address into X0.
       X0<CPU_TLB_BITS:0> =
       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
    tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
    /* Store the page mask part of the address and the low s_bits into X3.
       Later this allows checking for equality and alignment at the same time.
       X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
    tcg_out_andi(s, (TARGET_LONG_BITS == 64), TCG_REG_X3, addr_reg,
                 (TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
                 (TARGET_LONG_BITS - TARGET_PAGE_BITS));
    /* Add any "high bits" from the tlb offset to the env address into X2,
       to take advantage of the LSL12 form of the addi instruction.
       X2 = env + (tlb_offset & 0xfff000) */
    tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
    /* Merge the tlb index contribution into X2.
       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
    tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2,
                  TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
    /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
       X0 = load [X2 + (tlb_offset & 0x000fff)] */
    tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
                 LDST_LD, TCG_REG_X0, TCG_REG_X2,
                 (tlb_offset & 0xfff));
    /* Load the tlb addend. Do that early to avoid stalling.
       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
    tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                 (is_read ? offsetof(CPUTLBEntry, addr_read)
                  : offsetof(CPUTLBEntry, addr_write)));
    /* Perform the address comparison. */
    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
    *label_ptr = s->code_ptr;
    /* If not equal, we jump to the slow path. */
    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
}
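
/* For a 64-bit guest read, the emitted sequence is roughly (a sketch):
     ubfm x0, addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
     and  x3, addr, #(PAGE_MASK | align_mask)
     add  x2, env, #hi12(tlb_offset)
     add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
     ldr  x0, [x2, #lo12(tlb_offset)]           ; tlb comparator
     ldr  x1, [x2, #lo12(tlb_offset) + addend]  ; host addend
     cmp  x0, x3
     b.ne slow_path                             ; patched later
 */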

#endif /* CONFIG_SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
                                   TCGReg addr_r, TCGReg off_r)
{
    switch (opc) {
    case 0:
        tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
        break;
    case 0 | 4:
        tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
        break;
    case 1:
        tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
        if (TCG_LDST_BSWAP) {
            tcg_out_rev16(s, 0, data_r, data_r);
        }
        break;
    case 1 | 4:
        if (TCG_LDST_BSWAP) {
            tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
            tcg_out_rev16(s, 0, data_r, data_r);
            tcg_out_sxt(s, 1, 1, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
        }
        break;
    case 2:
        tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
        if (TCG_LDST_BSWAP) {
            tcg_out_rev(s, 0, data_r, data_r);
        }
        break;
    case 2 | 4:
        if (TCG_LDST_BSWAP) {
            tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
            tcg_out_rev(s, 0, data_r, data_r);
            tcg_out_sxt(s, 1, 2, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
        }
        break;
    case 3:
        tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
        if (TCG_LDST_BSWAP) {
            tcg_out_rev(s, 1, data_r, data_r);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
                                   TCGReg addr_r, TCGReg off_r)
{
    switch (opc) {
    case 0:
        tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
        break;
    case 1:
        if (TCG_LDST_BSWAP) {
            tcg_out_rev16(s, 0, TCG_REG_TMP, data_r);
            tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
        } else {
            tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
        }
        break;
    case 2:
        if (TCG_LDST_BSWAP) {
            tcg_out_rev(s, 0, TCG_REG_TMP, data_r);
            tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
        } else {
            tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
        }
        break;
    case 3:
        if (TCG_LDST_BSWAP) {
            tcg_out_rev(s, 1, TCG_REG_TMP, data_r);
            tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
        } else {
            tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
    TCGReg addr_reg, data_reg;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits;
    uint8_t *label_ptr;
#endif
    data_reg = args[0];
    addr_reg = args[1];

#ifdef CONFIG_SOFTMMU
    mem_index = args[2];
    s_bits = opc & 3;
    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
    add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
                        mem_index, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
#endif /* CONFIG_SOFTMMU */
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
    TCGReg addr_reg, data_reg;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits;
    uint8_t *label_ptr;
#endif
    data_reg = args[0];
    addr_reg = args[1];

#ifdef CONFIG_SOFTMMU
    mem_index = args[2];
    s_bits = opc & 3;

    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
    add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
                        mem_index, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
#endif /* CONFIG_SOFTMMU */
}

static uint8_t *tb_ret_addr;

/* callee stack use example:
   stp     x29, x30, [sp,#-32]!
   mov     x29, sp
   stp     x1, x2, [sp,#16]
   ...
   ldp     x1, x2, [sp,#16]
   ldp     x29, x30, [sp],#32
   ret
*/

/* push r1 and r2, and alloc stack space for a total of
   alloc_n elements (1 element = 16 bytes); alloc_n must be
   between 1 and 31. */
static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
                                     TCGReg r1, TCGReg r2, int alloc_n)
{
    /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
       | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
    assert(alloc_n > 0 && alloc_n < 0x20);
    alloc_n = (-alloc_n) & 0x3f;
    tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
}
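
/* With addr = SP, r1 = FP, r2 = LR and alloc_n = 2 this emits
   0xa9be7bfd, i.e. STP x29, x30, [sp, #-32]!, matching the first line
   of the callee stack example above. */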

/* dealloc stack space for a total of alloc_n elements and pop r1, r2.  */
static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
                                    TCGReg r1, TCGReg r2, int alloc_n)
{
    /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
       | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
    assert(alloc_n > 0 && alloc_n < 0x20);
    tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
}

static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
                                      TCGReg r1, TCGReg r2, int idx)
{
    /* using register pair offset simm7 STP 0x29000000 | (ext)
       | idx << 16 | r2 << 10 | addr << 5 | r1 */
    assert(idx > 0 && idx < 0x20);
    tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
}

static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
                                     TCGReg r1, TCGReg r2, int idx)
{
    /* using register pair offset simm7 LDP 0x29400000 | (ext)
       | idx << 16 | r2 << 10 | addr << 5 | r1 */
    assert(idx > 0 && idx < 0x20);
    tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg *args, const int *const_args)
{
    /* ext will be set in the switch below, which will fall through to the
       common code. It triggers the use of extended regs where appropriate. */
    int ext = 0;

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
        tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
        break;

    case INDEX_op_goto_tb:
#ifndef USE_DIRECT_JUMP
#error "USE_DIRECT_JUMP required for aarch64"
#endif
        assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
        s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
        /* actual branch destination will be patched by
           aarch64_tb_set_jmp_target later, beware retranslation. */
        tcg_out_goto_noaddr(s);
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;

    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_call(s, args[0]);
        } else {
            tcg_out_callr(s, args[0]);
        }
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, args[0]);
        break;

    case INDEX_op_ld_i32:
    case INDEX_op_ld_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st_i64:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
                     args[0], args[1], args[2]);
        break;

    case INDEX_op_mov_i64:
        ext = 1; /* fall through */
    case INDEX_op_mov_i32:
        tcg_out_movr(s, ext, args[0], args[1]);
        break;

    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;

    case INDEX_op_add_i64:
        ext = 1; /* fall through */
    case INDEX_op_add_i32:
        tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
        break;

    case INDEX_op_sub_i64:
        ext = 1; /* fall through */
    case INDEX_op_sub_i32:
        tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
        break;

    case INDEX_op_and_i64:
        ext = 1; /* fall through */
    case INDEX_op_and_i32:
        tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
        break;

    case INDEX_op_or_i64:
        ext = 1; /* fall through */
    case INDEX_op_or_i32:
        tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
        break;

    case INDEX_op_xor_i64:
        ext = 1; /* fall through */
    case INDEX_op_xor_i32:
        tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
        break;

    case INDEX_op_mul_i64:
        ext = 1; /* fall through */
    case INDEX_op_mul_i32:
        tcg_out_mul(s, ext, args[0], args[1], args[2]);
        break;

    case INDEX_op_shl_i64:
        ext = 1; /* fall through */
    case INDEX_op_shl_i32:
        if (const_args[2]) {    /* LSL / UBFM Wd, Wn, (32 - m) */
            tcg_out_shl(s, ext, args[0], args[1], args[2]);
        } else {                /* LSL / LSLV */
            tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_shr_i64:
        ext = 1; /* fall through */
    case INDEX_op_shr_i32:
        if (const_args[2]) {    /* LSR / UBFM Wd, Wn, m, 31 */
            tcg_out_shr(s, ext, args[0], args[1], args[2]);
        } else {                /* LSR / LSRV */
            tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_sar_i64:
        ext = 1; /* fall through */
    case INDEX_op_sar_i32:
        if (const_args[2]) {    /* ASR / SBFM Wd, Wn, m, 31 */
            tcg_out_sar(s, ext, args[0], args[1], args[2]);
        } else {                /* ASR / ASRV */
            tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_rotr_i64:
        ext = 1; /* fall through */
    case INDEX_op_rotr_i32:
        if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, m */
            tcg_out_rotr(s, ext, args[0], args[1], args[2]);
        } else {                /* ROR / RORV */
            tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_rotl_i64:
        ext = 1; /* fall through */
    case INDEX_op_rotl_i32:     /* same as rotate right by (32 - m) */
        if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, 32 - m */
            tcg_out_rotl(s, ext, args[0], args[1], args[2]);
        } else {
            tcg_out_arith(s, ARITH_SUB, 0,
                          TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
            tcg_out_shiftrot_reg(s, SRR_ROR, ext,
                                 args[0], args[1], TCG_REG_TMP);
        }
        break;

    case INDEX_op_brcond_i64:
        ext = 1; /* fall through */
    case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
        tcg_out_cmp(s, ext, args[0], args[1], 0);
        tcg_out_goto_label_cond(s, args[2], args[3]);
        break;

    case INDEX_op_setcond_i64:
        ext = 1; /* fall through */
    case INDEX_op_setcond_i32:
        tcg_out_cmp(s, ext, args[1], args[2], 0);
        tcg_out_cset(s, 0, args[0], args[3]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0 | 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 4 | 0);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 0 | 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 4 | 1);
        break;
    case INDEX_op_qemu_ld32u:
        tcg_out_qemu_ld(s, args, 0 | 2);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 4 | 2);
        break;
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 0 | 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 0 | 3);
        break;
    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

    case INDEX_op_bswap64_i64:
        ext = 1; /* fall through */
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, ext, args[0], args[1]);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, 0, args[0], args[1]);
        break;

    case INDEX_op_ext8s_i64:
        ext = 1; /* fall through */
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, 0, args[0], args[1]);
        break;
    case INDEX_op_ext16s_i64:
        ext = 1; /* fall through */
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, 1, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, 1, 2, args[0], args[1]);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, 0, args[0], args[1]);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, 1, args[0], args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_movr(s, 0, args[0], args[1]);
        break;

    default:
        tcg_abort(); /* opcode not implemented */
    }
}

static const TCGTargetOpDef aarch64_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },

    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_mov_i64, { "r", "r" } },

    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_movi_i64, { "r" } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },

    { INDEX_op_st8_i32, { "r", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "r" } },
    { INDEX_op_add_i64, { "r", "r", "r" } },
    { INDEX_op_sub_i32, { "r", "r", "r" } },
    { INDEX_op_sub_i64, { "r", "r", "r" } },
    { INDEX_op_mul_i32, { "r", "r", "r" } },
    { INDEX_op_mul_i64, { "r", "r", "r" } },
    { INDEX_op_and_i32, { "r", "r", "r" } },
    { INDEX_op_and_i64, { "r", "r", "r" } },
    { INDEX_op_or_i32, { "r", "r", "r" } },
    { INDEX_op_or_i64, { "r", "r", "r" } },
    { INDEX_op_xor_i32, { "r", "r", "r" } },
    { INDEX_op_xor_i64, { "r", "r", "r" } },

    { INDEX_op_shl_i32, { "r", "r", "ri" } },
    { INDEX_op_shr_i32, { "r", "r", "ri" } },
    { INDEX_op_sar_i32, { "r", "r", "ri" } },
    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
    { INDEX_op_shl_i64, { "r", "r", "ri" } },
    { INDEX_op_shr_i64, { "r", "r", "ri" } },
    { INDEX_op_sar_i64, { "r", "r", "ri" } },
    { INDEX_op_rotl_i64, { "r", "r", "ri" } },
    { INDEX_op_rotr_i64, { "r", "r", "ri" } },

    { INDEX_op_brcond_i32, { "r", "r" } },
    { INDEX_op_setcond_i32, { "r", "r", "r" } },
    { INDEX_op_brcond_i64, { "r", "r" } },
    { INDEX_op_setcond_i64, { "r", "r", "r" } },

    { INDEX_op_qemu_ld8u, { "r", "l" } },
    { INDEX_op_qemu_ld8s, { "r", "l" } },
    { INDEX_op_qemu_ld16u, { "r", "l" } },
    { INDEX_op_qemu_ld16s, { "r", "l" } },
    { INDEX_op_qemu_ld32u, { "r", "l" } },
    { INDEX_op_qemu_ld32s, { "r", "l" } },

    { INDEX_op_qemu_ld32, { "r", "l" } },
    { INDEX_op_qemu_ld64, { "r", "l" } },

    { INDEX_op_qemu_st8, { "l", "l" } },
    { INDEX_op_qemu_st16, { "l", "l" } },
    { INDEX_op_qemu_st32, { "l", "l" } },
    { INDEX_op_qemu_st64, { "l", "l" } },

    { INDEX_op_bswap16_i32, { "r", "r" } },
    { INDEX_op_bswap32_i32, { "r", "r" } },
    { INDEX_op_bswap16_i64, { "r", "r" } },
    { INDEX_op_bswap32_i64, { "r", "r" } },
    { INDEX_op_bswap64_i64, { "r", "r" } },

    { INDEX_op_ext8s_i32, { "r", "r" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "r" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { -1 },
};

static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* fail safe */
    if ((1ULL << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
        tcg_abort();
    }
#endif
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);

    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
                     (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
                     (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
                     (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
                     (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
                     (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
                     (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
                     (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
                     (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
                     (1 << TCG_REG_X18));

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */

    tcg_add_target_add_op_defs(aarch64_op_defs);
}

static void tcg_target_qemu_prologue(TCGContext *s)
{
    /* NB: frame sizes are in 16 byte stack units! */
    int frame_size_callee_saved, frame_size_tcg_locals;
    TCGReg r;

    /* save pairs             (FP, LR) and (X19, X20) .. (X27, X28) */
    frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;

    /* frame size requirement for TCG local variables */
    frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
        + CPU_TEMP_BUF_NLONGS * sizeof(long)
        + (TCG_TARGET_STACK_ALIGN - 1);
    frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
    frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;

    /* push (FP, LR) and update sp */
    tcg_out_push_pair(s, TCG_REG_SP,
                      TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);

    /* FP -> callee_saved */
    tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);

    /* store callee-preserved regs x19..x28 using FP -> callee_saved */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int idx = (r - TCG_REG_X19) / 2 + 1;
        tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
    }

    /* make stack space for TCG locals */
    tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
    /* inform TCG about how to find TCG locals with register, offset, size */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if defined(CONFIG_USE_GUEST_BASE)
    if (GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_gotor(s, tcg_target_call_iarg_regs[1]);

    tb_ret_addr = s->code_ptr;

    /* remove TCG locals stack space */
    tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);

    /* restore registers x19..x28.
       FP must be preserved, so it still points to callee_saved area */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int idx = (r - TCG_REG_X19) / 2 + 1;
        tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
    }

    /* pop (FP, LR), restore SP to previous frame, return */
    tcg_out_pop_pair(s, TCG_REG_SP,
                     TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
    tcg_out_ret(s);
}