qemu/tcg/arm/tcg-target.inc.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Andrzej Zaborowski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "tcg-be-ldst.h"

/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
#ifndef __ARM_ARCH
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
     || defined(__ARM_ARCH_7EM__)
#  define __ARM_ARCH 7
# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
       || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
#  define __ARM_ARCH 6
# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
       || defined(__ARM_ARCH_5TEJ__)
#  define __ARM_ARCH 5
# else
#  define __ARM_ARCH 4
# endif
#endif

static int arm_arch = __ARM_ARCH;

#if defined(__ARM_ARCH_5T__) \
    || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
# define use_armv5t_instructions 1
#else
# define use_armv5t_instructions use_armv6_instructions
#endif

#define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6)
#define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7)

#ifndef use_idiv_instructions
bool use_idiv_instructions;
#endif

/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
#ifdef CONFIG_SOFTMMU
# define USING_SOFTMMU 1
#else
# define USING_SOFTMMU 0
#endif

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",
    "%r1",
    "%r2",
    "%r3",
    "%r4",
    "%r5",
    "%r6",
    "%r7",
    "%r8",
    "%r9",
    "%r10",
    "%r11",
    "%r12",
    "%r13",
    "%r14",
    "%pc",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R13,
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R12,
    TCG_REG_R14,
};

static const int tcg_target_call_iarg_regs[4] = {
    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
};
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_R0, TCG_REG_R1
};

#define TCG_REG_TMP  TCG_REG_R12

static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
}
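
/* Illustrative note: ARM branch offsets are relative to PC+8 (two insns
   of prefetch), hence the -8 above.  A B/BL at code_ptr transfers to
   code_ptr + 8 + (imm24 << 2), so imm24 = (target - code_ptr - 8) >> 2
   is exactly what reloc_pc24() deposits.  */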

static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    tcg_insn_unit insn = atomic_read(code_ptr);
    tcg_debug_assert(offset == sextract32(offset, 0, 24));
    atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
}

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(type == R_ARM_PC24);
    tcg_debug_assert(addend == 0);
    reloc_pc24(code_ptr, (tcg_insn_unit *)value);
}

#define TCG_CT_CONST_ARM  0x100
#define TCG_CT_CONST_INV  0x200
#define TCG_CT_CONST_NEG  0x400
#define TCG_CT_CONST_ZERO 0x800

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch (ct_str[0]) {
    case 'I':
        ct->ct |= TCG_CT_CONST_ARM;
        break;
    case 'K':
        ct->ct |= TCG_CT_CONST_INV;
        break;
    case 'N': /* The gcc constraint letter is L, already used here.  */
        ct->ct |= TCG_CT_CONST_NEG;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;

    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
        break;

    /* qemu_ld address */
    case 'l':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* r0-r2,lr will be overwritten when reading the tlb entry,
           so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    /* qemu_st address & data */
    case 's':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
           and r0-r1 when doing the byte swapping, so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#if defined(CONFIG_SOFTMMU)
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
#if TARGET_LONG_BITS == 64
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;

    return 0;
}

static inline uint32_t rotl(uint32_t val, int n)
{
  return (val << n) | (val >> (32 - n));
}

/* ARM immediates for ALU instructions are made of an unsigned 8-bit
   value right-rotated by an even amount between 0 and 30. */
static inline int encode_imm(uint32_t imm)
{
    int shift;

    /* simple case, only lower bits */
    if ((imm & ~0xff) == 0)
        return 0;
    /* then try a simple even shift */
    shift = ctz32(imm) & ~1;
    if (((imm >> shift) & ~0xff) == 0)
        return 32 - shift;
    /* now try harder with rotations */
    if ((rotl(imm, 2) & ~0xff) == 0)
        return 2;
    if ((rotl(imm, 4) & ~0xff) == 0)
        return 4;
    if ((rotl(imm, 6) & ~0xff) == 0)
        return 6;
    /* imm can't be encoded */
    return -1;
}
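
/* Worked example (illustrative): imm = 0x00ff0000 has ctz32 = 16, so
   shift = 16 and encode_imm() returns 32 - 16 = 16.  The callers then
   emit rotl(0x00ff0000, 16) | (16 << 7) = 0xff | 0x800 = 0x8ff, i.e.
   imm8 = 0xff rotated right by 16 -- the same 0xff | 0x800 pattern
   written out literally in tcg_out_bswap32() below.  */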

static inline int check_fit_imm(uint32_t imm)
{
    return encode_imm(imm) >= 0;
}

/* Test if a constant matches the constraint.
 * TODO: define constraints for:
 *
 * ldr/str offset:   between -0xfff and 0xfff
 * ldrh/strh offset: between -0xff and 0xff
 * mov operand2:     values represented with x << (2 * y), x < 0x100
 * add, sub, eor...: ditto
 */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct;
    ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else {
        return 0;
    }
}

#define TO_CPSR (1 << 20)

typedef enum {
    ARITH_AND = 0x0 << 21,
    ARITH_EOR = 0x1 << 21,
    ARITH_SUB = 0x2 << 21,
    ARITH_RSB = 0x3 << 21,
    ARITH_ADD = 0x4 << 21,
    ARITH_ADC = 0x5 << 21,
    ARITH_SBC = 0x6 << 21,
    ARITH_RSC = 0x7 << 21,
    ARITH_TST = 0x8 << 21 | TO_CPSR,
    ARITH_CMP = 0xa << 21 | TO_CPSR,
    ARITH_CMN = 0xb << 21 | TO_CPSR,
    ARITH_ORR = 0xc << 21,
    ARITH_MOV = 0xd << 21,
    ARITH_BIC = 0xe << 21,
    ARITH_MVN = 0xf << 21,

    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,

    INSN_DMB_ISH   = 0xf57ff05b,
    INSN_DMB_MCR   = 0xee070fba,

} ARMInsn;

#define SHIFT_IMM_LSL(im)       (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im)       (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im)       (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im)       (((im) << 7) | 0x60)
#define SHIFT_REG_LSL(rs)       (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs)       (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs)       (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs)       (((rs) << 8) | 0x70)
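
/* These build the operand-2 shift field: for the immediate forms,
   bits [11:7] hold the shift amount and bits [6:5] the shift type;
   the register forms put the shift register in bits [11:8] and set
   bit 4.  E.g. SHIFT_IMM_LSL(0) is 0, the plain "register, no shift"
   operand used by tcg_out_mov_reg() below.  */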

enum arm_cond_code_e {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* Unsigned greater or equal */
    COND_CC = 0x3,      /* Unsigned less than */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
};

static const uint8_t tcg_cond_to_arm_cond[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_CC,
    [TCG_COND_GEU] = COND_CS,
    [TCG_COND_LEU] = COND_LS,
    [TCG_COND_GTU] = COND_HI,
};

static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
}

static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0a000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
{
    /* We pay attention here to not modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
}

static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
{
    /* We pay attention here to not modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
}

static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0b000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
}

static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
{
    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
                (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_dat_reg(TCGContext *s,
                int cond, int opc, int rd, int rn, int rm, int shift)
{
    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
                    (rn << 16) | (rd << 12) | shift | rm);
}

static inline void tcg_out_nop(TCGContext *s)
{
    if (use_armv7_instructions) {
        /* Architected nop introduced in v6k.  */
        /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
           also Just So Happened to do nothing on pre-v6k so that we
           don't need to conditionalize it?  */
        tcg_out32(s, 0xe320f000);
    } else {
        /* Prior to that the assembler uses mov r0, r0.  */
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
{
    /* Simple reg-reg move, optimising out the 'do nothing' case */
    if (rd != rm) {
        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_dat_imm(TCGContext *s,
                int cond, int opc, int rd, int rn, int im)
{
    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
                    (rn << 16) | (rd << 12) | im);
}

static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
{
    int rot, opc, rn;

    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
       Speed things up by only checking when movt would be required.
       Prior to armv7, have one go at fully rotated immediates before
       doing the decomposition thing below.  */
    if (!use_armv7_instructions || (arg & 0xffff0000)) {
        rot = encode_imm(arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
                            rotl(arg, rot) | (rot << 7));
            return;
        }
        rot = encode_imm(~arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
                            rotl(~arg, rot) | (rot << 7));
            return;
        }
    }

    /* Use movw + movt.  */
    if (use_armv7_instructions) {
        /* movw */
        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
        if (arg & 0xffff0000) {
            /* movt */
            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
        }
        return;
    }

    /* TODO: This is very suboptimal, we can easily have a constant
       pool somewhere after all the instructions.  */
    opc = ARITH_MOV;
    rn = 0;
    /* If we have lots of leading 1's, we can shorten the sequence by
       beginning with mvn and then clearing higher bits with eor.  */
    if (clz32(~arg) > clz32(arg)) {
        opc = ARITH_MVN, arg = ~arg;
    }
    do {
        int i = ctz32(arg) & ~1;
        rot = ((32 - i) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
        arg &= ~(0xff << i);

        opc = ARITH_EOR;
        rn = rd;
    } while (arg);
}
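
/* Worked example (illustrative) of the decomposition loop above for
   arg = 0x12345678, assuming no mov/mvn immediate fits: each iteration
   peels off one rotated 8-bit chunk starting from the lowest set bits,
   giving
       mov  rd, #0x278
       eor  rd, rd, #0x5400
       eor  rd, rd, #0x2300000
       eor  rd, rd, #0x10000000
   so a worst-case 32-bit constant costs four instructions.  */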

static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rI" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        tcg_debug_assert(rot >= 0);
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
                            TCGReg dst, TCGReg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIK" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = ~rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opinv;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
                            TCGArg dst, TCGArg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIN" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = -rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opneg;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}
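
/* Example of the rIN fixup (illustrative): "add rd, rn, #-4" is not
   encodable, since -4 = 0xfffffffc has no rotated 8-bit form, so
   tcg_out_dat_rIN() negates the constant and emits "sub rd, rn, #4"
   via the opneg path.  tcg_out_dat_rIK() plays the same trick with
   the bitwise complement instead.  */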

static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
                                 TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
    if (!use_armv6_instructions && rd == rn) {
        if (rd == rm) {
            /* rd == rn == rm; copy an input to tmp first.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rm = rn = TCG_REG_TMP;
        } else {
            rn = rm;
            rm = rd;
        }
    }
    /* mul */
    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
}

static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* umull */
    tcg_out32(s, (cond << 28) | 0x00800090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* smull */
    tcg_out32(s, (cond << 28) | 0x00c00090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_ext8s(TCGContext *s, int cond,
                                 int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxtb */
        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(24));
    }
}

static inline void tcg_out_ext8u(TCGContext *s, int cond,
                                 int rd, int rn)
{
    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}

static inline void tcg_out_ext16s(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxth */
        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(16));
    }
}

static inline void tcg_out_ext16u(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* uxth */
        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_LSR(16));
    }
}

static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* revsh */
        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

/* Swap the two low bytes, assuming that the two high input bytes and the
   two high output bytes can hold any value. */
static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev */
        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
        tcg_out_dat_imm(s, cond, ARITH_BIC,
                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_ROR(8));
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
    }
}
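
/* Tracing the pre-v6 bswap32 sequence above on input bytes ABCD
   (illustrative):
     eor tmp, rn, rn, ror #16   -> tmp = A^C, B^D, C^A, D^B
     bic tmp, tmp, #0x00ff0000  -> tmp = A^C, 0,   C^A, D^B
     mov rd, rn, ror #8         -> rd  = D,   A,   B,   C
     eor rd, rd, tmp, lsr #8    -> rd  = D,   C,   B,   A  */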

bool tcg_target_deposit_valid(int ofs, int len)
{
    /* ??? Without bfi, we could improve over generic code by combining
       the right-shift from a non-zero ofs with the orr.  We do run into
       problems when rd == rs, and the mask generated from ofs+len doesn't
       fit into an immediate.  We would have to be careful not to pessimize
       wrt the optimizations performed on the expanded code.  */
    return use_armv7_instructions;
}

static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len, bool const_a1)
{
    if (const_a1) {
        /* bfi becomes bfc with rn == 15.  */
        a1 = 15;
    }
    /* bfi/bfc */
    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((ofs + len - 1) << 16));
}

/* Note that this routine is used for both LDR and LDRH formats, so we do
   not wish to include an immediate shift at this point.  */
static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
{
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
              | (w << 21) | (rn << 16) | (rt << 12) | rm);
}

static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, int imm8, bool p, bool w)
{
    bool u = 1;
    if (imm8 < 0) {
        imm8 = -imm8;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
}

static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                             TCGReg rn, int imm12, bool p, bool w)
{
    bool u = 1;
    if (imm12 < 0) {
        imm12 = -imm12;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | imm12);
}
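
/* In the ldr/str encodings above, bit 24 (P) selects pre- vs
   post-indexed addressing, bit 23 (U) add vs subtract of the offset,
   and bit 21 (W) base writeback.  The wrappers below pass p=1, w=0
   for a plain base+offset access; the _rwb variants pass p=1, w=1
   to write the updated address back into the base register.  */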

static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
}

/* Register pre-increment with base writeback.  */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld32u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld32_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_st32(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st32_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld16u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld16u_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld16s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld16s_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_st16(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st16_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld8u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld8_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld8s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld8s_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_st8(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st8_12(s, cond, rd, rn, offset);
}

/* The _goto case is normally between TBs within the same code buffer, and
 * with the code buffer limited to 16MB we wouldn't need the long case.
 * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
 */
static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
        tcg_out_b(s, cond, disp);
        return;
    }

    tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
    if (use_armv5t_instructions) {
        tcg_out_bx(s, cond, TCG_REG_TMP);
    } else {
        if (addri & 1) {
            tcg_abort();
        }
        tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
    }
}

/* The call case is mostly used for helpers - so it's not unreasonable
 * for them to be beyond branch range */
static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
        if (addri & 1) {
            /* Use BLX if the target is in Thumb mode */
            if (!use_armv5t_instructions) {
                tcg_abort();
            }
            tcg_out_blx_imm(s, disp);
        } else {
            tcg_out_bl(s, COND_AL, disp);
        }
    } else if (use_armv7_instructions) {
        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
    } else {
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
        tcg_out32(s, addri);
    }
}
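
/* The pre-v7 fallback above is a literal-pool call: since reading PC
   yields the instruction address + 8, "add lr, pc, #4" sets the return
   address to just past the pool word, and "ldr pc, [pc, #-4]" loads
   that word (emitted by the trailing tcg_out32) as the call target.  */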

void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
    tcg_insn_unit *target = (tcg_insn_unit *)addr;

    /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
    reloc_pc24_atomic(code_ptr, target);
    flush_icache_range(jmp_addr, jmp_addr + 4);
}

static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
{
    if (l->has_value) {
        tcg_out_goto(s, cond, l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
        tcg_out_b_noaddr(s, cond);
    }
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    if (use_armv7_instructions) {
        tcg_out32(s, INSN_DMB_ISH);
    } else if (use_armv6_instructions) {
        tcg_out32(s, INSN_DMB_MCR);
    }
}
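
/* Illustrative encodings: INSN_DMB_ISH is the ARMv7 "dmb ish", and
   INSN_DMB_MCR is the ARMv6 CP15 barrier "mcr p15, 0, r0, c7, c10, 5".
   Pre-v6 hosts get no barrier instruction at all, presumably on the
   assumption that such hosts are uniprocessor.  */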

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LESL] = helper_le_ldul_mmu,

    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BESL] = helper_be_ldul_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argregs 0..3 are real registers, 4+ go on the stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg < 4) {                                                      \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    } else {                                                               \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    }                                                                      \
    return argreg + 1;                                                     \
}

DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )

static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
                                TCGReg arglo, TCGReg arghi)
{
    /* 64 bit arguments must go in even/odd register pairs
     * and in 8-aligned stack slots.
     */
    if (argreg & 1) {
        argreg++;
    }
    if (use_armv6_instructions && argreg >= 4
        && (arglo & 1) == 0 && arghi == arglo + 1) {
        tcg_out_strd_8(s, COND_AL, arglo,
                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
        return argreg + 2;
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, arglo);
        argreg = tcg_out_arg_reg32(s, argreg, arghi);
        return argreg;
    }
}
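
/* For example (illustrative): marshalling (env, 64-bit guest address)
   for a load helper puts env in r0; argreg is then 1 and is bumped to
   2, so the address lands in the EABI-mandated even/odd pair r2:r3
   and r1 is simply skipped.  */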

#define TLB_SHIFT       (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

/* We're expecting to use an 8-bit immediate and to mask.  */
QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);

/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
   Using the offset of the second entry in the last tlb table ensures
   that we can index all of the elements of the first entry.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
                  > 0xffff);

/* Load and compare a TLB entry, leaving the flags set.  Returns the register
   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                               TCGMemOp opc, int mem_index, bool is_load)
{
    TCGReg base = TCG_AREG0;
    int cmp_off =
        (is_load
         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);

    /* Should generate something like the following:
     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
     *   add    r2, env, #high
     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
     *   ldr    r0, [r2, #cmp]                                    (4)
     *   tst    addrlo, #s_mask
     *   ldr    r2, [r2, #add]                                    (5)
     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
     */
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));

    /* We checked that the offset is contained within 16 bits above.  */
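    /* Note: (24 << 7) | (cmp_off >> 8) encodes the high byte of cmp_off
       as an 8-bit immediate rotated right by 24, i.e. shifted left by 8,
       so this single ADD folds bits [15:8] of the offset into the base.  */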
    if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                        (24 << 7) | (cmp_off >> 8));
        base = TCG_REG_R2;
        add_off -= cmp_off & 0xff00;
        cmp_off &= 0xff;
    }

    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));

    /* Load the tlb comparator.  Use ldrd if needed and available,
       but due to how the pointer needs setting up, ldm isn't useful.
       Base arm5 doesn't have ldrd, but armv5te does.  */
    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
    } else {
        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
        if (TARGET_LONG_BITS == 64) {
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
        }
    }

    /* Check alignment.  We don't support inline unaligned accesses,
       but we can easily support overalignment checks.  */
    if (a_bits < s_bits) {
        a_bits = s_bits;
    }
    if (a_bits) {
        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
    }

    /* Load the tlb addend.  */
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);

    tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));

    if (TARGET_LONG_BITS == 64) {
        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
    }

    return TCG_REG_R2;
}

/* Record the context of a call to the out of line helper code for the slow
   path for a load or store, so that we can later generate the correct
   helper code.  */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
                                TCGReg addrhi, tcg_insn_unit *raddr,
                                tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    void *func;

    reloc_pc24(lb->label_ptr[0], s->code_ptr);

    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }
    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* For armv6 we can use the canonical unsigned helpers and minimize
       icache usage.  For pre-armv6, use the signed helpers since we do
       not have a single insn sign-extend.  */
    if (use_armv6_instructions) {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
    } else {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
        if (opc & MO_SIGN) {
            opc = MO_UL;
        }
    }
    tcg_out_call(s, func);

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_SW:
        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    default:
        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_Q:
        if (datalo != TCG_REG_R1) {
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
        } else if (datahi != TCG_REG_R0) {
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        } else {
            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
        }
        break;
    }

    tcg_out_goto(s, COND_AL, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);

    reloc_pc24(lb->label_ptr[0], s->code_ptr);

    argreg = TCG_REG_R0;
    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SIZE) {
    case MO_8:
        argreg = tcg_out_arg_reg8(s, argreg, datalo);
        break;
    case MO_16:
        argreg = tcg_out_arg_reg16(s, argreg, datalo);
        break;
    case MO_32:
    default:
        argreg = tcg_out_arg_reg32(s, argreg, datalo);
        break;
    case MO_64:
        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
        break;
    }

    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* Tail-call to the helper, which will return to the fast path.  */
    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#endif /* SOFTMMU */

static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_SB:
        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_UW:
        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
            } else if (dl != addend) {
                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
            } else {
                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
                                addend, addrlo, SHIFT_IMM_LSL(0));
                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}

static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_SB:
        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_UW:
        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
            } else if (dl == addrlo) {
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
            } else {
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);

    /* This is a conditional BL only to load a pointer within this opcode
       into LR for the slow path.  We will not be using the value for a
       tail call.  */
    label_ptr = s->code_ptr;
    tcg_out_bl_noaddr(s, COND_NE);

    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);

    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}

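/* Emit a guest store addressed by addrlo + addend.  Byte-swapped stores
   bounce the data through TCG_REG_R0 first, so R0 must stay free here;
   the "s" constraints on the store ops below are meant to ensure that.  */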
static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
        }
        break;
    case MO_32:
    default:
        if (bswap) {
            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
        }
        break;
    case MO_64:
        /* Avoid strd for user-only emulation, to handle unaligned.  */
        if (bswap) {
            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
        } else if (USING_SOFTMMU && use_armv6_instructions
                   && (datalo & 1) == 0 && datahi == datalo + 1) {
            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
        } else {
            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
            tcg_out_st32_12(s, cond, datahi, addend, 4);
        }
        break;
    }
}

static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
        } else {
            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_32:
    default:
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
        } else {
            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_64:
        /* Avoid strd for user-only emulation, to handle unaligned.  */
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
        } else if (USING_SOFTMMU && use_armv6_instructions
                   && (datalo & 1) == 0 && datahi == datalo + 1) {
            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
        } else {
            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
        }
        break;
    }
}

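/* Emit a full guest store.  Note the ordering in the softmmu path: the
   store is emitted first, predicated on COND_EQ (TLB hit), and the
   conditional BL to the slow path comes last so that the helper returns
   directly past the end of the opcode.  */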
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);

    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);

    /* The conditional call must come last, as we're going to return here.  */
    label_ptr = s->code_ptr;
    tcg_out_bl_noaddr(s, COND_NE);

    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
                              datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}

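/* Entry point of the epilogue; set by tcg_target_qemu_prologue and used
   by INDEX_op_exit_tb to return to the main loop.  */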
static tcg_insn_unit *tb_ret_addr;

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                const TCGArg *args, const int *const_args)
{
    TCGArg a0, a1, a2, a3, a4, a5;
    int c;

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
        tcg_out_goto(s, COND_AL, tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_insn_offset) {
            /* Direct jump method */
            s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
            tcg_out_b_noaddr(s, COND_AL);
        } else {
            /* Indirect jump method */
            intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
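            /* ld32_12 has only a 12-bit offset field, so split the
               absolute address of the jump-target slot into a base
               loaded with movi32 plus a low 12-bit displacement.  */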
            tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
        }
        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
        break;
    case INDEX_op_br:
        tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
        break;

    case INDEX_op_ld8u_i32:
        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16u_i32:
        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_movcond_i32:
        /* Constraints mean that v2 is always in the same register as dest,
         * so we only need to do "if condition passed, move v1 to dest".
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
        break;
    case INDEX_op_add_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_sub_i32:
        if (const_args[1]) {
            if (const_args[2]) {
                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
            } else {
                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
                               args[0], args[2], args[1], 1);
            }
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
                            args[0], args[1], args[2], const_args[2]);
        }
        break;
    case INDEX_op_and_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_andc_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_or_i32:
        c = ARITH_ORR;
        goto gen_arith;
    case INDEX_op_xor_i32:
        c = ARITH_EOR;
        /* Fall through.  */
    gen_arith:
        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_add2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
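        /* If the low-part output overlaps an input still needed by the
           high-part ADC, build the low part in TMP and move it into
           place once both halves are done.  */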
        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
                        a0, a2, a4, const_args[4]);
        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
                        a1, a3, a5, const_args[5]);
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_sub2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        if (const_args[2]) {
            if (const_args[4]) {
                tcg_out_movi32(s, COND_AL, a0, a4);
                a4 = a0;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
        }
        if (const_args[3]) {
            if (const_args[5]) {
                tcg_out_movi32(s, COND_AL, a1, a5);
                a5 = a1;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
        } else {
            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
                            a1, a3, a5, const_args[5]);
        }
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_neg_i32:
        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
        break;
    case INDEX_op_not_i32:
        tcg_out_dat_reg(s, COND_AL,
                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
        break;
    case INDEX_op_mul_i32:
        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    case INDEX_op_muls2_i32:
        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    /* XXX: Perhaps args[2] & 0x1f is wrong */
    case INDEX_op_shl_i32:
        c = const_args[2] ?
                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
        goto gen_shift32;
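    /* For the right shifts and rotate below, an immediate count of zero
       cannot use the obvious encoding: ARM encodes LSR/ASR #0 as a shift
       by 32 and ROR #0 as RRX, so a zero count is emitted as LSL #0,
       i.e. a plain move.  */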
    case INDEX_op_shr_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
        goto gen_shift32;
    case INDEX_op_sar_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
        goto gen_shift32;
    case INDEX_op_rotr_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
        /* Fall through.  */
    gen_shift32:
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
        break;

    case INDEX_op_rotl_i32:
        if (const_args[2]) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            ((0x20 - args[2]) & 0x1f) ?
                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
                            SHIFT_IMM_LSL(0));
        } else {
            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            SHIFT_REG_ROR(TCG_REG_TMP));
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[1], const_args[1]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                           arg_label(args[3]));
        break;
    case INDEX_op_brcond2_i32:
        /* The resulting conditions are:
         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[2], const_args[2]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
                           arg_label(args[5]));
        break;
    case INDEX_op_setcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                        ARITH_MOV, args[0], 0, 0);
        break;
    case INDEX_op_setcond2_i32:
        /* See brcond2_i32 comment */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[2], args[4], const_args[4]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
                        ARITH_MOV, args[0], 0, 0);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    case INDEX_op_bswap16_i32:
        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16u_i32:
        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_deposit_i32:
        tcg_out_deposit(s, COND_AL, args[0], args[2],
                        args[3], args[4], const_args[2]);
        break;

    case INDEX_op_div_i32:
        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_divu_i32:
        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }
}

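/* Constraint letters used below: "r" is any core register and "0" ties an
   operand to output 0.  "I" accepts a standard ARM rotated 8-bit immediate,
   and "K"/"N" are meant to also accept values whose bitwise inverse or
   negation encodes, matching the rIK/rIN helper pairs above; "l" and "s"
   keep qemu_ld/qemu_st operands out of the registers needed for the TLB
   lookup and bswap scratch.  See target_parse_constraint for the
   authoritative definitions.  */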
static const TCGTargetOpDef arm_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "r", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    /* TODO: "r", "r", "ri" */
    { INDEX_op_add_i32, { "r", "r", "rIN" } },
    { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
    { INDEX_op_mul_i32, { "r", "r", "r" } },
    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
    { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
    { INDEX_op_and_i32, { "r", "r", "rIK" } },
    { INDEX_op_andc_i32, { "r", "r", "rIK" } },
    { INDEX_op_or_i32, { "r", "r", "rI" } },
    { INDEX_op_xor_i32, { "r", "r", "rI" } },
    { INDEX_op_neg_i32, { "r", "r" } },
    { INDEX_op_not_i32, { "r", "r" } },

    { INDEX_op_shl_i32, { "r", "r", "ri" } },
    { INDEX_op_shr_i32, { "r", "r", "ri" } },
    { INDEX_op_sar_i32, { "r", "r", "ri" } },
    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
    { INDEX_op_rotr_i32, { "r", "r", "ri" } },

    { INDEX_op_brcond_i32, { "r", "rIN" } },
    { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },

    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },

#if TARGET_LONG_BITS == 32
    { INDEX_op_qemu_ld_i32, { "r", "l" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
    { INDEX_op_qemu_st_i32, { "s", "s" } },
    { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
    { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
    { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
#endif

    { INDEX_op_bswap16_i32, { "r", "r" } },
    { INDEX_op_bswap32_i32, { "r", "r" } },

    { INDEX_op_ext8s_i32, { "r", "r" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },

    { INDEX_op_div_i32, { "r", "r", "r" } },
    { INDEX_op_divu_i32, { "r", "r", "r" } },

    { INDEX_op_mb, { } },
    { -1 },
};

static void tcg_target_init(TCGContext *s)
{
    /* Only probe for the platform and capabilities if we haven't already
       determined maximum values at compile time.  */
#ifndef use_idiv_instructions
    {
        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
    }
#endif
    if (__ARM_ARCH < 7) {
        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
            arm_arch = pl[1] - '0';
        }
    }

    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_R0) |
                     (1 << TCG_REG_R1) |
                     (1 << TCG_REG_R2) |
                     (1 << TCG_REG_R3) |
                     (1 << TCG_REG_R12) |
                     (1 << TCG_REG_R14));

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);

    tcg_add_target_add_op_defs(arm_op_defs);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
}

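/* ARM stores have no immediate data operand, so tcg_out_sti returns
   false and the generic code materializes the constant in a register
   before storing it.  */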
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
}

static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg ret, tcg_target_long arg)
{
    tcg_out_movi32(s, COND_AL, ret, arg);
}

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)
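
/* PUSH_SIZE is the nine words pushed by the prologue below (r4-r11 plus
   lr): 9 * 4 = 36 bytes.  FRAME_SIZE adds the static call-argument and
   TCG temp areas and rounds the total up to TCG_TARGET_STACK_ALIGN.  */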

static void tcg_target_qemu_prologue(TCGContext *s)
{
    int stack_addend;

    /* Calling convention requires us to save r4-r11 and lr.  */
    /* stmdb sp!, { r4 - r11, lr } */
    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
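    /* Register mask 0x4ff0 = bits 4-11 (r4-r11) plus bit 14 (lr).  */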

    /* Reserve callee argument and tcg temp space.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;

    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);

    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
    tb_ret_addr = s->code_ptr;

    /* Epilogue.  We branch here via tb_ret_addr.  */
    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);

    /* ldmia sp!, { r4 - r11, pc } */
    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
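    /* Same register mask as the prologue's stmdb, but with bit 15 (pc)
       replacing lr (0x8ff0); loading pc performs the return.  */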
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[18];
} DebugFrame;

#define ELF_HOST_MACHINE EM_ARM

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 13,                         /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* The following must match the stmdb in the prologue.  */
        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
    }
};
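
/* Register the generated-code buffer with the in-process GDB JIT
   interface, using the unwind info built above, so that debuggers can
   unwind through translated code.  */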

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
