/* qemu/tcg/arm/tcg-target.inc.c */
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Andrzej Zaborowski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "tcg-be-ldst.h"

/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
#ifndef __ARM_ARCH
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
     || defined(__ARM_ARCH_7EM__)
#  define __ARM_ARCH 7
# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
       || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
#  define __ARM_ARCH 6
# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
       || defined(__ARM_ARCH_5TEJ__)
#  define __ARM_ARCH 5
# else
#  define __ARM_ARCH 4
# endif
#endif

static int arm_arch = __ARM_ARCH;

#if defined(__ARM_ARCH_5T__) \
    || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
# define use_armv5t_instructions 1
#else
# define use_armv5t_instructions use_armv6_instructions
#endif

#define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6)
#define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7)

#ifndef use_idiv_instructions
bool use_idiv_instructions;
#endif

/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
#ifdef CONFIG_SOFTMMU
# define USING_SOFTMMU 1
#else
# define USING_SOFTMMU 0
#endif

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",
    "%r1",
    "%r2",
    "%r3",
    "%r4",
    "%r5",
    "%r6",
    "%r7",
    "%r8",
    "%r9",
    "%r10",
    "%r11",
    "%r12",
    "%r13",
    "%r14",
    "%pc",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R13,
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R12,
    TCG_REG_R14,
};

static const int tcg_target_call_iarg_regs[4] = {
    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
};
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_R0, TCG_REG_R1
};

#define TCG_REG_TMP  TCG_REG_R12

static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
}

static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    tcg_insn_unit insn = atomic_read(code_ptr);
    tcg_debug_assert(offset == sextract32(offset, 0, 24));
    atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
}
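
/*
 * Worked example (illustrative, not from the original source): ARM
 * reads PC as the current insn address + 8, hence the -8 bias above.
 * A B/BL at 0x1000 targeting 0x2000 encodes (0x2000 - 0x1000 - 8) >> 2
 * = 0x3fe in its low 24 bits.  The atomic variant exists so that a
 * concurrently executing thread sees either the old or the new offset,
 * never a torn word.
 */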

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(type == R_ARM_PC24);
    tcg_debug_assert(addend == 0);
    reloc_pc24(code_ptr, (tcg_insn_unit *)value);
}

#define TCG_CT_CONST_ARM  0x100
#define TCG_CT_CONST_INV  0x200
#define TCG_CT_CONST_NEG  0x400
#define TCG_CT_CONST_ZERO 0x800

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch (ct_str[0]) {
    case 'I':
        ct->ct |= TCG_CT_CONST_ARM;
        break;
    case 'K':
        ct->ct |= TCG_CT_CONST_INV;
        break;
    case 'N': /* The gcc constraint letter is L, already used here.  */
        ct->ct |= TCG_CT_CONST_NEG;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;

    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
        break;

    /* qemu_ld address */
    case 'l':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* r0-r2,lr will be overwritten when reading the tlb entry,
           so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    /* qemu_st address & data */
    case 's':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
           and r0-r1 are used for byte swapping, so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#if defined(CONFIG_SOFTMMU)
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
#if TARGET_LONG_BITS == 64
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;

    return 0;
}

static inline uint32_t rotl(uint32_t val, int n)
{
    return (val << n) | (val >> (32 - n));
}

/* ARM immediates for ALU instructions are made of an unsigned 8-bit
   value right-rotated by an even amount between 0 and 30. */
static inline int encode_imm(uint32_t imm)
{
    int shift;

    /* simple case, only lower bits */
    if ((imm & ~0xff) == 0) {
        return 0;
    }
    /* then try a simple even shift */
    shift = ctz32(imm) & ~1;
    if (((imm >> shift) & ~0xff) == 0) {
        return 32 - shift;
    }
    /* now try harder with rotations */
    if ((rotl(imm, 2) & ~0xff) == 0) {
        return 2;
    }
    if ((rotl(imm, 4) & ~0xff) == 0) {
        return 4;
    }
    if ((rotl(imm, 6) & ~0xff) == 0) {
        return 6;
    }
    /* imm can't be encoded */
    return -1;
}
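
/*
 * Illustrative examples of the encoding above (hand-checked, not part
 * of the original source):
 *   encode_imm(0x000000ff) == 0   -> imm8=0xff, no rotation
 *   encode_imm(0x00ff0000) == 16  -> imm8=0xff, ROR #16
 *   encode_imm(0xf000000f) == 4   -> imm8=0xff, ROR #4 (wraps around)
 *   encode_imm(0x00000101) == -1  -> not representable as imm8 ROR 2n
 * The return value is the left-rotation that brings the constant into
 * the low 8 bits; the emitted insn rotates right by the same amount.
 */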

static inline int check_fit_imm(uint32_t imm)
{
    return encode_imm(imm) >= 0;
}

/* Test if a constant matches the constraint.
 * TODO: define constraints for:
 *
 * ldr/str offset:   between -0xfff and 0xfff
 * ldrh/strh offset: between -0xff and 0xff
 * mov operand2:     values represented with x << (2 * y), x < 0x100
 * add, sub, eor...: ditto
 */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct;
    ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else {
        return 0;
    }
}

#define TO_CPSR (1 << 20)

typedef enum {
    ARITH_AND = 0x0 << 21,
    ARITH_EOR = 0x1 << 21,
    ARITH_SUB = 0x2 << 21,
    ARITH_RSB = 0x3 << 21,
    ARITH_ADD = 0x4 << 21,
    ARITH_ADC = 0x5 << 21,
    ARITH_SBC = 0x6 << 21,
    ARITH_RSC = 0x7 << 21,
    ARITH_TST = 0x8 << 21 | TO_CPSR,
    ARITH_CMP = 0xa << 21 | TO_CPSR,
    ARITH_CMN = 0xb << 21 | TO_CPSR,
    ARITH_ORR = 0xc << 21,
    ARITH_MOV = 0xd << 21,
    ARITH_BIC = 0xe << 21,
    ARITH_MVN = 0xf << 21,

    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,
} ARMInsn;

#define SHIFT_IMM_LSL(im)       (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im)       (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im)       (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im)       (((im) << 7) | 0x60)
#define SHIFT_REG_LSL(rs)       (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs)       (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs)       (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs)       (((rs) << 8) | 0x70)
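
/*
 * Example (illustrative): these macros build the shifter-operand field
 * of a data-processing insn.  SHIFT_IMM_LSL(16) == 16 << 7 == 0x800, so
 * tcg_out_dat_reg(s, COND_AL, ARITH_MOV, rd, 0, rn, SHIFT_IMM_LSL(16))
 * assembles to "mov rd, rn, lsl #16".  The SHIFT_REG_* forms place a
 * shift-amount register at bits 8..11 instead.
 */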

enum arm_cond_code_e {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* Unsigned greater or equal */
    COND_CC = 0x3,      /* Unsigned less than */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
};

static const uint8_t tcg_cond_to_arm_cond[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_CC,
    [TCG_COND_GEU] = COND_CS,
    [TCG_COND_LEU] = COND_LS,
    [TCG_COND_GTU] = COND_HI,
};

static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
}

static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0a000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
{
    /* We take care here not to modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
}

static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
{
    /* We take care here not to modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
}

static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0b000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
}

static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
{
    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
                (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_dat_reg(TCGContext *s,
                int cond, int opc, int rd, int rn, int rm, int shift)
{
    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
                    (rn << 16) | (rd << 12) | shift | rm);
}

static inline void tcg_out_nop(TCGContext *s)
{
    if (use_armv7_instructions) {
        /* Architected nop introduced in v6k.  */
        /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
           also Just So Happened to do nothing on pre-v6k so that we
           don't need to conditionalize it?  */
        tcg_out32(s, 0xe320f000);
    } else {
        /* Prior to that the assembler uses mov r0, r0.  */
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
{
    /* Simple reg-reg move, optimising out the 'do nothing' case */
    if (rd != rm) {
        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_dat_imm(TCGContext *s,
                int cond, int opc, int rd, int rn, int im)
{
    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
                    (rn << 16) | (rd << 12) | im);
}

static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
{
    int rot, opc, rn;

    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
       Speed things up by only checking when movt would be required.
       Prior to armv7, have one go at fully rotated immediates before
       doing the decomposition thing below.  */
    if (!use_armv7_instructions || (arg & 0xffff0000)) {
        rot = encode_imm(arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
                            rotl(arg, rot) | (rot << 7));
            return;
        }
        rot = encode_imm(~arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
                            rotl(~arg, rot) | (rot << 7));
            return;
        }
    }

    /* Use movw + movt.  */
    if (use_armv7_instructions) {
        /* movw */
        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
        if (arg & 0xffff0000) {
            /* movt */
            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
        }
        return;
    }

    /* TODO: This is very suboptimal, we can easily have a constant
       pool somewhere after all the instructions.  */
    opc = ARITH_MOV;
    rn = 0;
    /* If we have lots of leading 1's, we can shorten the sequence by
       beginning with mvn and then clearing higher bits with eor.  */
    if (clz32(~arg) > clz32(arg)) {
        opc = ARITH_MVN, arg = ~arg;
    }
    do {
        int i = ctz32(arg) & ~1;
        rot = ((32 - i) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
        arg &= ~(0xff << i);

        opc = ARITH_EOR;
        rn = rd;
    } while (arg);
}
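
/*
 * Illustrative expansions of tcg_out_movi32 (assuming COND_AL):
 *   0x0000ffff, v7:     movw rd, #0xffff
 *   0xabcd1234, v7:     movw rd, #0x1234 ; movt rd, #0xabcd
 *   0xffffff00, any:    mvn  rd, #0xff            (rotated-imm path)
 *   0x00ff00ff, pre-v7: mov  rd, #0xff ; eor rd, rd, #0x00ff0000
 */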

static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rI" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        tcg_debug_assert(rot >= 0);
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
                            TCGReg dst, TCGReg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIK" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = ~rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opinv;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
                            TCGArg dst, TCGArg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIN" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = -rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opneg;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
                                 TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
    if (!use_armv6_instructions && rd == rn) {
        if (rd == rm) {
            /* rd == rn == rm; copy an input to tmp first.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rm = rn = TCG_REG_TMP;
        } else {
            rn = rm;
            rm = rd;
        }
    }
    /* mul */
    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
}

static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* umull */
    tcg_out32(s, (cond << 28) | 0x00800090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* smull */
    tcg_out32(s, (cond << 28) | 0x00c00090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_ext8s(TCGContext *s, int cond,
                                 int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxtb */
        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(24));
    }
}

static inline void tcg_out_ext8u(TCGContext *s, int cond,
                                 int rd, int rn)
{
    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}

static inline void tcg_out_ext16s(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxth */
        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(16));
    }
}

static inline void tcg_out_ext16u(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* uxth */
        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_LSR(16));
    }
}

static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* revsh */
        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

/* Swap the two low bytes, assuming that the two high input bytes and the
   two high output bytes may hold any value. */
static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev */
        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
        tcg_out_dat_imm(s, cond, ARITH_BIC,
                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_ROR(8));
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
    }
}
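
/*
 * The pre-v6 path above is the classic 4-insn byte reverse.  With input
 * bytes ABCD (A most significant), illustratively:
 *   eor tmp, rn, rn, ror #16   ; tmp = A^C, B^D, C^A, D^B
 *   bic tmp, tmp, #0x00ff0000  ; tmp = A^C,   0, C^A, D^B
 *   mov rd, rn, ror #8         ; rd  =   D,   A,   B,   C
 *   eor rd, rd, tmp, lsr #8    ; rd  =   D,   C,   B,   A
 * (0xff | 0x800 encodes the constant 0x00ff0000 as imm8=0xff ROR #16.)
 */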

bool tcg_target_deposit_valid(int ofs, int len)
{
    /* ??? Without bfi, we could improve over generic code by combining
       the right-shift from a non-zero ofs with the orr.  We do run into
       problems when rd == rs, and the mask generated from ofs+len doesn't
       fit into an immediate.  We would have to be careful not to pessimize
       wrt the optimizations performed on the expanded code.  */
    return use_armv7_instructions;
}

static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len, bool const_a1)
{
    if (const_a1) {
        /* bfi becomes bfc with rn == 15.  */
        a1 = 15;
    }
    /* bfi/bfc */
    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((ofs + len - 1) << 16));
}
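
/*
 * Illustrative encoding: depositing len=8 bits at ofs=16 from r5 into
 * r4 emits "bfi r4, r5, #16, #8" -- lsb=16 in bits 7..11 and
 * msb = ofs + len - 1 = 23 in bits 16..20.  With a constant zero
 * source, rn=15 turns the same pattern into "bfc r4, #16, #8".
 */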

/* Note that this routine is used for both LDR and LDRH formats, so we do
   not wish to include an immediate shift at this point.  */
static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
{
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
              | (w << 21) | (rn << 16) | (rt << 12) | rm);
}

static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, int imm8, bool p, bool w)
{
    bool u = 1;
    if (imm8 < 0) {
        imm8 = -imm8;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
}

static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                             TCGReg rn, int imm12, bool p, bool w)
{
    bool u = 1;
    if (imm12 < 0) {
        imm12 = -imm12;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | imm12);
}
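
/*
 * The p/u/w bits above select the ARM addressing mode: p=1 pre-indexes,
 * u=1 adds (rather than subtracts) the offset, w=1 writes the address
 * back to rn.  E.g. the plain forms below use p=1,u=1,w=0, i.e.
 * "[rn, #imm]"; the _rwb forms further down use w=1, i.e. "[rn, rm]!".
 */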

static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
}

/* Register pre-increment with base writeback.  */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld32u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld32_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st32(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st32_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld16u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld16u_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld16s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld16s_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st16(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st16_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld8u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld8_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld8s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld8s_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st8(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st8_12(s, cond, rd, rn, offset);
    }
}

/* The _goto case is normally between TBs within the same code buffer, and
 * with the code buffer limited to 16MB we wouldn't need the long case.
 * But we also use it for the tail-call to the qemu_ld/st helpers, which
 * does need it.
 */
static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
        tcg_out_b(s, cond, disp);
        return;
    }

    tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
    if (use_armv5t_instructions) {
        tcg_out_bx(s, cond, TCG_REG_TMP);
    } else {
        if (addri & 1) {
            tcg_abort();
        }
        tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
    }
}

/* The call case is mostly used for helpers - so it's not unreasonable
 * for them to be beyond branch range */
static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
        if (addri & 1) {
            /* Use BLX if the target is in Thumb mode */
            if (!use_armv5t_instructions) {
                tcg_abort();
            }
            tcg_out_blx_imm(s, disp);
        } else {
            tcg_out_bl(s, COND_AL, disp);
        }
    } else if (use_armv7_instructions) {
        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
    } else {
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
        tcg_out32(s, addri);
    }
}
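
/*
 * A note on the pre-v7 fallback above (illustrative reading): since
 * reading PC yields the insn address + 8, "add lr, pc, #4" points LR
 * just past the inline literal, and "ldr pc, [pc, #-4]" fetches that
 * literal (at ldr_address + 8 - 4) into PC.  Control arrives at addri
 * with a correct return address and no scratch register disturbed.
 */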

void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
    tcg_insn_unit *target = (tcg_insn_unit *)addr;

    /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
    reloc_pc24_atomic(code_ptr, target);
    flush_icache_range(jmp_addr, jmp_addr + 4);
}

static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
{
    if (l->has_value) {
        tcg_out_goto(s, cond, l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
        tcg_out_b_noaddr(s, cond);
    }
}

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LESL] = helper_le_ldul_mmu,

    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BESL] = helper_be_ldul_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argregs 0..3 are real registers, 4 and up go on the stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg < 4) {                                                      \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    } else {                                                               \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    }                                                                      \
    return argreg + 1;                                                     \
}

DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
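
/*
 * For instance (illustrative expansion), tcg_out_arg_reg16 above
 * becomes a function that either zero-extends directly into r0-r3 via
 * tcg_out_ext16u, or, for argreg >= 4, extends into TCG_REG_TMP and
 * stores that to the outgoing stack slot at sp + (argreg - 4) * 4.
 */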

static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
                                TCGReg arglo, TCGReg arghi)
{
    /* 64 bit arguments must go in even/odd register pairs
     * and in 8-aligned stack slots.
     */
    if (argreg & 1) {
        argreg++;
    }
    if (use_armv6_instructions && argreg >= 4
        && (arglo & 1) == 0 && arghi == arglo + 1) {
        tcg_out_strd_8(s, COND_AL, arglo,
                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
        return argreg + 2;
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, arglo);
        argreg = tcg_out_arg_reg32(s, argreg, arghi);
        return argreg;
    }
}
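
/*
 * Example: with a 64-bit guest address the ld/st slow paths call this
 * with argreg == 1 (env already in r0); argreg is bumped so the address
 * lands in the r2/r3 pair, matching the EABI rule that 64-bit arguments
 * occupy even/odd register pairs, and r1 is simply skipped.
 */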

#define TLB_SHIFT       (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

/* We're expecting to use an 8-bit immediate and to mask.  */
QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);

/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
   Using the offset of the second entry in the last tlb table ensures
   that we can index all of the elements of the first entry.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
                  > 0xffff);

/* Load and compare a TLB entry, leaving the flags set.  Returns the register
   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                               TCGMemOp s_bits, int mem_index, bool is_load)
{
    TCGReg base = TCG_AREG0;
    int cmp_off =
        (is_load
         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);

    /* Should generate something like the following:
     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
     *   add    r2, env, #high
     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
     *   ldr    r0, [r2, #cmp]                                    (4)
     *   tst    addrlo, #s_mask
     *   ldr    r2, [r2, #add]                                    (5)
     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
     */
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));

    /* We checked that the offset is contained within 16 bits above.  */
    if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                        (24 << 7) | (cmp_off >> 8));
        base = TCG_REG_R2;
        add_off -= cmp_off & 0xff00;
        cmp_off &= 0xff;
    }

    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));

    /* Load the tlb comparator.  Use ldrd if needed and available,
       but due to how the pointer needs setting up, ldm isn't useful.
       Base arm5 doesn't have ldrd, but armv5te does.  */
    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
    } else {
        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
        if (TARGET_LONG_BITS == 64) {
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
        }
    }

    /* Check alignment.  */
    if (s_bits) {
        tcg_out_dat_imm(s, COND_AL, ARITH_TST,
                        0, addrlo, (1 << s_bits) - 1);
    }

    /* Load the tlb addend.  */
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);

    tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));

    if (TARGET_LONG_BITS == 64) {
        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
    }

    return TCG_REG_R2;
}
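
/*
 * Note on the rotated-immediate add above (illustrative reading):
 * (24 << 7) | (cmp_off >> 8) encodes imm8 = cmp_off >> 8 with rotation
 * field 12, i.e. ROR #24 == LSL #8, so it adds exactly the bits of
 * cmp_off above the low byte.  This relies on cmp_off < 0x10000, which
 * the build-time assert on tlb_table guarantees.
 */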

/* Record the context of a call to the out of line helper code for the slow
   path for a load or store, so that we can later generate the correct
   helper code.  */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
                                TCGReg addrhi, tcg_insn_unit *raddr,
                                tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    void *func;

    reloc_pc24(lb->label_ptr[0], s->code_ptr);

    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }
    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* For armv6 we can use the canonical unsigned helpers and minimize
       icache usage.  For pre-armv6, use the signed helpers since we do
       not have a single insn sign-extend.  */
    if (use_armv6_instructions) {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
    } else {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
        if (opc & MO_SIGN) {
            opc = MO_UL;
        }
    }
    tcg_out_call(s, func);

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_SW:
        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    default:
        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_Q:
        if (datalo != TCG_REG_R1) {
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
        } else if (datahi != TCG_REG_R0) {
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        } else {
            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
        }
        break;
    }

    tcg_out_goto(s, COND_AL, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);

    reloc_pc24(lb->label_ptr[0], s->code_ptr);

    argreg = TCG_REG_R0;
    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SIZE) {
    case MO_8:
        argreg = tcg_out_arg_reg8(s, argreg, datalo);
        break;
    case MO_16:
        argreg = tcg_out_arg_reg16(s, argreg, datalo);
        break;
    case MO_32:
    default:
        argreg = tcg_out_arg_reg32(s, argreg, datalo);
        break;
    case MO_64:
        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
        break;
    }

    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* Tail-call to the helper, which will return to the fast path.  */
    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#endif /* SOFTMMU */

static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_SB:
        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_UW:
        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
            } else if (dl != addend) {
                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
            } else {
                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
                                addend, addrlo, SHIFT_IMM_LSL(0));
                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}

static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_SB:
        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_UW:
        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
            } else if (dl == addrlo) {
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
            } else {
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);

    /* This is a conditional BL only to load a pointer within this opcode
       into LR for the slow path.  We will not be using the value for a
       tail call.  */
1506    label_ptr = s->code_ptr;
1507    tcg_out_bl_noaddr(s, COND_NE);
1508
1509    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1510
1511    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1512                        s->code_ptr, label_ptr);
1513#else /* !CONFIG_SOFTMMU */
1514    if (guest_base) {
1515        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1516        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1517    } else {
1518        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1519    }
1520#endif
1521}
1522
1523static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1524                                         TCGReg datalo, TCGReg datahi,
1525                                         TCGReg addrlo, TCGReg addend)
1526{
1527    TCGMemOp bswap = opc & MO_BSWAP;
1528
1529    switch (opc & MO_SIZE) {
1530    case MO_8:
1531        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1532        break;
1533    case MO_16:
1534        if (bswap) {
1535            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1536            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1537        } else {
1538            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1539        }
1540        break;
1541    case MO_32:
1542    default:
1543        if (bswap) {
1544            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1545            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1546        } else {
1547            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1548        }
1549        break;
1550    case MO_64:
1551        /* Avoid strd for user-only emulation, to handle unaligned accesses.  */
1552        if (bswap) {
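            /* The first store uses register-offset-with-writeback so that
               addend then points at the first word and the second store
               can use a plain #4 immediate offset.  */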
1553            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1554            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1555            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1556            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1557        } else if (USING_SOFTMMU && use_armv6_instructions
1558                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1559            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1560        } else {
1561            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1562            tcg_out_st32_12(s, cond, datahi, addend, 4);
1563        }
1564        break;
1565    }
1566}
1567
1568static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1569                                          TCGReg datalo, TCGReg datahi,
1570                                          TCGReg addrlo)
1571{
1572    TCGMemOp bswap = opc & MO_BSWAP;
1573
1574    switch (opc & MO_SIZE) {
1575    case MO_8:
1576        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1577        break;
1578    case MO_16:
1579        if (bswap) {
1580            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1581            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1582        } else {
1583            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1584        }
1585        break;
1586    case MO_32:
1587    default:
1588        if (bswap) {
1589            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1590            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1591        } else {
1592            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1593        }
1594        break;
1595    case MO_64:
1596        /* Avoid strd for user-only emulation, to handle unaligned accesses.  */
1597        if (bswap) {
1598            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1599            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1600            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1601            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1602        } else if (USING_SOFTMMU && use_armv6_instructions
1603                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1604            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1605        } else {
1606            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1607            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1608        }
1609        break;
1610    }
1611}
1612
1613static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1614{
1615    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1616    TCGMemOpIdx oi;
1617    TCGMemOp opc;
1618#ifdef CONFIG_SOFTMMU
1619    int mem_index;
1620    TCGReg addend;
1621    tcg_insn_unit *label_ptr;
1622#endif
1623
1624    datalo = *args++;
1625    datahi = (is64 ? *args++ : 0);
1626    addrlo = *args++;
1627    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1628    oi = *args++;
1629    opc = get_memop(oi);
1630
1631#ifdef CONFIG_SOFTMMU
1632    mem_index = get_mmuidx(oi);
1633    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
1634
1635    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1636
1637    /* The conditional call must come last, as we're going to return here.  */
1638    label_ptr = s->code_ptr;
1639    tcg_out_bl_noaddr(s, COND_NE);
1640
1641    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1642                        s->code_ptr, label_ptr);
1643#else /* !CONFIG_SOFTMMU */
1644    if (guest_base) {
1645        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1646        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1647                              datahi, addrlo, TCG_REG_TMP);
1648    } else {
1649        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1650    }
1651#endif
1652}
1653
1654static tcg_insn_unit *tb_ret_addr;
1655
1656static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1657                const TCGArg *args, const int *const_args)
1658{
1659    TCGArg a0, a1, a2, a3, a4, a5;
1660    int c;
1661
1662    switch (opc) {
1663    case INDEX_op_exit_tb:
1664        tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1665        tcg_out_goto(s, COND_AL, tb_ret_addr);
1666        break;
1667    case INDEX_op_goto_tb:
1668        if (s->tb_jmp_insn_offset) {
1669            /* Direct jump method */
1670            s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1671            tcg_out_b_noaddr(s, COND_AL);
1672        } else {
1673            /* Indirect jump method */
1674            intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
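            /* Split the address so that the low 12 bits fit the immediate
               offset field of ldr: r0 gets ptr & ~0xfff via tcg_out_movi32
               and the final ldr into pc adds the remaining ptr & 0xfff.  */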
1675            tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
1676            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
1677        }
1678        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1679        break;
1680    case INDEX_op_br:
1681        tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
1682        break;
1683
1684    case INDEX_op_ld8u_i32:
1685        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1686        break;
1687    case INDEX_op_ld8s_i32:
1688        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1689        break;
1690    case INDEX_op_ld16u_i32:
1691        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1692        break;
1693    case INDEX_op_ld16s_i32:
1694        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1695        break;
1696    case INDEX_op_ld_i32:
1697        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1698        break;
1699    case INDEX_op_st8_i32:
1700        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1701        break;
1702    case INDEX_op_st16_i32:
1703        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1704        break;
1705    case INDEX_op_st_i32:
1706        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1707        break;
1708
1709    case INDEX_op_movcond_i32:
1710        /* Constraints mean that v2 is always in the same register as dest,
1711         * so we only need to do "if condition passed, move v1 to dest".
1712         */
1713        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1714                        args[1], args[2], const_args[2]);
1715        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1716                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
1717        break;
1718    case INDEX_op_add_i32:
1719        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1720                        args[0], args[1], args[2], const_args[2]);
1721        break;
1722    case INDEX_op_sub_i32:
1723        if (const_args[1]) {
1724            if (const_args[2]) {
1725                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1726            } else {
1727                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1728                               args[0], args[2], args[1], 1);
1729            }
1730        } else {
1731            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1732                            args[0], args[1], args[2], const_args[2]);
1733        }
1734        break;
1735    case INDEX_op_and_i32:
1736        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1737                        args[0], args[1], args[2], const_args[2]);
1738        break;
1739    case INDEX_op_andc_i32:
1740        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1741                        args[0], args[1], args[2], const_args[2]);
1742        break;
1743    case INDEX_op_or_i32:
1744        c = ARITH_ORR;
1745        goto gen_arith;
1746    case INDEX_op_xor_i32:
1747        c = ARITH_EOR;
1748        /* Fall through.  */
1749    gen_arith:
1750        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1751        break;
1752    case INDEX_op_add2_i32:
1753        a0 = args[0], a1 = args[1], a2 = args[2];
1754        a3 = args[3], a4 = args[4], a5 = args[5];
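        /* If the low-part destination would clobber an input still needed
           by the high-part addition, build the result in TMP and copy it
           into place afterwards.  */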
1755        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1756            a0 = TCG_REG_TMP;
1757        }
1758        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1759                        a0, a2, a4, const_args[4]);
1760        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1761                        a1, a3, a5, const_args[5]);
1762        tcg_out_mov_reg(s, COND_AL, args[0], a0);
1763        break;
1764    case INDEX_op_sub2_i32:
1765        a0 = args[0], a1 = args[1], a2 = args[2];
1766        a3 = args[3], a4 = args[4], a5 = args[5];
1767        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1768            a0 = TCG_REG_TMP;
1769        }
1770        if (const_args[2]) {
1771            if (const_args[4]) {
1772                tcg_out_movi32(s, COND_AL, a0, a4);
1773                a4 = a0;
1774            }
1775            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1776        } else {
1777            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1778                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1779        }
1780        if (const_args[3]) {
1781            if (const_args[5]) {
1782                tcg_out_movi32(s, COND_AL, a1, a5);
1783                a5 = a1;
1784            }
1785            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1786        } else {
1787            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1788                            a1, a3, a5, const_args[5]);
1789        }
1790        tcg_out_mov_reg(s, COND_AL, args[0], a0);
1791        break;
1792    case INDEX_op_neg_i32:
1793        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1794        break;
1795    case INDEX_op_not_i32:
1796        tcg_out_dat_reg(s, COND_AL,
1797                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1798        break;
1799    case INDEX_op_mul_i32:
1800        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1801        break;
1802    case INDEX_op_mulu2_i32:
1803        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1804        break;
1805    case INDEX_op_muls2_i32:
1806        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1807        break;
1808    /* XXX: Perhaps args[2] & 0x1f is wrong */
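    /* In the ARM immediate shift encodings a count of 0 denotes LSR #32,
       ASR #32 or RRX, so a zero count must instead be emitted as LSL #0
       (a plain mov); hence the nested conditionals below.  */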
1809    case INDEX_op_shl_i32:
1810        c = const_args[2] ?
1811                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1812        goto gen_shift32;
1813    case INDEX_op_shr_i32:
1814        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1815                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1816        goto gen_shift32;
1817    case INDEX_op_sar_i32:
1818        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1819                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1820        goto gen_shift32;
1821    case INDEX_op_rotr_i32:
1822        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1823                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1824        /* Fall through.  */
1825    gen_shift32:
1826        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1827        break;
1828
1829    case INDEX_op_rotl_i32:
1830        if (const_args[2]) {
1831            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1832                            ((0x20 - args[2]) & 0x1f) ?
1833                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1834                            SHIFT_IMM_LSL(0));
1835        } else {
1836            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1837            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1838                            SHIFT_REG_ROR(TCG_REG_TMP));
1839        }
1840        break;
1841
1842    case INDEX_op_brcond_i32:
1843        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1844                       args[0], args[1], const_args[1]);
1845        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
1846                           arg_label(args[3]));
1847        break;
1848    case INDEX_op_brcond2_i32:
1849        /* The resulting conditions are:
1850         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
1851         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
1852         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
1853         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
1854         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
1855         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
1856         */
1857        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1858                        args[1], args[3], const_args[3]);
1859        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1860                        args[0], args[2], const_args[2]);
1861        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
1862                           arg_label(args[5]));
1863        break;
1864    case INDEX_op_setcond_i32:
1865        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1866                        args[1], args[2], const_args[2]);
1867        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1868                        ARITH_MOV, args[0], 0, 1);
1869        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1870                        ARITH_MOV, args[0], 0, 0);
1871        break;
1872    case INDEX_op_setcond2_i32:
1873        /* See brcond2_i32 comment */
1874        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1875                        args[2], args[4], const_args[4]);
1876        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1877                        args[1], args[3], const_args[3]);
1878        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
1879                        ARITH_MOV, args[0], 0, 1);
1880        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
1881                        ARITH_MOV, args[0], 0, 0);
1882        break;
1883
1884    case INDEX_op_qemu_ld_i32:
1885        tcg_out_qemu_ld(s, args, 0);
1886        break;
1887    case INDEX_op_qemu_ld_i64:
1888        tcg_out_qemu_ld(s, args, 1);
1889        break;
1890    case INDEX_op_qemu_st_i32:
1891        tcg_out_qemu_st(s, args, 0);
1892        break;
1893    case INDEX_op_qemu_st_i64:
1894        tcg_out_qemu_st(s, args, 1);
1895        break;
1896
1897    case INDEX_op_bswap16_i32:
1898        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
1899        break;
1900    case INDEX_op_bswap32_i32:
1901        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
1902        break;
1903
1904    case INDEX_op_ext8s_i32:
1905        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
1906        break;
1907    case INDEX_op_ext16s_i32:
1908        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
1909        break;
1910    case INDEX_op_ext16u_i32:
1911        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
1912        break;
1913
1914    case INDEX_op_deposit_i32:
1915        tcg_out_deposit(s, COND_AL, args[0], args[2],
1916                        args[3], args[4], const_args[2]);
1917        break;
1918
1919    case INDEX_op_div_i32:
1920        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
1921        break;
1922    case INDEX_op_divu_i32:
1923        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
1924        break;
1925
1926    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
1927    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
1928    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
1929    default:
1930        tcg_abort();
1931    }
1932}
1933
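/* A rough key to the constraint letters used below: "r" is any core
   register; "I" accepts an ARM data-processing immediate, "K" one whose
   bitwise inverse is encodable and "N" one whose negation is encodable,
   matching the rI/rIK/rIN emitters above; "l" and "s" keep qemu_ld/st
   operands out of the registers the slow path clobbers; "0" ties an
   operand to output 0 and "Z" also accepts the constant zero.  */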
1934static const TCGTargetOpDef arm_op_defs[] = {
1935    { INDEX_op_exit_tb, { } },
1936    { INDEX_op_goto_tb, { } },
1937    { INDEX_op_br, { } },
1938
1939    { INDEX_op_ld8u_i32, { "r", "r" } },
1940    { INDEX_op_ld8s_i32, { "r", "r" } },
1941    { INDEX_op_ld16u_i32, { "r", "r" } },
1942    { INDEX_op_ld16s_i32, { "r", "r" } },
1943    { INDEX_op_ld_i32, { "r", "r" } },
1944    { INDEX_op_st8_i32, { "r", "r" } },
1945    { INDEX_op_st16_i32, { "r", "r" } },
1946    { INDEX_op_st_i32, { "r", "r" } },
1947
1948    /* TODO: "r", "r", "ri" */
1949    { INDEX_op_add_i32, { "r", "r", "rIN" } },
1950    { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
1951    { INDEX_op_mul_i32, { "r", "r", "r" } },
1952    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
1953    { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
1954    { INDEX_op_and_i32, { "r", "r", "rIK" } },
1955    { INDEX_op_andc_i32, { "r", "r", "rIK" } },
1956    { INDEX_op_or_i32, { "r", "r", "rI" } },
1957    { INDEX_op_xor_i32, { "r", "r", "rI" } },
1958    { INDEX_op_neg_i32, { "r", "r" } },
1959    { INDEX_op_not_i32, { "r", "r" } },
1960
1961    { INDEX_op_shl_i32, { "r", "r", "ri" } },
1962    { INDEX_op_shr_i32, { "r", "r", "ri" } },
1963    { INDEX_op_sar_i32, { "r", "r", "ri" } },
1964    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1965    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1966
1967    { INDEX_op_brcond_i32, { "r", "rIN" } },
1968    { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
1969    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
1970
1971    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
1972    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
1973    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
1974    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
1975
1976#if TARGET_LONG_BITS == 32
1977    { INDEX_op_qemu_ld_i32, { "r", "l" } },
1978    { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
1979    { INDEX_op_qemu_st_i32, { "s", "s" } },
1980    { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
1981#else
1982    { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
1983    { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
1984    { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
1985    { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
1986#endif
1987
1988    { INDEX_op_bswap16_i32, { "r", "r" } },
1989    { INDEX_op_bswap32_i32, { "r", "r" } },
1990
1991    { INDEX_op_ext8s_i32, { "r", "r" } },
1992    { INDEX_op_ext16s_i32, { "r", "r" } },
1993    { INDEX_op_ext16u_i32, { "r", "r" } },
1994
1995    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1996
1997    { INDEX_op_div_i32, { "r", "r", "r" } },
1998    { INDEX_op_divu_i32, { "r", "r", "r" } },
1999
2000    { -1 },
2001};
2002
2003static void tcg_target_init(TCGContext *s)
2004{
2005    /* Only probe for the platform and capabilities if we haven't already
2006       determined maximum values at compile time.  */
2007#ifndef use_idiv_instructions
2008    {
2009        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2010        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
2011    }
2012#endif
2013    if (__ARM_ARCH < 7) {
2014        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
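        /* On ARM Linux AT_PLATFORM is a string such as "v7l" or "v6l",
           so the architecture revision is the digit after the 'v'.  */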
2015        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
2016            arm_arch = pl[1] - '0';
2017        }
2018    }
2019
2020    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2021    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
2022                     (1 << TCG_REG_R0) |
2023                     (1 << TCG_REG_R1) |
2024                     (1 << TCG_REG_R2) |
2025                     (1 << TCG_REG_R3) |
2026                     (1 << TCG_REG_R12) |
2027                     (1 << TCG_REG_R14));
2028
2029    tcg_regset_clear(s->reserved_regs);
2030    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2031    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2032    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2033
2034    tcg_add_target_add_op_defs(arm_op_defs);
2035}
2036
2037static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2038                              TCGReg arg1, intptr_t arg2)
2039{
2040    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2041}
2042
2043static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2044                              TCGReg arg1, intptr_t arg2)
2045{
2046    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2047}
2048
2049static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
2050                               TCGReg base, intptr_t ofs)
2051{
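    /* Returning false indicates that constants cannot be stored to memory
       directly; the caller materialises them into a register first.  */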
2052    return false;
2053}
2054
2055static inline void tcg_out_mov(TCGContext *s, TCGType type,
2056                               TCGReg ret, TCGReg arg)
2057{
2058    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2059}
2060
2061static inline void tcg_out_movi(TCGContext *s, TCGType type,
2062                                TCGReg ret, tcg_target_long arg)
2063{
2064    tcg_out_movi32(s, COND_AL, ret, arg);
2065}
2066
2067/* Compute frame size via macros, to share between tcg_target_qemu_prologue
2068   and tcg_register_jit.  */
2069
2070#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
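/* That is r4 through r11 plus lr: 9 registers, or 36 bytes.  */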
2071
2072#define FRAME_SIZE \
2073    ((PUSH_SIZE \
2074      + TCG_STATIC_CALL_ARGS_SIZE \
2075      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2076      + TCG_TARGET_STACK_ALIGN - 1) \
2077     & -TCG_TARGET_STACK_ALIGN)
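/* The trailing "+ TCG_TARGET_STACK_ALIGN - 1 ... & -TCG_TARGET_STACK_ALIGN"
   is the usual idiom for rounding up to a multiple of the alignment.  */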
2078
2079static void tcg_target_qemu_prologue(TCGContext *s)
2080{
2081    int stack_addend;
2082
2083    /* Calling convention requires us to save r4-r11 and lr.  */
2084    /* stmdb sp!, { r4 - r11, lr } */
2085    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
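    /* The register list 0x4ff0 sets bits 4-11 (r4-r11) and bit 14 (lr).  */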
2086
2087    /* Reserve callee argument and tcg temp space.  */
2088    stack_addend = FRAME_SIZE - PUSH_SIZE;
2089
2090    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2091                   TCG_REG_CALL_STACK, stack_addend, 1);
2092    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2093                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2094
2095    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2096
2097    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2098    tb_ret_addr = s->code_ptr;
2099
2100    /* Epilogue.  We branch here via tb_ret_addr.  */
2101    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2102                   TCG_REG_CALL_STACK, stack_addend, 1);
2103
2104    /* ldmia sp!, { r4 - r11, pc } */
2105    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
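    /* The matching list 0x8ff0 restores r4-r11 and loads the saved lr
       directly into pc (bit 15), performing the return.  */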
2106}
2107
2108typedef struct {
2109    DebugFrameHeader h;
2110    uint8_t fde_def_cfa[4];
2111    uint8_t fde_reg_ofs[18];
2112} DebugFrame;
2113
2114#define ELF_HOST_MACHINE EM_ARM
2115
2116/* We're expecting a 2-byte uleb128-encoded value.  */
2117QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
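/* Two uleb128 bytes carry 14 payload bits (7 per byte).  fde_def_cfa below
   emits the low 7 bits with the continuation bit set, then the rest.  */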
2118
2119static const DebugFrame debug_frame = {
2120    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2121    .h.cie.id = -1,
2122    .h.cie.version = 1,
2123    .h.cie.code_align = 1,
2124    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
2125    .h.cie.return_column = 14,
2126
2127    /* Total FDE size does not include the "len" member.  */
2128    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2129
2130    .fde_def_cfa = {
2131        12, 13,                         /* DW_CFA_def_cfa sp, ... */
2132        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2133        (FRAME_SIZE >> 7)
2134    },
2135    .fde_reg_ofs = {
2136        /* The following must match the stmdb in the prologue.  */
2137        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
2138        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
2139        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
2140        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
2141        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
2142        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
2143        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
2144        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
2145        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
2146    }
2147};
2148
2149void tcg_register_jit(void *buf, size_t buf_size)
2150{
2151    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2152}
2153