qemu/tcg/arm/tcg-target.inc.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Andrzej Zaborowski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "tcg-be-ldst.h"

/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
#ifndef __ARM_ARCH
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
     || defined(__ARM_ARCH_7EM__)
#  define __ARM_ARCH 7
# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
       || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
#  define __ARM_ARCH 6
# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
       || defined(__ARM_ARCH_5TEJ__)
#  define __ARM_ARCH 5
# else
#  define __ARM_ARCH 4
# endif
#endif

static int arm_arch = __ARM_ARCH;

#if defined(__ARM_ARCH_5T__) \
    || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
# define use_armv5t_instructions 1
#else
# define use_armv5t_instructions use_armv6_instructions
#endif

#define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6)
#define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7)

#ifndef use_idiv_instructions
bool use_idiv_instructions;
#endif

/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
#ifdef CONFIG_SOFTMMU
# define USING_SOFTMMU 1
#else
# define USING_SOFTMMU 0
#endif

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",
    "%r1",
    "%r2",
    "%r3",
    "%r4",
    "%r5",
    "%r6",
    "%r7",
    "%r8",
    "%r9",
    "%r10",
    "%r11",
    "%r12",
    "%r13",
    "%r14",
    "%pc",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R13,
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R12,
    TCG_REG_R14,
};

static const int tcg_target_call_iarg_regs[4] = {
    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
};
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_R0, TCG_REG_R1
};

#define TCG_REG_TMP  TCG_REG_R12

static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
}
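
/* For example: the B/BL offset field is relative to PC+8 and counted in
   words.  With the branch insn at address 0x1000 and a target of 0x1010
   (addresses chosen purely for illustration), tcg_ptr_byte_diff() yields
   0x10, (0x10 - 8) >> 2 == 2, and the low 24 bits of the insn become
   0x000002.  Backward branches rely on two's-complement wraparound
   within the 24-bit mask.  */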
 123
 124static void patch_reloc(tcg_insn_unit *code_ptr, int type,
 125                        intptr_t value, intptr_t addend)
 126{
 127    assert(type == R_ARM_PC24);
 128    assert(addend == 0);
 129    reloc_pc24(code_ptr, (tcg_insn_unit *)value);
 130}
 131
 132#define TCG_CT_CONST_ARM  0x100
 133#define TCG_CT_CONST_INV  0x200
 134#define TCG_CT_CONST_NEG  0x400
 135#define TCG_CT_CONST_ZERO 0x800
 136
 137/* parse target specific constraints */
 138static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 139{
 140    const char *ct_str;
 141
 142    ct_str = *pct_str;
 143    switch (ct_str[0]) {
 144    case 'I':
 145        ct->ct |= TCG_CT_CONST_ARM;
 146        break;
 147    case 'K':
 148        ct->ct |= TCG_CT_CONST_INV;
 149        break;
 150    case 'N': /* The gcc constraint letter is L, already used here.  */
 151        ct->ct |= TCG_CT_CONST_NEG;
 152        break;
 153    case 'Z':
 154        ct->ct |= TCG_CT_CONST_ZERO;
 155        break;
 156
 157    case 'r':
 158        ct->ct |= TCG_CT_REG;
 159        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
 160        break;
 161
 162    /* qemu_ld address */
 163    case 'l':
 164        ct->ct |= TCG_CT_REG;
 165        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
 166#ifdef CONFIG_SOFTMMU
 167        /* r0-r2,lr will be overwritten when reading the tlb entry,
 168           so don't use these. */
 169        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
 170        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
 171        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
 172        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
 173#endif
 174        break;
 175
 176    /* qemu_st address & data */
 177    case 's':
 178        ct->ct |= TCG_CT_REG;
 179        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
 180        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
 181           and r0-r1 doing the byte swapping, so don't use these. */
 182        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
 183        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
 184#if defined(CONFIG_SOFTMMU)
 185        /* Avoid clashes with registers being used for helper args */
 186        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
 187#if TARGET_LONG_BITS == 64
 188        /* Avoid clashes with registers being used for helper args */
 189        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 190#endif
 191        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
 192#endif
 193        break;
 194
 195    default:
 196        return -1;
 197    }
 198    ct_str++;
 199    *pct_str = ct_str;
 200
 201    return 0;
 202}
 203
 204static inline uint32_t rotl(uint32_t val, int n)
 205{
 206  return (val << n) | (val >> (32 - n));
 207}
 208
 209/* ARM immediates for ALU instructions are made of an unsigned 8-bit
 210   right-rotated by an even amount between 0 and 30. */
 211static inline int encode_imm(uint32_t imm)
 212{
 213    int shift;
 214
 215    /* simple case, only lower bits */
 216    if ((imm & ~0xff) == 0)
 217        return 0;
 218    /* then try a simple even shift */
 219    shift = ctz32(imm) & ~1;
 220    if (((imm >> shift) & ~0xff) == 0)
 221        return 32 - shift;
 222    /* now try harder with rotations */
 223    if ((rotl(imm, 2) & ~0xff) == 0)
 224        return 2;
 225    if ((rotl(imm, 4) & ~0xff) == 0)
 226        return 4;
 227    if ((rotl(imm, 6) & ~0xff) == 0)
 228        return 6;
 229    /* imm can't be encoded */
 230    return -1;
 231}
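
/* A worked example, purely illustrative: imm = 0x3fc00 (0xff << 10).
   ctz32(imm) is 10, already even, and (imm >> 10) == 0xff, so
   encode_imm() returns 32 - 10 = 22.  tcg_out_movi32() below then emits
   rotl(imm, 22) == 0xff as the imm8 field with 22 in the rotate field;
   the CPU undoes this by rotating 0xff right by 22 bits, recovering
   0x3fc00.  A value such as 0x101 fails every test and yields -1, since
   its two set bits span more than an 8-bit window under any rotation.  */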

static inline int check_fit_imm(uint32_t imm)
{
    return encode_imm(imm) >= 0;
}

/* Test if a constant matches the constraint.
 * TODO: define constraints for:
 *
 * ldr/str offset:   between -0xfff and 0xfff
 * ldrh/strh offset: between -0xff and 0xff
 * mov operand2:     values represented with x << (2 * y), x < 0x100
 * add, sub, eor...: ditto
 */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct;
    ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else {
        return 0;
    }
}

#define TO_CPSR (1 << 20)

typedef enum {
    ARITH_AND = 0x0 << 21,
    ARITH_EOR = 0x1 << 21,
    ARITH_SUB = 0x2 << 21,
    ARITH_RSB = 0x3 << 21,
    ARITH_ADD = 0x4 << 21,
    ARITH_ADC = 0x5 << 21,
    ARITH_SBC = 0x6 << 21,
    ARITH_RSC = 0x7 << 21,
    ARITH_TST = 0x8 << 21 | TO_CPSR,
    ARITH_CMP = 0xa << 21 | TO_CPSR,
    ARITH_CMN = 0xb << 21 | TO_CPSR,
    ARITH_ORR = 0xc << 21,
    ARITH_MOV = 0xd << 21,
    ARITH_BIC = 0xe << 21,
    ARITH_MVN = 0xf << 21,

    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,
} ARMInsn;

#define SHIFT_IMM_LSL(im)       (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im)       (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im)       (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im)       (((im) << 7) | 0x60)
#define SHIFT_REG_LSL(rs)       (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs)       (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs)       (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs)       (((rs) << 8) | 0x70)

enum arm_cond_code_e {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* Unsigned greater or equal */
    COND_CC = 0x3,      /* Unsigned less than */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
};

static const uint8_t tcg_cond_to_arm_cond[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_CC,
    [TCG_COND_GEU] = COND_CS,
    [TCG_COND_LEU] = COND_LS,
    [TCG_COND_GTU] = COND_HI,
};

static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
}

static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0a000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
{
    /* We take care here not to modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
}

static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
{
    /* We take care here not to modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
}

static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0b000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
}

static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
{
    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
                (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_dat_reg(TCGContext *s,
                int cond, int opc, int rd, int rn, int rm, int shift)
{
    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
                    (rn << 16) | (rd << 12) | shift | rm);
}

static inline void tcg_out_nop(TCGContext *s)
{
    if (use_armv7_instructions) {
        /* Architected nop introduced in v6k.  */
        /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
           also Just So Happened to do nothing on pre-v6k so that we
           don't need to conditionalize it?  */
        tcg_out32(s, 0xe320f000);
    } else {
        /* Prior to that the assembler uses mov r0, r0.  */
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
{
    /* Simple reg-reg move, optimising out the 'do nothing' case */
    if (rd != rm) {
        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_dat_imm(TCGContext *s,
                int cond, int opc, int rd, int rn, int im)
{
    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
                    (rn << 16) | (rd << 12) | im);
}

static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
{
    int rot, opc, rn;

    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
       Speed things up by only checking when movt would be required.
       Prior to armv7, have one go at fully rotated immediates before
       doing the decomposition thing below.  */
    if (!use_armv7_instructions || (arg & 0xffff0000)) {
        rot = encode_imm(arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
                            rotl(arg, rot) | (rot << 7));
            return;
        }
        rot = encode_imm(~arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
                            rotl(~arg, rot) | (rot << 7));
            return;
        }
    }

    /* Use movw + movt.  */
    if (use_armv7_instructions) {
        /* movw */
        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
        if (arg & 0xffff0000) {
            /* movt */
            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
        }
        return;
    }

    /* TODO: This is very suboptimal, we can easily have a constant
       pool somewhere after all the instructions.  */
    opc = ARITH_MOV;
    rn = 0;
    /* If we have lots of leading 1's, we can shorten the sequence by
       beginning with mvn and then clearing higher bits with eor.  */
    if (clz32(~arg) > clz32(arg)) {
        opc = ARITH_MVN, arg = ~arg;
    }
    do {
        int i = ctz32(arg) & ~1;
        rot = ((32 - i) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
        arg &= ~(0xff << i);

        opc = ARITH_EOR;
        rn = rd;
    } while (arg);
}
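
/* A worked example of the above, purely illustrative: arg = 0x00ff00ff.
   On armv7 this becomes "movw rd, #0x00ff" followed by "movt rd, #0x00ff".
   On older cores, encode_imm() fails for both arg and ~arg, so the
   fallback loop emits
       mov rd, #0xff              (arg is reduced to 0x00ff0000)
       eor rd, rd, #0x00ff0000    (imm8 0xff, rotated; arg becomes 0)
   and terminates after two instructions.  */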

static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rI" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        assert(rot >= 0);
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
                            TCGReg dst, TCGReg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIK" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = ~rhs;
            rot = encode_imm(rhs);
            assert(rot >= 0);
            opc = opinv;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
                            TCGArg dst, TCGArg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIN" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = -rhs;
            rot = encode_imm(rhs);
            assert(rot >= 0);
            opc = opneg;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}
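
/* Usage sketch, purely illustrative: tcg_out_dat_rIN(s, cond, ARITH_ADD,
   ARITH_SUB, rd, rn, -1, true) cannot encode -1 (0xffffffff) as an ARM
   immediate, so it negates the constant and emits "sub rd, rn, #1".
   Similarly, tcg_out_dat_rIK with ARITH_AND/ARITH_BIC turns an
   unencodable "and rd, rn, #0xffffff00" into "bic rd, rn, #0xff".  */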

static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
                                 TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
    if (!use_armv6_instructions && rd == rn) {
        if (rd == rm) {
            /* rd == rn == rm; copy an input to tmp first.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rm = rn = TCG_REG_TMP;
        } else {
            rn = rm;
            rm = rd;
        }
    }
    /* mul */
    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
}

static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* umull */
    tcg_out32(s, (cond << 28) | 0x00800090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* smull */
    tcg_out32(s, (cond << 28) | 0x00c00090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_ext8s(TCGContext *s, int cond,
                                 int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxtb */
        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(24));
    }
}

static inline void tcg_out_ext8u(TCGContext *s, int cond,
                                 int rd, int rn)
{
    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}

static inline void tcg_out_ext16s(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxth */
        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(16));
    }
}

static inline void tcg_out_ext16u(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* uxth */
        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_LSR(16));
    }
}

static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* revsh */
        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

/* Swap the two low bytes, assuming that the two high input bytes and the
   two high output bytes can hold any value.  */
static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev */
        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
        tcg_out_dat_imm(s, cond, ARITH_BIC,
                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_ROR(8));
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
    }
}
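
/* The pre-v6 fallback above is the classic four-insn byte swap.  With
   input bytes A.B.C.D (A most significant), illustratively:
       eor tmp, rn, rn, ror #16   ; tmp = A^C . B^D . C^A . D^B
       bic tmp, tmp, #0x00ff0000  ; tmp = A^C .  0  . C^A . D^B
       mov rd, rn, ror #8         ; rd  =  D  .  A  .  B  .  C
       eor rd, rd, tmp, lsr #8    ; rd  =  D  .  C  .  B  .  A
   The bic clears the one byte whose xor term would otherwise corrupt
   the B lane of the result.  */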

bool tcg_target_deposit_valid(int ofs, int len)
{
    /* ??? Without bfi, we could improve over generic code by combining
       the right-shift from a non-zero ofs with the orr.  We do run into
       problems when rd == rs, and the mask generated from ofs+len doesn't
       fit into an immediate.  We would have to be careful not to pessimize
       wrt the optimizations performed on the expanded code.  */
    return use_armv7_instructions;
}

static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len, bool const_a1)
{
    if (const_a1) {
        /* bfi becomes bfc with rn == 15.  */
        a1 = 15;
    }
    /* bfi/bfc */
    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((ofs + len - 1) << 16));
}

/* Note that this routine is used for both LDR and LDRH formats, so we do
   not wish to include an immediate shift at this point.  */
static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
{
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
              | (w << 21) | (rn << 16) | (rt << 12) | rm);
}

static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, int imm8, bool p, bool w)
{
    bool u = 1;
    if (imm8 < 0) {
        imm8 = -imm8;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
}

static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                             TCGReg rn, int imm12, bool p, bool w)
{
    bool u = 1;
    if (imm12 < 0) {
        imm12 = -imm12;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | imm12);
}

static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
}

/* Register pre-increment with base writeback.  */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld32u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld32_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st32(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st32_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld16u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld16u_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld16s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld16s_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st16(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st16_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld8u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld8_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld8s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld8s_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st8(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st8_12(s, cond, rd, rn, offset);
    }
}

/* The _goto case is normally between TBs within the same code buffer, and
 * with the code buffer limited to 16MB we wouldn't need the long case.
 * But we also use it for the tail-call to the qemu_ld/st helpers, which
 * does need it.
 */
static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
        tcg_out_b(s, cond, disp);
        return;
    }

    tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
    if (use_armv5t_instructions) {
        tcg_out_bx(s, cond, TCG_REG_TMP);
    } else {
        if (addri & 1) {
            /* We can't branch to a Thumb address without BX.  */
            tcg_abort();
        }
        tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
    }
}

/* The call case is mostly used for helpers - so it's not unreasonable
 * for them to be beyond branch range.  */
static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
        if (addri & 1) {
            /* Use BLX if the target is in Thumb mode */
            if (!use_armv5t_instructions) {
                tcg_abort();
            }
            tcg_out_blx_imm(s, disp);
        } else {
            tcg_out_bl(s, COND_AL, disp);
        }
    } else if (use_armv7_instructions) {
        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
    } else {
        /* Build the return address in lr (reading pc yields this insn's
           address + 8, so lr ends up just past the inline literal), then
           load the call target into pc from the literal that follows.  */
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
        tcg_out32(s, addri);
    }
}

static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
{
    if (l->has_value) {
        tcg_out_goto(s, cond, l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
        tcg_out_b_noaddr(s, cond);
    }
}

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LESL] = helper_le_ldul_mmu,

    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BESL] = helper_be_ldul_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argregs 0..3 are real registers, 4 and up go on the stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg < 4) {                                                      \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    } else {                                                               \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);                      \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    }                                                                      \
    return argreg + 1;                                                     \
}

DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )

static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
                                TCGReg arglo, TCGReg arghi)
{
    /* 64 bit arguments must go in even/odd register pairs
     * and in 8-aligned stack slots.
     */
    if (argreg & 1) {
        argreg++;
    }
    if (use_armv6_instructions && argreg >= 4
        && (arglo & 1) == 0 && arghi == arglo + 1) {
        tcg_out_strd_8(s, COND_AL, arglo,
                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
        return argreg + 2;
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, arglo);
        argreg = tcg_out_arg_reg32(s, argreg, arghi);
        return argreg;
    }
}
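
/* An illustrative walk-through: a 64-bit store at a 64-bit guest address
   marshals (env, addrlo:addrhi, datalo:datahi, oi, ra).  env takes r0,
   the address pair is aligned up to r2:r3, and the data pair then starts
   at argreg 4, landing in stack slots 0/4 (a single strd when the source
   registers already form an even/odd pair on armv6); oi and the return
   address follow at stack offsets 8 and 12.  */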

#define TLB_SHIFT       (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

/* We're expecting to use an 8-bit immediate and to mask.  */
QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);

/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
   Using the offset of the second entry in the last tlb table ensures
   that we can index all of the elements of the first entry.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
                  > 0xffff);

/* Load and compare a TLB entry, leaving the flags set.  Returns the register
   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                               TCGMemOp s_bits, int mem_index, bool is_load)
{
    TCGReg base = TCG_AREG0;
    int cmp_off =
        (is_load
         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);

    /* Should generate something like the following:
     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
     *   add    r2, env, #high
     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
     *   ldr    r0, [r2, #cmp]                                    (4)
     *   tst    addrlo, #s_mask
     *   ldr    r2, [r2, #add]                                    (5)
     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
     */
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));

    /* We checked above that the offset is contained within 16 bits.  */
    if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                        (24 << 7) | (cmp_off >> 8));
        base = TCG_REG_R2;
        add_off -= cmp_off & 0xff00;
        cmp_off &= 0xff;
    }

    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));

    /* Load the tlb comparator.  Use ldrd if needed and available,
       but due to how the pointer needs setting up, ldm isn't useful.
       Base arm5 doesn't have ldrd, but armv5te does.  */
    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
    } else {
        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
        if (TARGET_LONG_BITS == 64) {
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
        }
    }

    /* Check alignment.  */
    if (s_bits) {
        tcg_out_dat_imm(s, COND_AL, ARITH_TST,
                        0, addrlo, (1 << s_bits) - 1);
    }

    /* Load the tlb addend.  */
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);

    tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));

    if (TARGET_LONG_BITS == 64) {
        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
    }

    return TCG_REG_R2;
}
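
/* An illustrative example of the offset split above: if mem_index placed
   the comparator at cmp_off = 0x1234, the add folds in the high byte as
   "add r2, env, #0x1200" (imm8 0x12 with rotate field 12, i.e. ror #24),
   leaving the comparator reachable at [r2, #0x34]; add_off is reduced by
   the same 0x1200 so the addend load still hits the right slot.  */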

/* Record the context of a call to the out of line helper code for the slow
   path for a load or store, so that we can later generate the correct
   helper code.  */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
                                TCGReg addrhi, tcg_insn_unit *raddr,
                                tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    void *func;

    reloc_pc24(lb->label_ptr[0], s->code_ptr);

    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }
    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* For armv6 we can use the canonical unsigned helpers and minimize
       icache usage.  For pre-armv6, use the signed helpers since we do
       not have a single insn sign-extend.  */
    if (use_armv6_instructions) {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
    } else {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
        if (opc & MO_SIGN) {
            opc = MO_UL;
        }
    }
    tcg_out_call(s, func);

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_SW:
        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    default:
        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_Q:
        if (datalo != TCG_REG_R1) {
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
        } else if (datahi != TCG_REG_R0) {
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        } else {
            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
        }
        break;
    }

    tcg_out_goto(s, COND_AL, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);

    reloc_pc24(lb->label_ptr[0], s->code_ptr);

    argreg = TCG_REG_R0;
    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SIZE) {
    case MO_8:
        argreg = tcg_out_arg_reg8(s, argreg, datalo);
        break;
    case MO_16:
        argreg = tcg_out_arg_reg16(s, argreg, datalo);
        break;
    case MO_32:
    default:
        argreg = tcg_out_arg_reg32(s, argreg, datalo);
        break;
    case MO_64:
        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
        break;
    }

    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* Tail-call to the helper, which will return to the fast path.  */
    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#endif /* SOFTMMU */

static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_SB:
        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_UW:
        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
            } else if (dl != addend) {
                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
            } else {
                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
                                addend, addrlo, SHIFT_IMM_LSL(0));
                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}

static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_SB:
        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_UW:
        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
            } else if (dl == addrlo) {
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
            } else {
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);

    /* This is a conditional BL, used only to load a pointer within this
       opcode into LR for the slow path.  We will not be using the value
       for a tail call.  */
    label_ptr = s->code_ptr;
    tcg_out_bl_noaddr(s, COND_NE);

    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);

    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}
1504
1505static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1506                                         TCGReg datalo, TCGReg datahi,
1507                                         TCGReg addrlo, TCGReg addend)
1508{
1509    TCGMemOp bswap = opc & MO_BSWAP;
1510
1511    switch (opc & MO_SIZE) {
1512    case MO_8:
1513        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1514        break;
1515    case MO_16:
1516        if (bswap) {
1517            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1518            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1519        } else {
1520            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1521        }
1522        break;
1523    case MO_32:
1524    default:
1525        if (bswap) {
1526            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1527            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1528        } else {
1529            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1530        }
1531        break;
1532    case MO_64:
1533        /* Avoid strd for user-only emulation, to handle unaligned.  */
1534        if (bswap) {
1535            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1536            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1537            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1538            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1539        } else if (USING_SOFTMMU && use_armv6_instructions
1540                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1541            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1542        } else {
1543            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1544            tcg_out_st32_12(s, cond, datahi, addend, 4);
1545        }
1546        break;
1547    }
1548}
1549
1550static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1551                                          TCGReg datalo, TCGReg datahi,
1552                                          TCGReg addrlo)
1553{
1554    TCGMemOp bswap = opc & MO_BSWAP;
1555
1556    switch (opc & MO_SIZE) {
1557    case MO_8:
1558        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1559        break;
1560    case MO_16:
1561        if (bswap) {
1562            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1563            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1564        } else {
1565            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1566        }
1567        break;
1568    case MO_32:
1569    default:
1570        if (bswap) {
1571            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1572            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1573        } else {
1574            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1575        }
1576        break;
1577    case MO_64:
1578        /* Avoid strd for user-only emulation, to handle unaligned.  */
1579        if (bswap) {
1580            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1581            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1582            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1583            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1584        } else if (USING_SOFTMMU && use_armv6_instructions
1585                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1586            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1587        } else {
1588            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1589            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1590        }
1591        break;
1592    }
1593}
1594
1595static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1596{
1597    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1598    TCGMemOpIdx oi;
1599    TCGMemOp opc;
1600#ifdef CONFIG_SOFTMMU
1601    int mem_index;
1602    TCGReg addend;
1603    tcg_insn_unit *label_ptr;
1604#endif
1605
1606    datalo = *args++;
1607    datahi = (is64 ? *args++ : 0);
1608    addrlo = *args++;
1609    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1610    oi = *args++;
1611    opc = get_memop(oi);
1612
1613#ifdef CONFIG_SOFTMMU
1614    mem_index = get_mmuidx(oi);
1615    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
1616
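    /* The TLB comparison above left the flags set: EQ on a hit.  Emit the
       store conditionally on EQ, so that on a miss it is skipped and only
       the slow-path call below executes.  */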
    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);

    /* The conditional call must come last, as we're going to return here.  */
    label_ptr = s->code_ptr;
    tcg_out_bl_noaddr(s, COND_NE);

    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
                              datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}

static tcg_insn_unit *tb_ret_addr;

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                const TCGArg *args, const int *const_args)
{
    TCGArg a0, a1, a2, a3, a4, a5;
    int c;

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
        tcg_out_goto(s, COND_AL, tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* Direct jump method */
            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
            tcg_out_b_noaddr(s, COND_AL);
        } else {
            /* Indirect jump method */
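            /* Load the destination from tb_next[args[0]].  The address is
               split into a base with the low 12 bits clear plus a 12-bit
               offset, matching the ldr immediate addressing mode: e.g.
               0x40123456 becomes base 0x40123000 + offset 0x456.  */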
            intptr_t ptr = (intptr_t)(s->tb_next + args[0]);
            tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
        }
        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
        break;
    case INDEX_op_br:
        tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
        break;

    case INDEX_op_ld8u_i32:
        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16u_i32:
        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_movcond_i32:
        /* Constraints mean that v2 is always in the same register as dest,
         * so we only need to do "if condition passed, move v1 to dest".
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
        break;
    case INDEX_op_add_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_sub_i32:
        if (const_args[1]) {
            if (const_args[2]) {
                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
            } else {
                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
                               args[0], args[2], args[1], 1);
            }
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
                            args[0], args[1], args[2], const_args[2]);
        }
        break;
    case INDEX_op_and_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_andc_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_or_i32:
        c = ARITH_ORR;
        goto gen_arith;
    case INDEX_op_xor_i32:
        c = ARITH_EOR;
        /* Fall through.  */
    gen_arith:
        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
        break;
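    /* For the double-word arithmetic below, the low-part result must not
       overwrite an input still needed by the high-part op; if it would,
       compute the low part into TCG_REG_TMP and move it into place at
       the end.  */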
    case INDEX_op_add2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
                        a0, a2, a4, const_args[4]);
        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
                        a1, a3, a5, const_args[5]);
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_sub2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        if (const_args[2]) {
            if (const_args[4]) {
                tcg_out_movi32(s, COND_AL, a0, a4);
                a4 = a0;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
        }
        if (const_args[3]) {
            if (const_args[5]) {
                tcg_out_movi32(s, COND_AL, a1, a5);
                a5 = a1;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
        } else {
            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
                            a1, a3, a5, const_args[5]);
        }
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_neg_i32:
        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
        break;
    case INDEX_op_not_i32:
        tcg_out_dat_reg(s, COND_AL,
                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
        break;
    case INDEX_op_mul_i32:
        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    case INDEX_op_muls2_i32:
        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    /* XXX: Perhaps args[2] & 0x1f is wrong */
    case INDEX_op_shl_i32:
        c = const_args[2] ?
                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
        goto gen_shift32;
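    /* In the ARM shift encodings an immediate count of 0 for LSR, ASR and
       ROR does not mean "no shift": it encodes shift-by-32 (or RRX for
       ROR), so a zero count must be emitted as LSL #0 instead.  */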
    case INDEX_op_shr_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
        goto gen_shift32;
    case INDEX_op_sar_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
        goto gen_shift32;
    case INDEX_op_rotr_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
        /* Fall through.  */
    gen_shift32:
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
        break;

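    /* ARM has no rotate-left instruction; a left rotate by n is emitted
       as a right rotate by 32 - n, e.g. rotl(x, 8) == ror(x, 24).  */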
    case INDEX_op_rotl_i32:
        if (const_args[2]) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            ((0x20 - args[2]) & 0x1f) ?
                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
                            SHIFT_IMM_LSL(0));
        } else {
            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            SHIFT_REG_ROR(TCG_REG_TMP));
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[1], const_args[1]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                           arg_label(args[3]));
        break;
    case INDEX_op_brcond2_i32:
        /* The resulting conditions are:
         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[2], const_args[2]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
                           arg_label(args[5]));
        break;
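    /* setcond materializes the comparison result without a branch by
       issuing two conditional moves: exactly one of the two opposite
       conditions passes, writing 1 or 0 into args[0].  */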
    case INDEX_op_setcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                        ARITH_MOV, args[0], 0, 0);
        break;
    case INDEX_op_setcond2_i32:
        /* See brcond2_i32 comment */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[2], args[4], const_args[4]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
                        ARITH_MOV, args[0], 0, 0);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    case INDEX_op_bswap16_i32:
        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16u_i32:
        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_deposit_i32:
        tcg_out_deposit(s, COND_AL, args[0], args[2],
                        args[3], args[4], const_args[2]);
        break;

    case INDEX_op_div_i32:
        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_divu_i32:
        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }
}

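/* Constraint letters, as parsed by this backend: "r" is any general
   register, and a trailing "i" accepts any constant.  "I" accepts only a
   valid ARM rotated immediate; the "N" and "K" variants additionally
   accept a constant whose negation or bitwise inverse is encodable,
   matching the paired ADD/SUB and AND/BIC style emitters above.  "0"
   ties an operand to operand 0, "Z" accepts the constant zero, and
   "l"/"s" are the qemu_ld/qemu_st address constraints, which keep
   operands out of the registers clobbered by the slow-path helpers.  */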
static const TCGTargetOpDef arm_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "r", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    /* TODO: "r", "r", "ri" */
    { INDEX_op_add_i32, { "r", "r", "rIN" } },
    { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
    { INDEX_op_mul_i32, { "r", "r", "r" } },
    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
    { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
    { INDEX_op_and_i32, { "r", "r", "rIK" } },
    { INDEX_op_andc_i32, { "r", "r", "rIK" } },
    { INDEX_op_or_i32, { "r", "r", "rI" } },
    { INDEX_op_xor_i32, { "r", "r", "rI" } },
    { INDEX_op_neg_i32, { "r", "r" } },
    { INDEX_op_not_i32, { "r", "r" } },

    { INDEX_op_shl_i32, { "r", "r", "ri" } },
    { INDEX_op_shr_i32, { "r", "r", "ri" } },
    { INDEX_op_sar_i32, { "r", "r", "ri" } },
    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
    { INDEX_op_rotr_i32, { "r", "r", "ri" } },

    { INDEX_op_brcond_i32, { "r", "rIN" } },
    { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },

    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },

#if TARGET_LONG_BITS == 32
    { INDEX_op_qemu_ld_i32, { "r", "l" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
    { INDEX_op_qemu_st_i32, { "s", "s" } },
    { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
    { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
    { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
#endif

    { INDEX_op_bswap16_i32, { "r", "r" } },
    { INDEX_op_bswap32_i32, { "r", "r" } },

    { INDEX_op_ext8s_i32, { "r", "r" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },

    { INDEX_op_div_i32, { "r", "r", "r" } },
    { INDEX_op_divu_i32, { "r", "r", "r" } },

    { -1 },
};

static void tcg_target_init(TCGContext *s)
{
    /* Only probe for the platform and capabilities if we haven't already
       determined maximum values at compile time.  */
#ifndef use_idiv_instructions
    {
        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
    }
#endif
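    /* On ARM Linux, AT_PLATFORM is a string such as "v6l" or "v7l";
       the architecture revision is the digit in the second character.  */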
    if (__ARM_ARCH < 7) {
        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
            arm_arch = pl[1] - '0';
        }
    }

    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_R0) |
                     (1 << TCG_REG_R1) |
                     (1 << TCG_REG_R2) |
                     (1 << TCG_REG_R3) |
                     (1 << TCG_REG_R12) |
                     (1 << TCG_REG_R14));

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);

    tcg_add_target_add_op_defs(arm_op_defs);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
}

static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg ret, tcg_target_long arg)
{
    tcg_out_movi32(s, COND_AL, ret, arg);
}

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

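/* The prologue saves r4-r11 and lr: (11 - 4 + 1) callee-saved registers
   plus the link register, nine words in all.  */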
#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)

static void tcg_target_qemu_prologue(TCGContext *s)
{
    int stack_addend;

    /* Calling convention requires us to save r4-r11 and lr.  */
    /* stmdb sp!, { r4 - r11, lr } */
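    /* 0x092d is STMDB with writeback, base sp; the low half-word is the
       register mask: 0x0ff0 covers r4-r11 and bit 14 (0x4000) adds lr.  */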
    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);

    /* Reserve callee argument and tcg temp space.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;

    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);

    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
    tb_ret_addr = s->code_ptr;

    /* Epilogue.  We branch here via tb_ret_addr.  */
    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);

    /* ldmia sp!, { r4 - r11, pc } */
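    /* The same register mask with bit 15 (0x8000) in place of lr: loading
       pc here pops the frame and returns to the prologue's caller.  */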
    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[18];
} DebugFrame;

#define ELF_HOST_MACHINE EM_ARM

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 13,                         /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* The following must match the stmdb in the prologue.  */
        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}