qemu/tcg/arm/tcg-target.c
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Andrzej Zaborowski
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "elf.h"
  26#include "tcg-be-ldst.h"
  27
  28/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
  29#ifndef __ARM_ARCH
  30# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
  31     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
  32     || defined(__ARM_ARCH_7EM__)
  33#  define __ARM_ARCH 7
  34# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
  35       || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
  36       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
  37#  define __ARM_ARCH 6
  38# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
  39       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
  40       || defined(__ARM_ARCH_5TEJ__)
  41#  define __ARM_ARCH 5
  42# else
  43#  define __ARM_ARCH 4
  44# endif
  45#endif
  46
  47static int arm_arch = __ARM_ARCH;
  48
  49#if defined(__ARM_ARCH_5T__) \
  50    || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
  51# define use_armv5t_instructions 1
  52#else
  53# define use_armv5t_instructions use_armv6_instructions
  54#endif
  55
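    /* Both tests look at the compile-time __ARM_ARCH and the runtime arm_arch
       value: the compiler can fold them to a constant when built for a new
       enough architecture, while an older baseline build can still enable the
       v6/v7 paths if arm_arch is raised at run time.  */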
  56#define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6)
  57#define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7)
  58
  59#ifndef use_idiv_instructions
  60bool use_idiv_instructions;
  61#endif
  62
  63/* ??? Ought to think about changing CONFIG_SOFTMMU to always be defined.  */
  64#ifdef CONFIG_SOFTMMU
  65# define USING_SOFTMMU 1
  66#else
  67# define USING_SOFTMMU 0
  68#endif
  69
  70#ifndef NDEBUG
  71static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
  72    "%r0",
  73    "%r1",
  74    "%r2",
  75    "%r3",
  76    "%r4",
  77    "%r5",
  78    "%r6",
  79    "%r7",
  80    "%r8",
  81    "%r9",
  82    "%r10",
  83    "%r11",
  84    "%r12",
  85    "%r13",
  86    "%r14",
  87    "%pc",
  88};
  89#endif
  90
  91static const int tcg_target_reg_alloc_order[] = {
  92    TCG_REG_R4,
  93    TCG_REG_R5,
  94    TCG_REG_R6,
  95    TCG_REG_R7,
  96    TCG_REG_R8,
  97    TCG_REG_R9,
  98    TCG_REG_R10,
  99    TCG_REG_R11,
 100    TCG_REG_R13,
 101    TCG_REG_R0,
 102    TCG_REG_R1,
 103    TCG_REG_R2,
 104    TCG_REG_R3,
 105    TCG_REG_R12,
 106    TCG_REG_R14,
 107};
 108
 109static const int tcg_target_call_iarg_regs[4] = {
 110    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
 111};
 112static const int tcg_target_call_oarg_regs[2] = {
 113    TCG_REG_R0, TCG_REG_R1
 114};
 115
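    /* R12 is the AAPCS intra-procedure-call scratch register (IP): it is
       call-clobbered and never used for arguments or return values, which
       makes it a convenient internal temporary.  */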
 116#define TCG_REG_TMP  TCG_REG_R12
 117
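    /* The B/BL displacement field holds ((target - insn_address) - 8) / 4 in
       its low 24 bits; the -8 accounts for the PC reading 8 bytes ahead.
       reloc_pc24() rewrites only that field, leaving cond and opcode alone.  */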
 118static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 119{
 120    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
 121    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
 122}
 123
 124static void patch_reloc(tcg_insn_unit *code_ptr, int type,
 125                        intptr_t value, intptr_t addend)
 126{
 127    assert(type == R_ARM_PC24);
 128    assert(addend == 0);
 129    reloc_pc24(code_ptr, (tcg_insn_unit *)value);
 130}
 131
 132#define TCG_CT_CONST_ARM  0x100
 133#define TCG_CT_CONST_INV  0x200
 134#define TCG_CT_CONST_NEG  0x400
 135#define TCG_CT_CONST_ZERO 0x800
 136
 137/* parse target specific constraints */
 138static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 139{
 140    const char *ct_str;
 141
 142    ct_str = *pct_str;
 143    switch (ct_str[0]) {
 144    case 'I':
 145        ct->ct |= TCG_CT_CONST_ARM;
 146        break;
 147    case 'K':
 148        ct->ct |= TCG_CT_CONST_INV;
 149        break;
 150    case 'N': /* The gcc constraint letter is L, already used here.  */
 151        ct->ct |= TCG_CT_CONST_NEG;
 152        break;
 153    case 'Z':
 154        ct->ct |= TCG_CT_CONST_ZERO;
 155        break;
 156
 157    case 'r':
 158        ct->ct |= TCG_CT_REG;
 159        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
 160        break;
 161
 162    /* qemu_ld address */
 163    case 'l':
 164        ct->ct |= TCG_CT_REG;
 165        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
 166#ifdef CONFIG_SOFTMMU
 167        /* r0-r2,lr will be overwritten when reading the tlb entry,
 168           so don't use these. */
 169        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
 170        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
 171        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
 172        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
 173#endif
 174        break;
 175
 176    /* qemu_st address & data */
 177    case 's':
 178        ct->ct |= TCG_CT_REG;
 179        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
 180        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only),
 181           and r0-r1 when doing the byte swapping, so don't use these. */
 182        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
 183        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
 184#if defined(CONFIG_SOFTMMU)
 185        /* Avoid clashes with registers being used for helper args */
 186        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
 187#if TARGET_LONG_BITS == 64
 188        /* Avoid clashes with registers being used for helper args */
 189        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 190#endif
 191        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
 192#endif
 193        break;
 194
 195    default:
 196        return -1;
 197    }
 198    ct_str++;
 199    *pct_str = ct_str;
 200
 201    return 0;
 202}
 203
 204static inline uint32_t rotl(uint32_t val, int n)
 205{
 206  return (val << n) | (val >> (32 - n));
 207}
 208
 209/* ARM immediates for ALU instructions are made of an unsigned 8-bit value
 210   right-rotated by an even amount between 0 and 30. */
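    /* For example, 0x3fc00 (0xff << 10) has ten trailing zeros, so encode_imm()
       returns 32 - 10 = 22: rotating the value left by 22 recovers the 8-bit
       payload 0xff, and the rotate field of the instruction becomes 22 / 2.  */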
 211static inline int encode_imm(uint32_t imm)
 212{
 213    int shift;
 214
 215    /* simple case, only lower bits */
 216    if ((imm & ~0xff) == 0)
 217        return 0;
 218    /* then try a simple even shift */
 219    shift = ctz32(imm) & ~1;
 220    if (((imm >> shift) & ~0xff) == 0)
 221        return 32 - shift;
 222    /* now try harder with rotations */
 223    if ((rotl(imm, 2) & ~0xff) == 0)
 224        return 2;
 225    if ((rotl(imm, 4) & ~0xff) == 0)
 226        return 4;
 227    if ((rotl(imm, 6) & ~0xff) == 0)
 228        return 6;
 229    /* imm can't be encoded */
 230    return -1;
 231}
 232
 233static inline int check_fit_imm(uint32_t imm)
 234{
 235    return encode_imm(imm) >= 0;
 236}
 237
 238/* Test if a constant matches the constraint.
 239 * TODO: define constraints for:
 240 *
 241 * ldr/str offset:   between -0xfff and 0xfff
 242 * ldrh/strh offset: between -0xff and 0xff
 243 * mov operand2:     values represented with x << (2 * y), x < 0x100
 244 * add, sub, eor...: ditto
 245 */
 246static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
 247                                         const TCGArgConstraint *arg_ct)
 248{
 249    int ct;
 250    ct = arg_ct->ct;
 251    if (ct & TCG_CT_CONST) {
 252        return 1;
 253    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
 254        return 1;
 255    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
 256        return 1;
 257    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
 258        return 1;
 259    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
 260        return 1;
 261    } else {
 262        return 0;
 263    }
 264}
 265
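    /* Bit 20 is the S bit of a data-processing instruction: when set, the
       instruction updates the CPSR condition flags.  It is always set for the
       compare-style opcodes (TST/CMP/CMN) below, which have no destination.  */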
 266#define TO_CPSR (1 << 20)
 267
 268typedef enum {
 269    ARITH_AND = 0x0 << 21,
 270    ARITH_EOR = 0x1 << 21,
 271    ARITH_SUB = 0x2 << 21,
 272    ARITH_RSB = 0x3 << 21,
 273    ARITH_ADD = 0x4 << 21,
 274    ARITH_ADC = 0x5 << 21,
 275    ARITH_SBC = 0x6 << 21,
 276    ARITH_RSC = 0x7 << 21,
 277    ARITH_TST = 0x8 << 21 | TO_CPSR,
 278    ARITH_CMP = 0xa << 21 | TO_CPSR,
 279    ARITH_CMN = 0xb << 21 | TO_CPSR,
 280    ARITH_ORR = 0xc << 21,
 281    ARITH_MOV = 0xd << 21,
 282    ARITH_BIC = 0xe << 21,
 283    ARITH_MVN = 0xf << 21,
 284
 285    INSN_LDR_IMM   = 0x04100000,
 286    INSN_LDR_REG   = 0x06100000,
 287    INSN_STR_IMM   = 0x04000000,
 288    INSN_STR_REG   = 0x06000000,
 289
 290    INSN_LDRH_IMM  = 0x005000b0,
 291    INSN_LDRH_REG  = 0x001000b0,
 292    INSN_LDRSH_IMM = 0x005000f0,
 293    INSN_LDRSH_REG = 0x001000f0,
 294    INSN_STRH_IMM  = 0x004000b0,
 295    INSN_STRH_REG  = 0x000000b0,
 296
 297    INSN_LDRB_IMM  = 0x04500000,
 298    INSN_LDRB_REG  = 0x06500000,
 299    INSN_LDRSB_IMM = 0x005000d0,
 300    INSN_LDRSB_REG = 0x001000d0,
 301    INSN_STRB_IMM  = 0x04400000,
 302    INSN_STRB_REG  = 0x06400000,
 303
 304    INSN_LDRD_IMM  = 0x004000d0,
 305    INSN_LDRD_REG  = 0x000000d0,
 306    INSN_STRD_IMM  = 0x004000f0,
 307    INSN_STRD_REG  = 0x000000f0,
 308} ARMInsn;
 309
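    /* Shifter-operand helpers: bits [6:5] select the shift type (LSL/LSR/ASR/
       ROR).  An immediate shift amount lives in bits [11:7] with bit 4 clear;
       a register shift puts Rs in bits [11:8] with bit 4 set.  */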
 310#define SHIFT_IMM_LSL(im)       (((im) << 7) | 0x00)
 311#define SHIFT_IMM_LSR(im)       (((im) << 7) | 0x20)
 312#define SHIFT_IMM_ASR(im)       (((im) << 7) | 0x40)
 313#define SHIFT_IMM_ROR(im)       (((im) << 7) | 0x60)
 314#define SHIFT_REG_LSL(rs)       (((rs) << 8) | 0x10)
 315#define SHIFT_REG_LSR(rs)       (((rs) << 8) | 0x30)
 316#define SHIFT_REG_ASR(rs)       (((rs) << 8) | 0x50)
 317#define SHIFT_REG_ROR(rs)       (((rs) << 8) | 0x70)
 318
 319enum arm_cond_code_e {
 320    COND_EQ = 0x0,
 321    COND_NE = 0x1,
 322    COND_CS = 0x2,      /* Unsigned greater or equal */
 323    COND_CC = 0x3,      /* Unsigned less than */
 324    COND_MI = 0x4,      /* Negative */
 325    COND_PL = 0x5,      /* Zero or greater */
 326    COND_VS = 0x6,      /* Overflow */
 327    COND_VC = 0x7,      /* No overflow */
 328    COND_HI = 0x8,      /* Unsigned greater than */
 329    COND_LS = 0x9,      /* Unsigned less or equal */
 330    COND_GE = 0xa,
 331    COND_LT = 0xb,
 332    COND_GT = 0xc,
 333    COND_LE = 0xd,
 334    COND_AL = 0xe,
 335};
 336
 337static const uint8_t tcg_cond_to_arm_cond[] = {
 338    [TCG_COND_EQ] = COND_EQ,
 339    [TCG_COND_NE] = COND_NE,
 340    [TCG_COND_LT] = COND_LT,
 341    [TCG_COND_GE] = COND_GE,
 342    [TCG_COND_LE] = COND_LE,
 343    [TCG_COND_GT] = COND_GT,
 344    /* unsigned */
 345    [TCG_COND_LTU] = COND_CC,
 346    [TCG_COND_GEU] = COND_CS,
 347    [TCG_COND_LEU] = COND_LS,
 348    [TCG_COND_GTU] = COND_HI,
 349};
 350
 351static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
 352{
 353    tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
 354}
 355
 356static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
 357{
 358    tcg_out32(s, (cond << 28) | 0x0a000000 |
 359                    (((offset - 8) >> 2) & 0x00ffffff));
 360}
 361
 362static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
 363{
 364    /* We take care here not to modify the branch target by masking the
 365       corresponding bytes.  This ensures that caches and memory are
 366       kept coherent during retranslation. */
 367    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
 368}
 369
 370static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
 371{
 372    /* We take care here not to modify the branch target by masking the
 373       corresponding bytes.  This ensures that caches and memory are
 374       kept coherent during retranslation. */
 375    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
 376}
 377
 378static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
 379{
 380    tcg_out32(s, (cond << 28) | 0x0b000000 |
 381                    (((offset - 8) >> 2) & 0x00ffffff));
 382}
 383
 384static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
 385{
 386    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
 387}
 388
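    /* BLX (immediate) always switches to Thumb state; bit 24 (the H bit)
       carries bit 1 of the offset so halfword-aligned Thumb targets stay
       reachable.  */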
 389static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
 390{
 391    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
 392                (((offset - 8) >> 2) & 0x00ffffff));
 393}
 394
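    /* Data-processing layout: cond in bits [31:28], bit 25 set for the
       immediate form, the ARITH_* opcode in bits [24:21] (plus the S bit at
       bit 20 via TO_CPSR), Rn in [19:16], Rd in [15:12], shifter operand in
       [11:0].  */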
 395static inline void tcg_out_dat_reg(TCGContext *s,
 396                int cond, int opc, int rd, int rn, int rm, int shift)
 397{
 398    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
 399                    (rn << 16) | (rd << 12) | shift | rm);
 400}
 401
 402static inline void tcg_out_nop(TCGContext *s)
 403{
 404    if (use_armv7_instructions) {
 405        /* Architected nop introduced in v6k.  */
 406        /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
 407           also Just So Happened to do nothing on pre-v6k so that we
 408           don't need to conditionalize it?  */
 409        tcg_out32(s, 0xe320f000);
 410    } else {
 411        /* Prior to that the assembler uses mov r0, r0.  */
 412        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
 413    }
 414}
 415
 416static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
 417{
 418    /* Simple reg-reg move, optimising out the 'do nothing' case */
 419    if (rd != rm) {
 420        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
 421    }
 422}
 423
 424static inline void tcg_out_dat_imm(TCGContext *s,
 425                int cond, int opc, int rd, int rn, int im)
 426{
 427    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
 428                    (rn << 16) | (rd << 12) | im);
 429}
 430
 431static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
 432{
 433    int rot, opc, rn;
 434
 435    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
 436       Speed things up by only checking when movt would be required.
 437       Prior to armv7, have one go at fully rotated immediates before
 438       doing the decomposition thing below.  */
 439    if (!use_armv7_instructions || (arg & 0xffff0000)) {
 440        rot = encode_imm(arg);
 441        if (rot >= 0) {
 442            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
 443                            rotl(arg, rot) | (rot << 7));
 444            return;
 445        }
 446        rot = encode_imm(~arg);
 447        if (rot >= 0) {
 448            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
 449                            rotl(~arg, rot) | (rot << 7));
 450            return;
 451        }
 452    }
 453
 454    /* Use movw + movt.  */
 455    if (use_armv7_instructions) {
 456        /* movw */
 457        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
 458                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
 459        if (arg & 0xffff0000) {
 460            /* movt */
 461            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
 462                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
 463        }
 464        return;
 465    }
 466
 467    /* TODO: This is very suboptimal, we can easily have a constant
 468       pool somewhere after all the instructions.  */
 469    opc = ARITH_MOV;
 470    rn = 0;
 471    /* If we have lots of leading 1's, we can shorten the sequence by
 472       beginning with mvn and then clearing higher bits with eor.  */
 473    if (clz32(~arg) > clz32(arg)) {
 474        opc = ARITH_MVN, arg = ~arg;
 475    }
 476    do {
 477        int i = ctz32(arg) & ~1;
 478        rot = ((32 - i) << 7) & 0xf00;
 479        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
 480        arg &= ~(0xff << i);
 481
 482        opc = ARITH_EOR;
 483        rn = rd;
 484    } while (arg);
 485}
 486
 487static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
 488                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
 489{
 490    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
 491     * rhs must satisfy the "rI" constraint.
 492     */
 493    if (rhs_is_const) {
 494        int rot = encode_imm(rhs);
 495        assert(rot >= 0);
 496        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
 497    } else {
 498        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
 499    }
 500}
 501
 502static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
 503                            TCGReg dst, TCGReg lhs, TCGArg rhs,
 504                            bool rhs_is_const)
 505{
 506    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
 507     * rhs must satisfy the "rIK" constraint.
 508     */
 509    if (rhs_is_const) {
 510        int rot = encode_imm(rhs);
 511        if (rot < 0) {
 512            rhs = ~rhs;
 513            rot = encode_imm(rhs);
 514            assert(rot >= 0);
 515            opc = opinv;
 516        }
 517        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
 518    } else {
 519        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
 520    }
 521}
 522
 523static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
 524                            TCGArg dst, TCGArg lhs, TCGArg rhs,
 525                            bool rhs_is_const)
 526{
 527    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
 528     * rhs must satisfy the "rIN" constraint.
 529     */
 530    if (rhs_is_const) {
 531        int rot = encode_imm(rhs);
 532        if (rot < 0) {
 533            rhs = -rhs;
 534            rot = encode_imm(rhs);
 535            assert(rot >= 0);
 536            opc = opneg;
 537        }
 538        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
 539    } else {
 540        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
 541    }
 542}
 543
 544static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
 545                                 TCGReg rn, TCGReg rm)
 546{
 547    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
 548    if (!use_armv6_instructions && rd == rn) {
 549        if (rd == rm) {
 550            /* rd == rn == rm; copy an input to tmp first.  */
 551            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
 552            rm = rn = TCG_REG_TMP;
 553        } else {
 554            rn = rm;
 555            rm = rd;
 556        }
 557    }
 558    /* mul */
 559    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
 560}
 561
 562static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
 563                                   TCGReg rd1, TCGReg rn, TCGReg rm)
 564{
 565    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
 566    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
 567        if (rd0 == rm || rd1 == rm) {
 568            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
 569            rn = TCG_REG_TMP;
 570        } else {
 571            TCGReg t = rn;
 572            rn = rm;
 573            rm = t;
 574        }
 575    }
 576    /* umull */
 577    tcg_out32(s, (cond << 28) | 0x00800090 |
 578              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
 579}
 580
 581static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
 582                                   TCGReg rd1, TCGReg rn, TCGReg rm)
 583{
 584    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
 585    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
 586        if (rd0 == rm || rd1 == rm) {
 587            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
 588            rn = TCG_REG_TMP;
 589        } else {
 590            TCGReg t = rn;
 591            rn = rm;
 592            rm = t;
 593        }
 594    }
 595    /* smull */
 596    tcg_out32(s, (cond << 28) | 0x00c00090 |
 597              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
 598}
 599
 600static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
 601{
 602    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
 603}
 604
 605static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
 606{
 607    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
 608}
 609
 610static inline void tcg_out_ext8s(TCGContext *s, int cond,
 611                                 int rd, int rn)
 612{
 613    if (use_armv6_instructions) {
 614        /* sxtb */
 615        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
 616    } else {
 617        tcg_out_dat_reg(s, cond, ARITH_MOV,
 618                        rd, 0, rn, SHIFT_IMM_LSL(24));
 619        tcg_out_dat_reg(s, cond, ARITH_MOV,
 620                        rd, 0, rd, SHIFT_IMM_ASR(24));
 621    }
 622}
 623
 624static inline void tcg_out_ext8u(TCGContext *s, int cond,
 625                                 int rd, int rn)
 626{
 627    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
 628}
 629
 630static inline void tcg_out_ext16s(TCGContext *s, int cond,
 631                                  int rd, int rn)
 632{
 633    if (use_armv6_instructions) {
 634        /* sxth */
 635        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
 636    } else {
 637        tcg_out_dat_reg(s, cond, ARITH_MOV,
 638                        rd, 0, rn, SHIFT_IMM_LSL(16));
 639        tcg_out_dat_reg(s, cond, ARITH_MOV,
 640                        rd, 0, rd, SHIFT_IMM_ASR(16));
 641    }
 642}
 643
 644static inline void tcg_out_ext16u(TCGContext *s, int cond,
 645                                  int rd, int rn)
 646{
 647    if (use_armv6_instructions) {
 648        /* uxth */
 649        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
 650    } else {
 651        tcg_out_dat_reg(s, cond, ARITH_MOV,
 652                        rd, 0, rn, SHIFT_IMM_LSL(16));
 653        tcg_out_dat_reg(s, cond, ARITH_MOV,
 654                        rd, 0, rd, SHIFT_IMM_LSR(16));
 655    }
 656}
 657
 658static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
 659{
 660    if (use_armv6_instructions) {
 661        /* revsh */
 662        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
 663    } else {
 664        tcg_out_dat_reg(s, cond, ARITH_MOV,
 665                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
 666        tcg_out_dat_reg(s, cond, ARITH_MOV,
 667                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
 668        tcg_out_dat_reg(s, cond, ARITH_ORR,
 669                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
 670    }
 671}
 672
 673static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
 674{
 675    if (use_armv6_instructions) {
 676        /* rev16 */
 677        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
 678    } else {
 679        tcg_out_dat_reg(s, cond, ARITH_MOV,
 680                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
 681        tcg_out_dat_reg(s, cond, ARITH_MOV,
 682                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
 683        tcg_out_dat_reg(s, cond, ARITH_ORR,
 684                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
 685    }
 686}
 687
 688/* Swap the two low bytes, assuming that the two high input bytes and the
 689   two high output bytes can hold any value. */
 690static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
 691{
 692    if (use_armv6_instructions) {
 693        /* rev16 */
 694        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
 695    } else {
 696        tcg_out_dat_reg(s, cond, ARITH_MOV,
 697                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
 698        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
 699        tcg_out_dat_reg(s, cond, ARITH_ORR,
 700                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
 701    }
 702}
 703
 704static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
 705{
 706    if (use_armv6_instructions) {
 707        /* rev */
 708        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
 709    } else {
 710        tcg_out_dat_reg(s, cond, ARITH_EOR,
 711                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
 712        tcg_out_dat_imm(s, cond, ARITH_BIC,
 713                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
 714        tcg_out_dat_reg(s, cond, ARITH_MOV,
 715                        rd, 0, rn, SHIFT_IMM_ROR(8));
 716        tcg_out_dat_reg(s, cond, ARITH_EOR,
 717                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
 718    }
 719}
 720
 721bool tcg_target_deposit_valid(int ofs, int len)
 722{
 723    /* ??? Without bfi, we could improve over generic code by combining
 724       the right-shift from a non-zero ofs with the orr.  We do run into
 725       problems when rd == rs, and the mask generated from ofs+len doesn't
 726       fit into an immediate.  We would have to be careful not to pessimize
 727       wrt the optimizations performed on the expanded code.  */
 728    return use_armv7_instructions;
 729}
 730
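    /* BFI/BFC place the inserted bit-field's lsb in bits [11:7] and its msb
       in bits [20:16]; tcg_out_deposit() builds those from ofs and
       ofs + len - 1.  */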
 731static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
 732                                   TCGArg a1, int ofs, int len, bool const_a1)
 733{
 734    if (const_a1) {
 735        /* bfi becomes bfc with rn == 15.  */
 736        a1 = 15;
 737    }
 738    /* bfi/bfc */
 739    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
 740              | (ofs << 7) | ((ofs + len - 1) << 16));
 741}
 742
 743/* Note that this routine is used for both LDR and LDRH formats, so we do
 744   not wish to include an immediate shift at this point.  */
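    /* The u, p and w arguments map to the U (add rather than subtract the
       offset), P (pre- rather than post-indexed addressing) and W (base
       register writeback) bits of the load/store encodings.  */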
 745static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
 746                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
 747{
 748    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
 749              | (w << 21) | (rn << 16) | (rt << 12) | rm);
 750}
 751
 752static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
 753                            TCGReg rn, int imm8, bool p, bool w)
 754{
 755    bool u = 1;
 756    if (imm8 < 0) {
 757        imm8 = -imm8;
 758        u = 0;
 759    }
 760    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
 761              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
 762}
 763
 764static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
 765                             TCGReg rn, int imm12, bool p, bool w)
 766{
 767    bool u = 1;
 768    if (imm12 < 0) {
 769        imm12 = -imm12;
 770        u = 0;
 771    }
 772    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
 773              (rn << 16) | (rt << 12) | imm12);
 774}
 775
 776static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
 777                                   TCGReg rn, int imm12)
 778{
 779    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
 780}
 781
 782static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
 783                                   TCGReg rn, int imm12)
 784{
 785    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
 786}
 787
 788static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
 789                                  TCGReg rn, TCGReg rm)
 790{
 791    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
 792}
 793
 794static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
 795                                  TCGReg rn, TCGReg rm)
 796{
 797    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
 798}
 799
 800static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
 801                                   TCGReg rn, int imm8)
 802{
 803    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
 804}
 805
 806static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
 807                                  TCGReg rn, TCGReg rm)
 808{
 809    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
 810}
 811
 812static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
 813                                   TCGReg rn, int imm8)
 814{
 815    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
 816}
 817
 818static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
 819                                  TCGReg rn, TCGReg rm)
 820{
 821    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
 822}
 823
 824/* Register pre-increment with base writeback.  */
 825static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
 826                                    TCGReg rn, TCGReg rm)
 827{
 828    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
 829}
 830
 831static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
 832                                    TCGReg rn, TCGReg rm)
 833{
 834    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
 835}
 836
 837static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
 838                                   TCGReg rn, int imm8)
 839{
 840    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
 841}
 842
 843static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
 844                                  TCGReg rn, int imm8)
 845{
 846    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
 847}
 848
 849static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
 850                                   TCGReg rn, TCGReg rm)
 851{
 852    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
 853}
 854
 855static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
 856                                  TCGReg rn, TCGReg rm)
 857{
 858    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
 859}
 860
 861static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
 862                                   TCGReg rn, int imm8)
 863{
 864    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
 865}
 866
 867static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
 868                                   TCGReg rn, TCGReg rm)
 869{
 870    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
 871}
 872
 873static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
 874                                  TCGReg rn, int imm12)
 875{
 876    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
 877}
 878
 879static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
 880                                  TCGReg rn, int imm12)
 881{
 882    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
 883}
 884
 885static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
 886                                 TCGReg rn, TCGReg rm)
 887{
 888    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
 889}
 890
 891static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
 892                                 TCGReg rn, TCGReg rm)
 893{
 894    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
 895}
 896
 897static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
 898                                  TCGReg rn, int imm8)
 899{
 900    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
 901}
 902
 903static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
 904                                  TCGReg rn, TCGReg rm)
 905{
 906    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
 907}
 908
 909static inline void tcg_out_ld32u(TCGContext *s, int cond,
 910                int rd, int rn, int32_t offset)
 911{
 912    if (offset > 0xfff || offset < -0xfff) {
 913        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 914        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
 915    } else
 916        tcg_out_ld32_12(s, cond, rd, rn, offset);
 917}
 918
 919static inline void tcg_out_st32(TCGContext *s, int cond,
 920                int rd, int rn, int32_t offset)
 921{
 922    if (offset > 0xfff || offset < -0xfff) {
 923        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 924        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
 925    } else
 926        tcg_out_st32_12(s, cond, rd, rn, offset);
 927}
 928
 929static inline void tcg_out_ld16u(TCGContext *s, int cond,
 930                int rd, int rn, int32_t offset)
 931{
 932    if (offset > 0xff || offset < -0xff) {
 933        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 934        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
 935    } else
 936        tcg_out_ld16u_8(s, cond, rd, rn, offset);
 937}
 938
 939static inline void tcg_out_ld16s(TCGContext *s, int cond,
 940                int rd, int rn, int32_t offset)
 941{
 942    if (offset > 0xff || offset < -0xff) {
 943        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 944        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
 945    } else
 946        tcg_out_ld16s_8(s, cond, rd, rn, offset);
 947}
 948
 949static inline void tcg_out_st16(TCGContext *s, int cond,
 950                int rd, int rn, int32_t offset)
 951{
 952    if (offset > 0xff || offset < -0xff) {
 953        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 954        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
 955    } else
 956        tcg_out_st16_8(s, cond, rd, rn, offset);
 957}
 958
 959static inline void tcg_out_ld8u(TCGContext *s, int cond,
 960                int rd, int rn, int32_t offset)
 961{
 962    if (offset > 0xfff || offset < -0xfff) {
 963        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 964        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
 965    } else
 966        tcg_out_ld8_12(s, cond, rd, rn, offset);
 967}
 968
 969static inline void tcg_out_ld8s(TCGContext *s, int cond,
 970                int rd, int rn, int32_t offset)
 971{
 972    if (offset > 0xff || offset < -0xff) {
 973        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 974        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
 975    } else
 976        tcg_out_ld8s_8(s, cond, rd, rn, offset);
 977}
 978
 979static inline void tcg_out_st8(TCGContext *s, int cond,
 980                int rd, int rn, int32_t offset)
 981{
 982    if (offset > 0xfff || offset < -0xfff) {
 983        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 984        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
 985    } else
 986        tcg_out_st8_12(s, cond, rd, rn, offset);
 987}
 988
 989/* The _goto case is normally between TBs within the same code buffer, and
 990 * with the code buffer limited to 16MB we wouldn't need the long case.
 991 * But we also use it for the tail-call to the qemu_ld/st helpers, which
 992 * does need the long case.  */
 993static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
 994{
 995    intptr_t addri = (intptr_t)addr;
 996    ptrdiff_t disp = tcg_pcrel_diff(s, addr);
 997
 998    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
 999        tcg_out_b(s, cond, disp);
1000        return;
1001    }
1002
1003    tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
1004    if (use_armv5t_instructions) {
1005        tcg_out_bx(s, cond, TCG_REG_TMP);
1006    } else {
1007        if (addri & 1) {
1008            tcg_abort();
1009        }
1010        tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
1011    }
1012}
1013
1014/* The call case is mostly used for helpers - so it's not unreasonable
1015 * for them to be beyond branch range */
1016static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
1017{
1018    intptr_t addri = (intptr_t)addr;
1019    ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1020
1021    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
1022        if (addri & 1) {
1023            /* Use BLX if the target is in Thumb mode */
1024            if (!use_armv5t_instructions) {
1025                tcg_abort();
1026            }
1027            tcg_out_blx_imm(s, disp);
1028        } else {
1029            tcg_out_bl(s, COND_AL, disp);
1030        }
1031    } else if (use_armv7_instructions) {
1032        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
1033        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1034    } else {
1035        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
1036        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1037        tcg_out32(s, addri);
1038    }
1039}
1040
1041static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
1042{
1043    TCGLabel *l = &s->labels[label_index];
1044
1045    if (l->has_value) {
1046        tcg_out_goto(s, cond, l->u.value_ptr);
1047    } else {
1048        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 0);
1049        tcg_out_b_noaddr(s, cond);
1050    }
1051}
1052
1053#ifdef CONFIG_SOFTMMU
1054/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1055 *                                     int mmu_idx, uintptr_t ra)
1056 */
1057static void * const qemu_ld_helpers[16] = {
1058    [MO_UB]   = helper_ret_ldub_mmu,
1059    [MO_SB]   = helper_ret_ldsb_mmu,
1060
1061    [MO_LEUW] = helper_le_lduw_mmu,
1062    [MO_LEUL] = helper_le_ldul_mmu,
1063    [MO_LEQ]  = helper_le_ldq_mmu,
1064    [MO_LESW] = helper_le_ldsw_mmu,
1065    [MO_LESL] = helper_le_ldul_mmu,
1066
1067    [MO_BEUW] = helper_be_lduw_mmu,
1068    [MO_BEUL] = helper_be_ldul_mmu,
1069    [MO_BEQ]  = helper_be_ldq_mmu,
1070    [MO_BESW] = helper_be_ldsw_mmu,
1071    [MO_BESL] = helper_be_ldul_mmu,
1072};
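    /* Note that the MO_LESL/MO_BESL entries reuse the unsigned 32-bit helpers:
       on this 32-bit host a 32-bit result already fills the register, so no
       separate sign extension is needed.  */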
1073
1074/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1075 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
1076 */
1077static void * const qemu_st_helpers[16] = {
1078    [MO_UB]   = helper_ret_stb_mmu,
1079    [MO_LEUW] = helper_le_stw_mmu,
1080    [MO_LEUL] = helper_le_stl_mmu,
1081    [MO_LEQ]  = helper_le_stq_mmu,
1082    [MO_BEUW] = helper_be_stw_mmu,
1083    [MO_BEUL] = helper_be_stl_mmu,
1084    [MO_BEQ]  = helper_be_stq_mmu,
1085};
1086
1087/* Helper routines for marshalling helper function arguments into
1088 * the correct registers and stack.
1089 * argreg is where we want to put this argument, arg is the argument itself.
1090 * Return value is the updated argreg ready for the next call.
1091 * Note that argregs 0..3 are real registers, and 4 and up go on the stack.
1092 *
1093 * We provide routines for arguments which are: immediate, 32 bit
1094 * value in register, 16 and 8 bit values in register (which must be zero
1095 * extended before use) and 64 bit value in a lo:hi register pair.
1096 */
1097#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
1098static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
1099{                                                                          \
1100    if (argreg < 4) {                                                      \
1101        MOV_ARG(s, COND_AL, argreg, arg);                                  \
1102    } else {                                                               \
1103        int ofs = (argreg - 4) * 4;                                        \
1104        EXT_ARG;                                                           \
1105        assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);                      \
1106        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
1107    }                                                                      \
1108    return argreg + 1;                                                     \
1109}
1110
1111DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
1112    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1113DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
1114    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1115DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
1116    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1117DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1118
1119static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1120                                TCGReg arglo, TCGReg arghi)
1121{
1122    /* 64 bit arguments must go in even/odd register pairs
1123     * and in 8-aligned stack slots.
1124     */
1125    if (argreg & 1) {
1126        argreg++;
1127    }
1128    if (use_armv6_instructions && argreg >= 4
1129        && (arglo & 1) == 0 && arghi == arglo + 1) {
1130        tcg_out_strd_8(s, COND_AL, arglo,
1131                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
1132        return argreg + 2;
1133    } else {
1134        argreg = tcg_out_arg_reg32(s, argreg, arglo);
1135        argreg = tcg_out_arg_reg32(s, argreg, arghi);
1136        return argreg;
1137    }
1138}
1139
1140#define TLB_SHIFT       (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
1141
1142/* We're expecting to use an 8-bit immediate and to mask.  */
1143QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
1144
1145/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
1146   Using the offset of the second entry in the last tlb table ensures
1147   that we can index all of the elements of the first entry.  */
1148QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1149                  > 0xffff);
1150
1151/* Load and compare a TLB entry, leaving the flags set.  Returns the register
1152   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
1153
1154static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1155                               TCGMemOp s_bits, int mem_index, bool is_load)
1156{
1157    TCGReg base = TCG_AREG0;
1158    int cmp_off =
1159        (is_load
1160         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1161         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1162    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1163
1164    /* Should generate something like the following:
1165     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
1166     *   add    r2, env, #high
1167     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
1168     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
1169     *   ldr    r0, [r2, #cmp]                                    (4)
1170     *   tst    addrlo, #s_mask
1171     *   ldr    r2, [r2, #add]                                    (5)
1172     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
1173     */
1174    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
1175                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1176
1177    /* We checked that the offset is contained within 16 bits above.  */
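        /* The immediate (24 << 7) | (cmp_off >> 8) is the rotated-immediate
           encoding of cmp_off & 0xff00: a rotate field of 12 rotates the
           8-bit payload right by 24, i.e. left by 8.  */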
1178    if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
1179        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1180                        (24 << 7) | (cmp_off >> 8));
1181        base = TCG_REG_R2;
1182        add_off -= cmp_off & 0xff00;
1183        cmp_off &= 0xff;
1184    }
1185
1186    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1187                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
1188    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1189                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1190
1191    /* Load the tlb comparator.  Use ldrd if needed and available,
1192       but due to how the pointer needs setting up, ldm isn't useful.
1193       Base armv5 doesn't have ldrd, but armv5te does.  */
1194    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
1195        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1196    } else {
1197        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1198        if (TARGET_LONG_BITS == 64) {
1199            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
1200        }
1201    }
1202
1203    /* Check alignment.  */
1204    if (s_bits) {
1205        tcg_out_dat_imm(s, COND_AL, ARITH_TST,
1206                        0, addrlo, (1 << s_bits) - 1);
1207    }
1208
1209    /* Load the tlb addend.  */
1210    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
1211
1212    tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
1213                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1214
1215    if (TARGET_LONG_BITS == 64) {
1216        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1217                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
1218    }
1219
1220    return TCG_REG_R2;
1221}
1222
1223/* Record the context of a call to the out of line helper code for the slow
1224   path for a load or store, so that we can later generate the correct
1225   helper code.  */
1226static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
1227                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1228                                TCGReg addrhi, int mem_index,
1229                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1230{
1231    TCGLabelQemuLdst *label = new_ldst_label(s);
1232
1233    label->is_ld = is_ld;
1234    label->opc = opc;
1235    label->datalo_reg = datalo;
1236    label->datahi_reg = datahi;
1237    label->addrlo_reg = addrlo;
1238    label->addrhi_reg = addrhi;
1239    label->mem_index = mem_index;
1240    label->raddr = raddr;
1241    label->label_ptr[0] = label_ptr;
1242}
1243
1244static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1245{
1246    TCGReg argreg, datalo, datahi;
1247    TCGMemOp opc = lb->opc;
1248    void *func;
1249
1250    reloc_pc24(lb->label_ptr[0], s->code_ptr);
1251
1252    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1253    if (TARGET_LONG_BITS == 64) {
1254        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1255    } else {
1256        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1257    }
1258    argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
1259    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1260
1261    /* For armv6 we can use the canonical unsigned helpers and minimize
1262       icache usage.  For pre-armv6, use the signed helpers since we do
1263       not have a single insn sign-extend.  */
1264    if (use_armv6_instructions) {
1265        func = qemu_ld_helpers[opc & ~MO_SIGN];
1266    } else {
1267        func = qemu_ld_helpers[opc];
1268        if (opc & MO_SIGN) {
1269            opc = MO_UL;
1270        }
1271    }
1272    tcg_out_call(s, func);
1273
1274    datalo = lb->datalo_reg;
1275    datahi = lb->datahi_reg;
1276    switch (opc & MO_SSIZE) {
1277    case MO_SB:
1278        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
1279        break;
1280    case MO_SW:
1281        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
1282        break;
1283    default:
1284        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1285        break;
1286    case MO_Q:
1287        if (datalo != TCG_REG_R1) {
1288            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1289            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1290        } else if (datahi != TCG_REG_R0) {
1291            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1292            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1293        } else {
1294            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1295            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1296            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
1297        }
1298        break;
1299    }
1300
1301    tcg_out_goto(s, COND_AL, lb->raddr);
1302}
1303
1304static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1305{
1306    TCGReg argreg, datalo, datahi;
1307    TCGMemOp opc = lb->opc;
1308
1309    reloc_pc24(lb->label_ptr[0], s->code_ptr);
1310
1311    argreg = TCG_REG_R0;
1312    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1313    if (TARGET_LONG_BITS == 64) {
1314        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1315    } else {
1316        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1317    }
1318
1319    datalo = lb->datalo_reg;
1320    datahi = lb->datahi_reg;
1321    switch (opc & MO_SIZE) {
1322    case MO_8:
1323        argreg = tcg_out_arg_reg8(s, argreg, datalo);
1324        break;
1325    case MO_16:
1326        argreg = tcg_out_arg_reg16(s, argreg, datalo);
1327        break;
1328    case MO_32:
1329    default:
1330        argreg = tcg_out_arg_reg32(s, argreg, datalo);
1331        break;
1332    case MO_64:
1333        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1334        break;
1335    }
1336
1337    argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
1338    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1339
1340    /* Tail-call to the helper, which will return to the fast path.  */
1341    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]);
1342}
1343#endif /* SOFTMMU */
1344
1345static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1346                                         TCGReg datalo, TCGReg datahi,
1347                                         TCGReg addrlo, TCGReg addend)
1348{
1349    TCGMemOp bswap = opc & MO_BSWAP;
1350
1351    switch (opc & MO_SSIZE) {
1352    case MO_UB:
1353        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1354        break;
1355    case MO_SB:
1356        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1357        break;
1358    case MO_UW:
1359        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1360        if (bswap) {
1361            tcg_out_bswap16(s, COND_AL, datalo, datalo);
1362        }
1363        break;
1364    case MO_SW:
1365        if (bswap) {
1366            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1367            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1368        } else {
1369            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1370        }
1371        break;
1372    case MO_UL:
1373    default:
1374        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1375        if (bswap) {
1376            tcg_out_bswap32(s, COND_AL, datalo, datalo);
1377        }
1378        break;
1379    case MO_Q:
1380        {
1381            TCGReg dl = (bswap ? datahi : datalo);
1382            TCGReg dh = (bswap ? datalo : datahi);
1383
1384            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
1385            if (USING_SOFTMMU && use_armv6_instructions
1386                && (dl & 1) == 0 && dh == dl + 1) {
1387                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1388            } else if (dl != addend) {
1389                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1390                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1391            } else {
1392                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1393                                addend, addrlo, SHIFT_IMM_LSL(0));
1394                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1395                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1396            }
1397            if (bswap) {
1398                tcg_out_bswap32(s, COND_AL, dl, dl);
1399                tcg_out_bswap32(s, COND_AL, dh, dh);
1400            }
1401        }
1402        break;
1403    }
1404}
1405
1406static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
1407                                          TCGReg datalo, TCGReg datahi,
1408                                          TCGReg addrlo)
1409{
1410    TCGMemOp bswap = opc & MO_BSWAP;
1411
1412    switch (opc & MO_SSIZE) {
1413    case MO_UB:
1414        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1415        break;
1416    case MO_SB:
1417        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1418        break;
1419    case MO_UW:
1420        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1421        if (bswap) {
1422            tcg_out_bswap16(s, COND_AL, datalo, datalo);
1423        }
1424        break;
1425    case MO_SW:
1426        if (bswap) {
1427            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1428            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1429        } else {
1430            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1431        }
1432        break;
1433    case MO_UL:
1434    default:
1435        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1436        if (bswap) {
1437            tcg_out_bswap32(s, COND_AL, datalo, datalo);
1438        }
1439        break;
1440    case MO_Q:
1441        {
1442            TCGReg dl = (bswap ? datahi : datalo);
1443            TCGReg dh = (bswap ? datalo : datahi);
1444
1445            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
1446            if (USING_SOFTMMU && use_armv6_instructions
1447                && (dl & 1) == 0 && dh == dl + 1) {
1448                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1449            } else if (dl == addrlo) {
1450                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1451                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1452            } else {
1453                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1454                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1455            }
1456            if (bswap) {
1457                tcg_out_bswap32(s, COND_AL, dl, dl);
1458                tcg_out_bswap32(s, COND_AL, dh, dh);
1459            }
1460        }
1461        break;
1462    }
1463}
1464
1465static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1466{
1467    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1468    TCGMemOp opc;
1469#ifdef CONFIG_SOFTMMU
1470    int mem_index;
1471    TCGReg addend;
1472    tcg_insn_unit *label_ptr;
1473#endif
1474
1475    datalo = *args++;
1476    datahi = (is64 ? *args++ : 0);
1477    addrlo = *args++;
1478    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1479    opc = *args++;
1480
1481#ifdef CONFIG_SOFTMMU
1482    mem_index = *args;
1483    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
1484
1485    /* This is a conditional BL, only to load a pointer within this opcode into
1486       LR for the slow path.  We will not be using the value for a tail call.  */
1487    label_ptr = s->code_ptr;
1488    tcg_out_bl_noaddr(s, COND_NE);
1489
1490    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1491
1492    add_qemu_ldst_label(s, true, opc, datalo, datahi, addrlo, addrhi,
1493                        mem_index, s->code_ptr, label_ptr);
1494#else /* !CONFIG_SOFTMMU */
1495    if (GUEST_BASE) {
1496        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
1497        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1498    } else {
1499        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1500    }
1501#endif
1502}
1503
1504static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1505                                         TCGReg datalo, TCGReg datahi,
1506                                         TCGReg addrlo, TCGReg addend)
1507{
1508    TCGMemOp bswap = opc & MO_BSWAP;
1509
1510    switch (opc & MO_SIZE) {
1511    case MO_8:
1512        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1513        break;
1514    case MO_16:
1515        if (bswap) {
1516            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1517            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1518        } else {
1519            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1520        }
1521        break;
1522    case MO_32:
1523    default:
1524        if (bswap) {
1525            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1526            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1527        } else {
1528            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1529        }
1530        break;
1531    case MO_64:
1532        /* Avoid strd for user-only emulation, to handle unaligned.  */
1533        if (bswap) {
1534            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1535            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1536            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1537            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1538        } else if (USING_SOFTMMU && use_armv6_instructions
1539                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1540            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1541        } else {
1542            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1543            tcg_out_st32_12(s, cond, datahi, addend, 4);
1544        }
1545        break;
1546    }
1547}
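
    /* The MO_64 store paths above that cannot use strd rely on the
       write-back form of str: tcg_out_st32_rwb emits
       "str Rd, [addend, addrlo]!", which stores the first word and leaves
       addend holding the full host address, so the second word can follow
       with a plain immediate offset of 4.  Roughly, for the byte-swapped
       case:

           rev  r0, datahi
           str  r0, [addend, addrlo]!   @ most significant word at the low address
           rev  r0, datalo
           str  r0, [addend, #4]

       where rev stands in for whatever tcg_out_bswap32 emits on this host
       (a single rev on ARMv6+, an eor/mov sequence otherwise).  */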
1548
1549static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1550                                          TCGReg datalo, TCGReg datahi,
1551                                          TCGReg addrlo)
1552{
1553    TCGMemOp bswap = opc & MO_BSWAP;
1554
1555    switch (opc & MO_SIZE) {
1556    case MO_8:
1557        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1558        break;
1559    case MO_16:
1560        if (bswap) {
1561            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1562            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1563        } else {
1564            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1565        }
1566        break;
1567    case MO_32:
1568    default:
1569        if (bswap) {
1570            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1571            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1572        } else {
1573            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1574        }
1575        break;
1576    case MO_64:
1577        /* Avoid strd for user-only emulation, to handle unaligned.  */
1578        if (bswap) {
1579            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1580            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1581            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1582            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1583        } else if (USING_SOFTMMU && use_armv6_instructions
1584                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1585            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1586        } else {
1587            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1588            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1589        }
1590        break;
1591    }
1592}
1593
1594static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1595{
1596    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1597    TCGMemOp opc;
1598#ifdef CONFIG_SOFTMMU
1599    int mem_index;
1600    TCGReg addend;
1601    tcg_insn_unit *label_ptr;
1602#endif
1603
1604    datalo = *args++;
1605    datahi = (is64 ? *args++ : 0);
1606    addrlo = *args++;
1607    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1608    opc = *args++;
1609
1610#ifdef CONFIG_SOFTMMU
1611    mem_index = *args;
1612    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
1613
1614    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1615
1616    /* The conditional call must come last, as we're going to return here.  */
1617    label_ptr = s->code_ptr;
1618    tcg_out_bl_noaddr(s, COND_NE);
1619
1620    add_qemu_ldst_label(s, false, opc, datalo, datahi, addrlo, addrhi,
1621                        mem_index, s->code_ptr, label_ptr);
1622#else /* !CONFIG_SOFTMMU */
1623    if (GUEST_BASE) {
1624        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
1625        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1626                              datahi, addrlo, TCG_REG_TMP);
1627    } else {
1628        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1629    }
1630#endif
1631}
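
    /* Unlike the load, the store fast path above is predicated on the TLB
       hit (COND_EQ), so it can precede the conditional call: on a miss the
       str instructions are skipped and the blne enters the slow path, which
       performs the store through the helper and simply returns to the
       instruction after the branch, where nothing remains to be done.  */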
1632
1633static tcg_insn_unit *tb_ret_addr;
1634
1635static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1636                const TCGArg *args, const int *const_args)
1637{
1638    TCGArg a0, a1, a2, a3, a4, a5;
1639    int c;
1640
1641    switch (opc) {
1642    case INDEX_op_exit_tb:
1643        tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1644        tcg_out_goto(s, COND_AL, tb_ret_addr);
1645        break;
1646    case INDEX_op_goto_tb:
1647        if (s->tb_jmp_offset) {
1648            /* Direct jump method */
1649            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
1650            tcg_out_b_noaddr(s, COND_AL);
1651        } else {
1652            /* Indirect jump method */
1653            intptr_t ptr = (intptr_t)(s->tb_next + args[0]);
1654            tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
1655            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
1656        }
1657        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
1658        break;
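        /* For goto_tb above, the direct method emits a branch with an
           unresolved target and records its offset in tb_jmp_offset so the
           branch can be patched in place when TBs are chained.  The
           indirect method keeps the target in memory at tb_next[args[0]]
           and loads it straight into the PC, roughly:

               <build ptr & ~0xfff in r0>       @ tcg_out_movi32
               ldr  pc, [r0, #(ptr & 0xfff)]

           keeping the low 12 bits of the address as the ldr immediate so
           only the page-aligned part has to be built in a register.  */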
1659    case INDEX_op_br:
1660        tcg_out_goto_label(s, COND_AL, args[0]);
1661        break;
1662
1663    case INDEX_op_ld8u_i32:
1664        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1665        break;
1666    case INDEX_op_ld8s_i32:
1667        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1668        break;
1669    case INDEX_op_ld16u_i32:
1670        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1671        break;
1672    case INDEX_op_ld16s_i32:
1673        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1674        break;
1675    case INDEX_op_ld_i32:
1676        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1677        break;
1678    case INDEX_op_st8_i32:
1679        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1680        break;
1681    case INDEX_op_st16_i32:
1682        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1683        break;
1684    case INDEX_op_st_i32:
1685        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1686        break;
1687
1688    case INDEX_op_movcond_i32:
1689        /* Constraints mean that v2 is always in the same register as dest,
1690         * so we only need to do "if condition passed, move v1 to dest".
1691         */
1692        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1693                        args[1], args[2], const_args[2]);
1694        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1695                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
1696        break;
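        /* The movcond above therefore boils down to, roughly:

               cmp   c1, c2       @ or cmn with the negated constant
               movCC dest, v1     @ executed only when the condition holds

           dest already holds v2 thanks to the "0" constraint, so the false
           case costs nothing; the rIK constraint lets an mvn of the
           inverted constant stand in when only ~v1 encodes as an
           immediate.  */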
1697    case INDEX_op_add_i32:
1698        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1699                        args[0], args[1], args[2], const_args[2]);
1700        break;
1701    case INDEX_op_sub_i32:
1702        if (const_args[1]) {
1703            if (const_args[2]) {
1704                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1705            } else {
1706                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1707                               args[0], args[2], args[1], 1);
1708            }
1709        } else {
1710            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1711                            args[0], args[1], args[2], const_args[2]);
1712        }
1713        break;
1714    case INDEX_op_and_i32:
1715        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1716                        args[0], args[1], args[2], const_args[2]);
1717        break;
1718    case INDEX_op_andc_i32:
1719        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1720                        args[0], args[1], args[2], const_args[2]);
1721        break;
1722    case INDEX_op_or_i32:
1723        c = ARITH_ORR;
1724        goto gen_arith;
1725    case INDEX_op_xor_i32:
1726        c = ARITH_EOR;
1727        /* Fall through.  */
1728    gen_arith:
1729        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1730        break;
1731    case INDEX_op_add2_i32:
1732        a0 = args[0], a1 = args[1], a2 = args[2];
1733        a3 = args[3], a4 = args[4], a5 = args[5];
1734        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1735            a0 = TCG_REG_TMP;
1736        }
1737        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1738                        a0, a2, a4, const_args[4]);
1739        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1740                        a1, a3, a5, const_args[5]);
1741        tcg_out_mov_reg(s, COND_AL, args[0], a0);
1742        break;
1743    case INDEX_op_sub2_i32:
1744        a0 = args[0], a1 = args[1], a2 = args[2];
1745        a3 = args[3], a4 = args[4], a5 = args[5];
1746        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1747            a0 = TCG_REG_TMP;
1748        }
1749        if (const_args[2]) {
1750            if (const_args[4]) {
1751                tcg_out_movi32(s, COND_AL, a0, a4);
1752                a4 = a0;
1753            }
1754            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1755        } else {
1756            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1757                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1758        }
1759        if (const_args[3]) {
1760            if (const_args[5]) {
1761                tcg_out_movi32(s, COND_AL, a1, a5);
1762                a5 = a1;
1763            }
1764            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1765        } else {
1766            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1767                            a1, a3, a5, const_args[5]);
1768        }
1769        tcg_out_mov_reg(s, COND_AL, args[0], a0);
1770        break;
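        /* add2/sub2 chain the carry through the flags: the low halves are
           combined with a flag-setting ADD/SUB (TO_CPSR), the high halves
           with ADC/SBC.  If writing the low result directly would clobber a
           high-half input that is still needed, it is computed in
           TCG_REG_TMP and moved into args[0] afterwards (tcg_out_mov_reg is
           a no-op when the registers already match).  The RSB/RSC variants
           cover a constant minuend in the low or high half.  */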
1771    case INDEX_op_neg_i32:
1772        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1773        break;
1774    case INDEX_op_not_i32:
1775        tcg_out_dat_reg(s, COND_AL,
1776                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1777        break;
1778    case INDEX_op_mul_i32:
1779        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1780        break;
1781    case INDEX_op_mulu2_i32:
1782        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1783        break;
1784    case INDEX_op_muls2_i32:
1785        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1786        break;
1787    /* XXX: Perhaps args[2] & 0x1f is wrong */
1788    case INDEX_op_shl_i32:
1789        c = const_args[2] ?
1790                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1791        goto gen_shift32;
1792    case INDEX_op_shr_i32:
1793        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1794                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1795        goto gen_shift32;
1796    case INDEX_op_sar_i32:
1797        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1798                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1799        goto gen_shift32;
1800    case INDEX_op_rotr_i32:
1801        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1802                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1803        /* Fall through.  */
1804    gen_shift32:
1805        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1806        break;
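        /* The "(args[2] & 0x1f) ? ... : SHIFT_IMM_LSL(0)" form above is
           needed because an immediate LSR, ASR or ROR amount of 0 in the
           ARM shifter encoding actually means a shift of 32 (or RRX), so a
           constant count of zero must be emitted as LSL #0, i.e. a plain
           mov.  LSL is the one shift for which an immediate 0 really is 0,
           which is why shl needs no such special case.  */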
1807
1808    case INDEX_op_rotl_i32:
1809        if (const_args[2]) {
1810            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1811                            ((0x20 - args[2]) & 0x1f) ?
1812                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1813                            SHIFT_IMM_LSL(0));
1814        } else {
1815            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1816            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1817                            SHIFT_REG_ROR(TCG_REG_TMP));
1818        }
1819        break;
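        /* ARM has no rotate-left, so rotl is emitted as a ror by
           (32 - count): a constant count becomes ROR #((32 - n) & 31), with
           n == 0 degenerating to LSL #0, and a register count is first
           negated into TCG_REG_TMP with "rsb tmp, count, #32".  A register
           ror amount of 32 leaves the value unchanged, so a runtime count
           of zero also comes out right.  */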
1820
1821    case INDEX_op_brcond_i32:
1822        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1823                       args[0], args[1], const_args[1]);
1824        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
1825        break;
1826    case INDEX_op_brcond2_i32:
1827        /* The resulting conditions are:
1828         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
1829         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
1830         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
1831         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
1832         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
1833         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
1834         */
1835        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1836                        args[1], args[3], const_args[3]);
1837        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1838                        args[0], args[2], const_args[2]);
1839        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
1840        break;
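        /* That is, the high halves are compared first and the low halves
           only when the highs are equal (the second cmp is predicated on
           COND_EQ); the branch then tests the flags left by whichever
           comparison was decisive, which is what the table above
           enumerates.  */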
1841    case INDEX_op_setcond_i32:
1842        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1843                        args[1], args[2], const_args[2]);
1844        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1845                        ARITH_MOV, args[0], 0, 1);
1846        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1847                        ARITH_MOV, args[0], 0, 0);
1848        break;
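        /* setcond materialises the comparison result with two conditional
           moves; for TCG_COND_EQ the sequence is roughly:

               cmp   a1, a2
               moveq a0, #1
               movne a0, #0

           Exactly one of the two movs executes, so a0 needs no prior
           value.  */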
1849    case INDEX_op_setcond2_i32:
1850        /* See brcond2_i32 comment */
1851        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1852                        args[2], args[4], const_args[4]);
1853        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1854                        args[1], args[3], const_args[3]);
1855        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
1856                        ARITH_MOV, args[0], 0, 1);
1857        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
1858                        ARITH_MOV, args[0], 0, 0);
1859        break;
1860
1861    case INDEX_op_qemu_ld_i32:
1862        tcg_out_qemu_ld(s, args, 0);
1863        break;
1864    case INDEX_op_qemu_ld_i64:
1865        tcg_out_qemu_ld(s, args, 1);
1866        break;
1867    case INDEX_op_qemu_st_i32:
1868        tcg_out_qemu_st(s, args, 0);
1869        break;
1870    case INDEX_op_qemu_st_i64:
1871        tcg_out_qemu_st(s, args, 1);
1872        break;
1873
1874    case INDEX_op_bswap16_i32:
1875        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
1876        break;
1877    case INDEX_op_bswap32_i32:
1878        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
1879        break;
1880
1881    case INDEX_op_ext8s_i32:
1882        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
1883        break;
1884    case INDEX_op_ext16s_i32:
1885        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
1886        break;
1887    case INDEX_op_ext16u_i32:
1888        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
1889        break;
1890
1891    case INDEX_op_deposit_i32:
1892        tcg_out_deposit(s, COND_AL, args[0], args[2],
1893                        args[3], args[4], const_args[2]);
1894        break;
1895
1896    case INDEX_op_div_i32:
1897        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
1898        break;
1899    case INDEX_op_divu_i32:
1900        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
1901        break;
1902
1903    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
1904    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
1905    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
1906    default:
1907        tcg_abort();
1908    }
1909}
1910
1911static const TCGTargetOpDef arm_op_defs[] = {
1912    { INDEX_op_exit_tb, { } },
1913    { INDEX_op_goto_tb, { } },
1914    { INDEX_op_br, { } },
1915
1916    { INDEX_op_ld8u_i32, { "r", "r" } },
1917    { INDEX_op_ld8s_i32, { "r", "r" } },
1918    { INDEX_op_ld16u_i32, { "r", "r" } },
1919    { INDEX_op_ld16s_i32, { "r", "r" } },
1920    { INDEX_op_ld_i32, { "r", "r" } },
1921    { INDEX_op_st8_i32, { "r", "r" } },
1922    { INDEX_op_st16_i32, { "r", "r" } },
1923    { INDEX_op_st_i32, { "r", "r" } },
1924
1925    /* TODO: "r", "r", "ri" */
1926    { INDEX_op_add_i32, { "r", "r", "rIN" } },
1927    { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
1928    { INDEX_op_mul_i32, { "r", "r", "r" } },
1929    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
1930    { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
1931    { INDEX_op_and_i32, { "r", "r", "rIK" } },
1932    { INDEX_op_andc_i32, { "r", "r", "rIK" } },
1933    { INDEX_op_or_i32, { "r", "r", "rI" } },
1934    { INDEX_op_xor_i32, { "r", "r", "rI" } },
1935    { INDEX_op_neg_i32, { "r", "r" } },
1936    { INDEX_op_not_i32, { "r", "r" } },
1937
1938    { INDEX_op_shl_i32, { "r", "r", "ri" } },
1939    { INDEX_op_shr_i32, { "r", "r", "ri" } },
1940    { INDEX_op_sar_i32, { "r", "r", "ri" } },
1941    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1942    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1943
1944    { INDEX_op_brcond_i32, { "r", "rIN" } },
1945    { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
1946    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
1947
1948    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
1949    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
1950    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
1951    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
1952
1953#if TARGET_LONG_BITS == 32
1954    { INDEX_op_qemu_ld_i32, { "r", "l" } },
1955    { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
1956    { INDEX_op_qemu_st_i32, { "s", "s" } },
1957    { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
1958#else
1959    { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
1960    { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
1961    { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
1962    { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
1963#endif
1964
1965    { INDEX_op_bswap16_i32, { "r", "r" } },
1966    { INDEX_op_bswap32_i32, { "r", "r" } },
1967
1968    { INDEX_op_ext8s_i32, { "r", "r" } },
1969    { INDEX_op_ext16s_i32, { "r", "r" } },
1970    { INDEX_op_ext16u_i32, { "r", "r" } },
1971
1972    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1973
1974    { INDEX_op_div_i32, { "r", "r", "r" } },
1975    { INDEX_op_divu_i32, { "r", "r", "r" } },
1976
1977    { -1 },
1978};
1979
1980static void tcg_target_init(TCGContext *s)
1981{
1982    /* Only probe for the platform and capabilities if we haven't already
1983       determined maximum values at compile time.  */
1984#ifndef use_idiv_instructions
1985    {
1986        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
1987        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
1988    }
1989#endif
1990    if (__ARM_ARCH < 7) {
1991        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
1992        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
1993            arm_arch = pl[1] - '0';
1994        }
1995    }
1996
1997    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
1998    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1999                     (1 << TCG_REG_R0) |
2000                     (1 << TCG_REG_R1) |
2001                     (1 << TCG_REG_R2) |
2002                     (1 << TCG_REG_R3) |
2003                     (1 << TCG_REG_R12) |
2004                     (1 << TCG_REG_R14));
2005
2006    tcg_regset_clear(s->reserved_regs);
2007    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2008    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2009    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2010
2011    tcg_add_target_add_op_defs(arm_op_defs);
2012}
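
    /* The runtime probes above only refine what could not be decided at
       compile time: HWCAP_ARM_IDIVA in AT_HWCAP advertises the hardware
       sdiv/udiv instructions, and AT_PLATFORM is a string such as "v7l" or
       "v6l" whose second character gives the architecture revision, used to
       bump arm_arch on hosts built for an older baseline.  */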
2013
2014static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2015                              TCGReg arg1, intptr_t arg2)
2016{
2017    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2018}
2019
2020static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2021                              TCGReg arg1, intptr_t arg2)
2022{
2023    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2024}
2025
2026static inline void tcg_out_mov(TCGContext *s, TCGType type,
2027                               TCGReg ret, TCGReg arg)
2028{
2029    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2030}
2031
2032static inline void tcg_out_movi(TCGContext *s, TCGType type,
2033                                TCGReg ret, tcg_target_long arg)
2034{
2035    tcg_out_movi32(s, COND_AL, ret, arg);
2036}
2037
2038/* Compute frame size via macros, to share between tcg_target_qemu_prologue
2039   and tcg_register_jit.  */
2040
2041#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
2042
2043#define FRAME_SIZE \
2044    ((PUSH_SIZE \
2045      + TCG_STATIC_CALL_ARGS_SIZE \
2046      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2047      + TCG_TARGET_STACK_ALIGN - 1) \
2048     & -TCG_TARGET_STACK_ALIGN)
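
    /* Worked out: PUSH_SIZE covers r4-r11 (11 - 4 + 1 = 8 registers) plus
       lr, i.e. 9 words or 36 bytes.  FRAME_SIZE adds the outgoing call
       argument area and the TCG temporary buffer on top of that and rounds
       the total up to TCG_TARGET_STACK_ALIGN, so the stack stays aligned
       after the prologue's stmdb plus the explicit sp adjustment.  */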
2049
2050static void tcg_target_qemu_prologue(TCGContext *s)
2051{
2052    int stack_addend;
2053
2054    /* Calling convention requires us to save r4-r11 and lr.  */
2055    /* stmdb sp!, { r4 - r11, lr } */
2056    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
2057
2058    /* Reserve callee argument and tcg temp space.  */
2059    stack_addend = FRAME_SIZE - PUSH_SIZE;
2060
2061    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2062                   TCG_REG_CALL_STACK, stack_addend, 1);
2063    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2064                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2065
2066    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2067
2068    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2069    tb_ret_addr = s->code_ptr;
2070
2071    /* Epilogue.  We branch here via tb_ret_addr.  */
2072    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2073                   TCG_REG_CALL_STACK, stack_addend, 1);
2074
2075    /* ldmia sp!, { r4 - r11, pc } */
2076    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
2077}
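
    /* The two raw opcodes above decode as follows: 0x092d4ff0 is
       "stmdb sp!, {r4-r11, lr}" (register mask 0x4ff0 = bits 4-11 plus
       bit 14), and 0x08bd8ff0 is "ldmia sp!, {r4-r11, pc}" (mask 0x8ff0,
       with pc in place of lr so the pop doubles as the return).  In
       between, the prologue moves the env pointer (first call argument)
       into TCG_AREG0 and jumps to the translated code via the second
       argument; exit_tb branches back to tb_ret_addr with its return value
       already in r0.  */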
2078
2079typedef struct {
2080    DebugFrameHeader h;
2081    uint8_t fde_def_cfa[4];
2082    uint8_t fde_reg_ofs[18];
2083} DebugFrame;
2084
2085#define ELF_HOST_MACHINE EM_ARM
2086
2087/* We're expecting a 2 byte uleb128 encoded value.  */
2088QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2089
2090static const DebugFrame debug_frame = {
2091    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2092    .h.cie.id = -1,
2093    .h.cie.version = 1,
2094    .h.cie.code_align = 1,
2095    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
2096    .h.cie.return_column = 14,
2097
2098    /* Total FDE size does not include the "len" member.  */
2099    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2100
2101    .fde_def_cfa = {
2102        12, 13,                         /* DW_CFA_def_cfa sp, ... */
2103        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2104        (FRAME_SIZE >> 7)
2105    },
2106    .fde_reg_ofs = {
2107        /* The following must match the stmdb in the prologue.  */
2108        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
2109        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
2110        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
2111        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
2112        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
2113        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
2114        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
2115        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
2116        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
2117    }
2118};
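
    /* Decoding the CFI bytes above: .fde_def_cfa is DW_CFA_def_cfa (0x0c)
       naming register 13 (sp) with FRAME_SIZE as a two-byte uleb128, which
       is why the build-time check insists on FRAME_SIZE < (1 << 14).  Each
       pair in .fde_reg_ofs is DW_CFA_offset (0x80 | regno) followed by a
       uleb128 offset scaled by data_align = -4, so "0x8e, 1" places lr at
       CFA-4 and "0x84, 9" places r4 at CFA-36, matching the stmdb order in
       the prologue.  */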
2119
2120void tcg_register_jit(void *buf, size_t buf_size)
2121{
2122    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2123}
2124