qemu/tcg/arm/tcg-target.inc.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Andrzej Zaborowski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "tcg-pool.inc.c"

int arm_arch = __ARM_ARCH;

#ifndef use_idiv_instructions
bool use_idiv_instructions;
#endif

/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
#ifdef CONFIG_SOFTMMU
# define USING_SOFTMMU 1
#else
# define USING_SOFTMMU 0
#endif

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",
    "%r1",
    "%r2",
    "%r3",
    "%r4",
    "%r5",
    "%r6",
    "%r7",
    "%r8",
    "%r9",
    "%r10",
    "%r11",
    "%r12",
    "%r13",
    "%r14",
    "%pc",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R13,
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R12,
    TCG_REG_R14,
};

static const int tcg_target_call_iarg_regs[4] = {
    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
};
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_R0, TCG_REG_R1
};
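/* These register assignments follow the AAPCS: r0-r3 carry the first four
   32-bit arguments (a 64-bit argument occupies an aligned even/odd pair),
   further arguments go on the stack, and r0 (or the r1:r0 pair) holds the
   return value.  */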

#define TCG_REG_TMP  TCG_REG_R12

enum arm_cond_code_e {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* Unsigned greater or equal */
    COND_CC = 0x3,      /* Unsigned less than */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
};

#define TO_CPSR (1 << 20)

#define SHIFT_IMM_LSL(im)       (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im)       (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im)       (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im)       (((im) << 7) | 0x60)
#define SHIFT_REG_LSL(rs)       (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs)       (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs)       (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs)       (((rs) << 8) | 0x70)
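/* For example, SHIFT_IMM_LSR(1) is 0x0a0: the immediate shift amount lives
   in bits [11:7] and the shift type in bits [6:5], so
   tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rn, SHIFT_IMM_LSR(1))
   assembles to "mov rd, rn, lsr #1".  */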

typedef enum {
    ARITH_AND = 0x0 << 21,
    ARITH_EOR = 0x1 << 21,
    ARITH_SUB = 0x2 << 21,
    ARITH_RSB = 0x3 << 21,
    ARITH_ADD = 0x4 << 21,
    ARITH_ADC = 0x5 << 21,
    ARITH_SBC = 0x6 << 21,
    ARITH_RSC = 0x7 << 21,
    ARITH_TST = 0x8 << 21 | TO_CPSR,
    ARITH_CMP = 0xa << 21 | TO_CPSR,
    ARITH_CMN = 0xb << 21 | TO_CPSR,
    ARITH_ORR = 0xc << 21,
    ARITH_MOV = 0xd << 21,
    ARITH_BIC = 0xe << 21,
    ARITH_MVN = 0xf << 21,

    INSN_CLZ       = 0x016f0f10,
    INSN_RBIT      = 0x06ff0f30,

    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,

    /* dmb ish */
    INSN_DMB_ISH   = 0xf57ff05b,
    /* mcr p15, 0, r0, c7, c10, 5 */
    INSN_DMB_MCR   = 0xee070fba,

    /* Architected nop introduced in v6k.  */
    /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
       also Just So Happened to do nothing on pre-v6k so that we
       don't need to conditionalize it?  */
    INSN_NOP_v6k   = 0xe320f000,
    /* Otherwise the assembler uses mov r0,r0 */
    INSN_NOP_v4    = (COND_AL << 28) | ARITH_MOV,
} ARMInsn;

#define INSN_NOP   (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)

static const uint8_t tcg_cond_to_arm_cond[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_CC,
    [TCG_COND_GEU] = COND_CS,
    [TCG_COND_LEU] = COND_LS,
    [TCG_COND_GTU] = COND_HI,
};

static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
}
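/* For example, a branch insn at code_ptr == 0x1000 to target == 0x2000
   encodes (0x2000 - 0x1000 - 8) >> 2 == 0x3fe in its low 24 bits; the -8
   accounts for the ARM pipeline reading PC two insns ahead.  */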

static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    tcg_insn_unit insn = atomic_read(code_ptr);
    tcg_debug_assert(offset == sextract32(offset, 0, 24));
    atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
}

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);

    if (type == R_ARM_PC24) {
        reloc_pc24(code_ptr, (tcg_insn_unit *)value);
    } else if (type == R_ARM_PC13) {
        intptr_t diff = value - (uintptr_t)(code_ptr + 2);
        tcg_insn_unit insn = *code_ptr;
        bool u;

        if (diff >= -0xfff && diff <= 0xfff) {
            u = (diff >= 0);
            if (!u) {
                diff = -diff;
            }
        } else {
            int rd = extract32(insn, 12, 4);
            int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd;
            assert(diff >= 0x1000 && diff < 0x100000);
            /* add rt, pc, #high */
            *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD
                           | (TCG_REG_PC << 16) | (rt << 12)
                           | (20 << 7) | (diff >> 12));
            /* ldr rd, [rt, #low] */
            insn = deposit32(insn, 12, 4, rt);
            diff &= 0xfff;
            u = 1;
        }
        insn = deposit32(insn, 23, 1, u);
        insn = deposit32(insn, 0, 12, diff);
        *code_ptr = insn;
    } else {
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_ARM  0x100
#define TCG_CT_CONST_INV  0x200
#define TCG_CT_CONST_NEG  0x400
#define TCG_CT_CONST_ZERO 0x800

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'I':
        ct->ct |= TCG_CT_CONST_ARM;
        break;
    case 'K':
        ct->ct |= TCG_CT_CONST_INV;
        break;
    case 'N': /* The gcc constraint letter is L, already used here.  */
        ct->ct |= TCG_CT_CONST_NEG;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;

    case 'r':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
        break;

    /* qemu_ld address */
    case 'l':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
#ifdef CONFIG_SOFTMMU
        /* r0-r2,lr will be overwritten when reading the tlb entry,
           so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    /* qemu_st address & data */
    case 's':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
           and r0-r1 doing the byte swapping, so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#if defined(CONFIG_SOFTMMU)
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
#if TARGET_LONG_BITS == 64
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    default:
        return NULL;
    }
    return ct_str;
}

static inline uint32_t rotl(uint32_t val, int n)
{
  return (val << n) | (val >> (32 - n));
}

/* ARM immediates for ALU instructions are made of an unsigned 8-bit
   value right-rotated by an even amount between 0 and 30. */
static inline int encode_imm(uint32_t imm)
{
    int shift;

    /* simple case, only lower bits */
    if ((imm & ~0xff) == 0)
        return 0;
    /* then try a simple even shift */
    shift = ctz32(imm) & ~1;
    if (((imm >> shift) & ~0xff) == 0)
        return 32 - shift;
    /* now try harder with rotations */
    if ((rotl(imm, 2) & ~0xff) == 0)
        return 2;
    if ((rotl(imm, 4) & ~0xff) == 0)
        return 4;
    if ((rotl(imm, 6) & ~0xff) == 0)
        return 6;
    /* imm can't be encoded */
    return -1;
}
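/* For example, 0x0000ff00 is 0xff rotated right by 24, so
   encode_imm(0x0000ff00) returns 24 and the resulting operand field is
   rotl(0x0000ff00, 24) | (24 << 7) == 0xcff.  A value like 0x101 has no
   such encoding and yields -1.  */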

static inline int check_fit_imm(uint32_t imm)
{
    return encode_imm(imm) >= 0;
}

/* Test if a constant matches the constraint.
 * TODO: define constraints for:
 *
 * ldr/str offset:   between -0xfff and 0xfff
 * ldrh/strh offset: between -0xff and 0xff
 * mov operand2:     values represented with x << (2 * y), x < 0x100
 * add, sub, eor...: ditto
 */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct;
    ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else {
        return 0;
    }
}
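/* For example, 0xff000000 satisfies 'I' directly, while 0xffffff00
   satisfies 'K' (its complement 0xff is encodable) and 'N' (its negation
   0x100 is encodable); only 0 itself satisfies 'Z'.  */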

static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0a000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
{
    /* We pay attention here to not modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
}

static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
{
    /* We pay attention here to not modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
}

static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0b000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
}

static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
{
    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
                (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_dat_reg(TCGContext *s,
                int cond, int opc, int rd, int rn, int rm, int shift)
{
    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
                    (rn << 16) | (rd << 12) | shift | rm);
}

static inline void tcg_out_nop(TCGContext *s)
{
    tcg_out32(s, INSN_NOP);
}

static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
{
    /* Simple reg-reg move, optimising out the 'do nothing' case */
    if (rd != rm) {
        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
{
    /* Unless the C portion of QEMU is compiled as thumb, we don't
       actually need true BX semantics; merely a branch to an address
       held in a register.  */
    if (use_armv5t_instructions) {
        tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
    } else {
        tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
    }
}

static inline void tcg_out_dat_imm(TCGContext *s,
                int cond, int opc, int rd, int rn, int im)
{
    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
                    (rn << 16) | (rd << 12) | im);
}

/* Note that this routine is used for both LDR and LDRH formats, so we do
   not wish to include an immediate shift at this point.  */
static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
{
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
              | (w << 21) | (rn << 16) | (rt << 12) | rm);
}

static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, int imm8, bool p, bool w)
{
    bool u = 1;
    if (imm8 < 0) {
        imm8 = -imm8;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
}

static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                             TCGReg rn, int imm12, bool p, bool w)
{
    bool u = 1;
    if (imm12 < 0) {
        imm12 = -imm12;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | imm12);
}

static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
}

/* Register pre-increment with base writeback.  */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
}

static void tcg_out_movi_pool(TCGContext *s, int cond, int rd, uint32_t arg)
{
    /* The 12-bit range on the ldr insn is sometimes a bit too small.
       In order to get around that we require two insns, one of which
       will usually be a nop, but may be replaced in patch_reloc.  */
    new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0);
    tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0);
    tcg_out_nop(s);
}
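/* The sequence emitted above is "ldr rd, [pc]; nop"; when the pool entry
   lands beyond the ldr's 12-bit range, patch_reloc rewrites the pair to
   "add rt, pc, #high; ldr rd, [rt, #low]".  */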

static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
{
    int rot, diff, opc, sh1, sh2;
    uint32_t tt0, tt1, tt2;

    /* Check a single MOV/MVN before anything else.  */
    rot = encode_imm(arg);
    if (rot >= 0) {
        tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
                        rotl(arg, rot) | (rot << 7));
        return;
    }
    rot = encode_imm(~arg);
    if (rot >= 0) {
        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
                        rotl(~arg, rot) | (rot << 7));
        return;
    }

    /* Check for a pc-relative address.  This will usually be the TB,
       or within the TB, which is immediately before the code block.  */
    diff = arg - ((intptr_t)s->code_ptr + 8);
    if (diff >= 0) {
        rot = encode_imm(diff);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
                            rotl(diff, rot) | (rot << 7));
            return;
        }
    } else {
        rot = encode_imm(-diff);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
                            rotl(-diff, rot) | (rot << 7));
            return;
        }
    }

    /* Use movw + movt.  */
    if (use_armv7_instructions) {
        /* movw */
        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
        if (arg & 0xffff0000) {
            /* movt */
            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
        }
        return;
    }

    /* Look for sequences of two insns.  If we have lots of 1's, we can
       shorten the sequence by beginning with mvn and then clearing
       higher bits with eor.  */
    tt0 = arg;
    opc = ARITH_MOV;
    if (ctpop32(arg) > 16) {
        tt0 = ~arg;
        opc = ARITH_MVN;
    }
    sh1 = ctz32(tt0) & ~1;
    tt1 = tt0 & ~(0xff << sh1);
    sh2 = ctz32(tt1) & ~1;
    tt2 = tt1 & ~(0xff << sh2);
    if (tt2 == 0) {
        rot = ((32 - sh1) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, opc, rd,  0, ((tt0 >> sh1) & 0xff) | rot);
        rot = ((32 - sh2) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, ARITH_EOR, rd, rd,
                        ((tt0 >> sh2) & 0xff) | rot);
        return;
    }

    /* Otherwise, drop it into the constant pool.  */
    tcg_out_movi_pool(s, cond, rd, arg);
}
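/* As an example of the two-insn path above: 0x00ff00ff has sh1 == 0 and
   sh2 == 16, so it is materialized as "mov rd, #0x000000ff" followed by
   "eor rd, rd, #0x00ff0000".  */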

static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rI" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        tcg_debug_assert(rot >= 0);
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
                            TCGReg dst, TCGReg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIK" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = ~rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opinv;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
                            TCGArg dst, TCGArg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIN" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = -rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opneg;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}
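/* For example, "and rd, rn, #0xffffff00" has no immediate encoding, so the
   rIK path emits "bic rd, rn, #0xff" instead; likewise the rIN path turns
   an unencodable "add rd, rn, #-4" into "sub rd, rn, #4".  */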

static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
                                 TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
    if (!use_armv6_instructions && rd == rn) {
        if (rd == rm) {
            /* rd == rn == rm; copy an input to tmp first.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rm = rn = TCG_REG_TMP;
        } else {
            rn = rm;
            rm = rd;
        }
    }
    /* mul */
    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
}

static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* umull */
    tcg_out32(s, (cond << 28) | 0x00800090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* smull */
    tcg_out32(s, (cond << 28) | 0x00c00090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_ext8s(TCGContext *s, int cond,
                                 int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxtb */
        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(24));
    }
}

static inline void tcg_out_ext8u(TCGContext *s, int cond,
                                 int rd, int rn)
{
    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}

static inline void tcg_out_ext16s(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxth */
        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(16));
    }
}

static inline void tcg_out_ext16u(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* uxth */
        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_LSR(16));
    }
}

static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* revsh */
        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

/* Swap the two low bytes, assuming that the two high input bytes and the
   two high output bytes can hold any value.  */
static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev */
        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
        tcg_out_dat_imm(s, cond, ARITH_BIC,
                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_ROR(8));
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
    }
}

static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len, bool const_a1)
{
    if (const_a1) {
        /* bfi becomes bfc with rn == 15.  */
        a1 = 15;
    }
    /* bfi/bfc */
    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((ofs + len - 1) << 16));
}

static inline void tcg_out_extract(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len)
{
    /* ubfx */
    tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((len - 1) << 16));
}

static inline void tcg_out_sextract(TCGContext *s, int cond, TCGReg rd,
                                    TCGArg a1, int ofs, int len)
{
    /* sbfx */
    tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((len - 1) << 16));
}
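/* For example, tcg_out_extract(s, COND_AL, TCG_REG_R0, TCG_REG_R1, 8, 8)
   emits 0xe7e70451, i.e. "ubfx r0, r1, #8, #8", with the lsb in bits
   [11:7] and width-1 in bits [20:16].  */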

static inline void tcg_out_ld32u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld32_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_st32(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st32_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld16u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld16u_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld16s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld16s_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_st16(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st16_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld8u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld8_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld8s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld8s_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_st8(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st8_12(s, cond, rd, rn, offset);
}

/* The _goto case is normally between TBs within the same code buffer, and
 * with the code buffer limited to 16MB we wouldn't need the long case.
 * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
 */
static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
        tcg_out_b(s, cond, disp);
        return;
    }
    tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
}

/* The call case is mostly used for helpers - so it's not unreasonable
 * for them to be beyond branch range */
static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
        if (addri & 1) {
            /* Use BLX if the target is in Thumb mode */
            if (!use_armv5t_instructions) {
                tcg_abort();
            }
            tcg_out_blx_imm(s, disp);
        } else {
            tcg_out_bl(s, COND_AL, disp);
        }
    } else if (use_armv7_instructions) {
        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
    } else {
        /* ??? Know that movi_pool emits exactly 2 insns.  */
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
        tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri);
    }
}

static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
{
    if (l->has_value) {
        tcg_out_goto(s, cond, l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
        tcg_out_b_noaddr(s, cond);
    }
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    if (use_armv7_instructions) {
        tcg_out32(s, INSN_DMB_ISH);
    } else if (use_armv6_instructions) {
        tcg_out32(s, INSN_DMB_MCR);
    }
}

#ifdef CONFIG_SOFTMMU
#include "tcg-ldst.inc.c"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LESL] = helper_le_ldul_mmu,

    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BESL] = helper_be_ldul_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argregs 0..3 are real registers, 4+ are on the stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg < 4) {                                                      \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    } else {                                                               \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    }                                                                      \
    return argreg + 1;                                                     \
}
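/* For instance, tcg_out_arg_reg8 below expands to a function that calls
   tcg_out_ext8u directly into r0-r3 for the first four argument slots, and
   otherwise zero-extends into TCG_REG_TMP before storing to the stack slot
   at (argreg - 4) * 4.  */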

DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )

static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
                                TCGReg arglo, TCGReg arghi)
{
    /* 64 bit arguments must go in even/odd register pairs
     * and in 8-aligned stack slots.
     */
    if (argreg & 1) {
        argreg++;
    }
    if (use_armv6_instructions && argreg >= 4
        && (arglo & 1) == 0 && arghi == arglo + 1) {
        tcg_out_strd_8(s, COND_AL, arglo,
                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
        return argreg + 2;
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, arglo);
        argreg = tcg_out_arg_reg32(s, argreg, arghi);
        return argreg;
    }
}

#define TLB_SHIFT       (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

/* We're expecting to use an 8-bit immediate and to mask.  */
QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);

/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
   Using the offset of the second entry in the last tlb table ensures
   that we can index all of the elements of the first entry.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
                  > 0xffff);

/* Load and compare a TLB entry, leaving the flags set.  Returns the register
   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                               TCGMemOp opc, int mem_index, bool is_load)
{
    TCGReg base = TCG_AREG0;
    int cmp_off =
        (is_load
         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);

    /* V7 generates the following:
     *   ubfx   r0, addrlo, #TARGET_PAGE_BITS, #CPU_TLB_BITS
     *   add    r2, env, #high
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
     *   ldr    r0, [r2, #cmp]
     *   ldr    r2, [r2, #add]
     *   movw   tmp, #page_align_mask
     *   bic    tmp, addrlo, tmp
     *   cmp    r0, tmp
     *
     * Otherwise we generate:
     *   shr    tmp, addrlo, #TARGET_PAGE_BITS
     *   add    r2, env, #high
     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
     *   ldr    r0, [r2, #cmp]
     *   ldr    r2, [r2, #add]
     *   tst    addrlo, #s_mask
     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
     */
    if (use_armv7_instructions) {
        tcg_out_extract(s, COND_AL, TCG_REG_R0, addrlo,
                        TARGET_PAGE_BITS, CPU_TLB_BITS);
    } else {
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
                        0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
    }

    /* We checked that the offset is contained within 16 bits above.  */
    if (add_off > 0xfff
        || (use_armv6_instructions && TARGET_LONG_BITS == 64
            && cmp_off > 0xff)) {
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                        (24 << 7) | (cmp_off >> 8));
        base = TCG_REG_R2;
        add_off -= cmp_off & 0xff00;
        cmp_off &= 0xff;
    }
    if (!use_armv7_instructions) {
        tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                        TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
    }
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));

    /* Load the tlb comparator.  Use ldrd if needed and available,
       but due to how the pointer needs setting up, ldm isn't useful.
       Base arm5 doesn't have ldrd, but armv5te does.  */
    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
    } else {
        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
        if (TARGET_LONG_BITS == 64) {
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
        }
    }

    /* Load the tlb addend.  */
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);

    /* Check alignment.  We don't support inline unaligned accesses,
       but we can easily support overalignment checks.  */
1283    if (a_bits < s_bits) {
1284        a_bits = s_bits;
1285    }
1286
1287    if (use_armv7_instructions) {
1288        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
1289        int rot = encode_imm(mask);
1290
1291        if (rot >= 0) { 
1292            tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
1293                            rotl(mask, rot) | (rot << 7));
1294        } else {
1295            tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
1296            tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
1297                            addrlo, TCG_REG_TMP, 0);
1298        }
1299        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
1300    } else {
1301        if (a_bits) {
1302            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
1303                            (1 << a_bits) - 1);
1304        }
1305        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
1306                        0, TCG_REG_R0, TCG_REG_TMP,
1307                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1308    }
1309
1310    if (TARGET_LONG_BITS == 64) {
1311        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
1312    }
1313
1314    return TCG_REG_R2;
1315}
1316
1317/* Record the context of a call to the out of line helper code for the slow
1318   path for a load or store, so that we can later generate the correct
1319   helper code.  */
1320static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1321                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1322                                TCGReg addrhi, tcg_insn_unit *raddr,
1323                                tcg_insn_unit *label_ptr)
1324{
1325    TCGLabelQemuLdst *label = new_ldst_label(s);
1326
1327    label->is_ld = is_ld;
1328    label->oi = oi;
1329    label->datalo_reg = datalo;
1330    label->datahi_reg = datahi;
1331    label->addrlo_reg = addrlo;
1332    label->addrhi_reg = addrhi;
1333    label->raddr = raddr;
1334    label->label_ptr[0] = label_ptr;
1335}
1336
1337static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1338{
1339    TCGReg argreg, datalo, datahi;
1340    TCGMemOpIdx oi = lb->oi;
1341    TCGMemOp opc = get_memop(oi);
1342    void *func;
1343
1344    reloc_pc24(lb->label_ptr[0], s->code_ptr);
1345
1346    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1347    if (TARGET_LONG_BITS == 64) {
1348        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1349    } else {
1350        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1351    }
1352    argreg = tcg_out_arg_imm32(s, argreg, oi);
1353    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1354
1355    /* For armv6 we can use the canonical unsigned helpers and minimize
1356       icache usage.  For pre-armv6, use the signed helpers since we do
1357       not have a single insn sign-extend.  */
1358    if (use_armv6_instructions) {
1359        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
1360    } else {
1361        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
1362        if (opc & MO_SIGN) {
1363            opc = MO_UL;
1364        }
1365    }
1366    tcg_out_call(s, func);
1367
1368    datalo = lb->datalo_reg;
1369    datahi = lb->datahi_reg;
1370    switch (opc & MO_SSIZE) {
1371    case MO_SB:
1372        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
1373        break;
1374    case MO_SW:
1375        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
1376        break;
1377    default:
1378        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1379        break;
1380    case MO_Q:
1381        if (datalo != TCG_REG_R1) {
1382            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1383            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1384        } else if (datahi != TCG_REG_R0) {
1385            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1386            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1387        } else {
1388            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1389            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1390            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
1391        }
1392        break;
1393    }
1394
1395    tcg_out_goto(s, COND_AL, lb->raddr);
1396}
1397
1398static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1399{
1400    TCGReg argreg, datalo, datahi;
1401    TCGMemOpIdx oi = lb->oi;
1402    TCGMemOp opc = get_memop(oi);
1403
1404    reloc_pc24(lb->label_ptr[0], s->code_ptr);
1405
1406    argreg = TCG_REG_R0;
1407    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1408    if (TARGET_LONG_BITS == 64) {
1409        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1410    } else {
1411        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1412    }
1413
1414    datalo = lb->datalo_reg;
1415    datahi = lb->datahi_reg;
1416    switch (opc & MO_SIZE) {
1417    case MO_8:
1418        argreg = tcg_out_arg_reg8(s, argreg, datalo);
1419        break;
1420    case MO_16:
1421        argreg = tcg_out_arg_reg16(s, argreg, datalo);
1422        break;
1423    case MO_32:
1424    default:
1425        argreg = tcg_out_arg_reg32(s, argreg, datalo);
1426        break;
1427    case MO_64:
1428        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1429        break;
1430    }
1431
1432    argreg = tcg_out_arg_imm32(s, argreg, oi);
1433    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1434
1435    /* Tail-call to the helper, which will return to the fast path.  */
1436    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1437}
1438#endif /* SOFTMMU */
1439
1440static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1441                                         TCGReg datalo, TCGReg datahi,
1442                                         TCGReg addrlo, TCGReg addend)
1443{
1444    TCGMemOp bswap = opc & MO_BSWAP;
1445
1446    switch (opc & MO_SSIZE) {
1447    case MO_UB:
1448        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1449        break;
1450    case MO_SB:
1451        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1452        break;
1453    case MO_UW:
1454        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1455        if (bswap) {
1456            tcg_out_bswap16(s, COND_AL, datalo, datalo);
1457        }
1458        break;
1459    case MO_SW:
1460        if (bswap) {
1461            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1462            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1463        } else {
1464            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1465        }
1466        break;
1467    case MO_UL:
1468    default:
1469        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1470        if (bswap) {
1471            tcg_out_bswap32(s, COND_AL, datalo, datalo);
1472        }
1473        break;
1474    case MO_Q:
1475        {
1476            TCGReg dl = (bswap ? datahi : datalo);
1477            TCGReg dh = (bswap ? datalo : datahi);
1478
1479            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
1480            if (USING_SOFTMMU && use_armv6_instructions
1481                && (dl & 1) == 0 && dh == dl + 1) {
1482                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1483            } else if (dl != addend) {
1484                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1485                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1486            } else {
1487                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1488                                addend, addrlo, SHIFT_IMM_LSL(0));
1489                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1490                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1491            }
1492            if (bswap) {
1493                tcg_out_bswap32(s, COND_AL, dl, dl);
1494                tcg_out_bswap32(s, COND_AL, dh, dh);
1495            }
1496        }
1497        break;
1498    }
1499}

static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_SB:
        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_UW:
        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned
               accesses.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
            } else if (dl == addrlo) {
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
            } else {
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);

    /* This is a conditional BL only to load a pointer within this opcode
       into LR for the slow path.  We will not be using the value for a
       tail call.  */
    label_ptr = s->code_ptr;
    tcg_out_bl_noaddr(s, COND_NE);

    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);

    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}

static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    TCGMemOp bswap = opc & MO_BSWAP;

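    /* Byte-swapped stores build the swapped value in TCG_REG_R0, so the
       source data register is left unmodified; the 's' operand
       constraint keeps R0 out of the data and address operands.  */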
    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
        }
        break;
    case MO_32:
    default:
        if (bswap) {
            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
        }
        break;
    case MO_64:
        /* Avoid strd for user-only emulation, to handle unaligned
           accesses.  */
        if (bswap) {
            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
        } else if (USING_SOFTMMU && use_armv6_instructions
                   && (datalo & 1) == 0 && datahi == datalo + 1) {
            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
        } else {
            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
            tcg_out_st32_12(s, cond, datahi, addend, 4);
        }
        break;
    }
}

static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
        } else {
            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_32:
    default:
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
        } else {
            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_64:
        /* Avoid strd for user-only emulation, to handle unaligned
           accesses.  */
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
        } else if (USING_SOFTMMU && use_armv6_instructions
                   && (datalo & 1) == 0 && datahi == datalo + 1) {
            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
        } else {
            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
        }
        break;
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);

    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);

    /* The conditional call must come last, as we're going to return here.  */
    label_ptr = s->code_ptr;
    tcg_out_bl_noaddr(s, COND_NE);

    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
                              datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}

static tcg_insn_unit *tb_ret_addr;

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                const TCGArg *args, const int *const_args)
{
    TCGArg a0, a1, a2, a3, a4, a5;
    int c;

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        a0 = args[0];
        if (a0 == 0) {
            tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
        } else {
            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
            tcg_out_goto(s, COND_AL, tb_ret_addr);
        }
        break;
    case INDEX_op_goto_tb:
        {
            /* Indirect jump method */
            intptr_t ptr, dif, dil;
            TCGReg base = TCG_REG_PC;

            tcg_debug_assert(s->tb_jmp_insn_offset == 0);
            ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
            dif = ptr - ((intptr_t)s->code_ptr + 8);
            dil = sextract32(dif, 0, 12);
            if (dif != dil) {
                /* The TB is close, but outside the 12 bits addressable by
                   the load.  We can extend this to 20 bits with a sub of a
                   shifted immediate from pc.  In the vastly unlikely event
                   the code requires more than 1MB, we'll use 2 insns and
                   be no worse off.  */
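                /* E.g. dif = 0x00012345 gives dil = 0x345: base is set
                   to ptr - 0x345, and the load below still addresses
                   exactly ptr via [base, #dil].  */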
                base = TCG_REG_R0;
                tcg_out_movi32(s, COND_AL, base, ptr - dil);
            }
            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
            s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
        }
        break;
    case INDEX_op_goto_ptr:
        tcg_out_bx(s, COND_AL, args[0]);
        break;
    case INDEX_op_br:
        tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
        break;

    case INDEX_op_ld8u_i32:
        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16u_i32:
        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_movcond_i32:
        /* Constraints mean that v2 is always in the same register as dest,
         * so we only need to do "if condition passed, move v1 to dest".
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
        break;
    case INDEX_op_add_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_sub_i32:
        if (const_args[1]) {
            if (const_args[2]) {
                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
            } else {
                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
                               args[0], args[2], args[1], 1);
            }
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
                            args[0], args[1], args[2], const_args[2]);
        }
        break;
    case INDEX_op_and_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_andc_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_or_i32:
        c = ARITH_ORR;
        goto gen_arith;
    case INDEX_op_xor_i32:
        c = ARITH_EOR;
        /* Fall through.  */
    gen_arith:
        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_add2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
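        /* If the low-part destination aliases a high-part input, build
           the low word in TMP so the inputs to the ADC below are still
           intact; the final mov then puts it in place.  */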
        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
                        a0, a2, a4, const_args[4]);
        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
                        a1, a3, a5, const_args[5]);
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_sub2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        if (const_args[2]) {
            if (const_args[4]) {
                tcg_out_movi32(s, COND_AL, a0, a4);
                a4 = a0;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
        }
        if (const_args[3]) {
            if (const_args[5]) {
                tcg_out_movi32(s, COND_AL, a1, a5);
                a5 = a1;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
        } else {
            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
                            a1, a3, a5, const_args[5]);
        }
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_neg_i32:
        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
        break;
    case INDEX_op_not_i32:
        tcg_out_dat_reg(s, COND_AL,
                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
        break;
    case INDEX_op_mul_i32:
        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    case INDEX_op_muls2_i32:
        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    /* XXX: Perhaps args[2] & 0x1f is wrong */
    case INDEX_op_shl_i32:
        c = const_args[2] ?
                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
        goto gen_shift32;
    case INDEX_op_shr_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
        goto gen_shift32;
    case INDEX_op_sar_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
        goto gen_shift32;
    case INDEX_op_rotr_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
        /* Fall through.  */
    gen_shift32:
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
        break;

    case INDEX_op_rotl_i32:
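        /* ARM has no rotate-left: rotate left by n is implemented as a
           rotate right by (32 - n) mod 32.  */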
        if (const_args[2]) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            ((0x20 - args[2]) & 0x1f) ?
                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
                            SHIFT_IMM_LSL(0));
        } else {
            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            SHIFT_REG_ROR(TCG_REG_TMP));
        }
        break;

    case INDEX_op_ctz_i32:
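        /* ctz32(x) == clz32(revbit32(x)): bit-reverse with RBIT, then
           share the CLZ path below.  */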
        tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
        a1 = TCG_REG_TMP;
        goto do_clz;

    case INDEX_op_clz_i32:
        a1 = args[1];
    do_clz:
        a0 = args[0];
        a2 = args[2];
        c = const_args[2];
        if (c && a2 == 32) {
            tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
            break;
        }
        tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
        tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
        if (c || a0 != a2) {
            tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[1], const_args[1]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                           arg_label(args[3]));
        break;
    case INDEX_op_brcond2_i32:
        /* The resulting conditions are:
         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[2], const_args[2]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
                           arg_label(args[5]));
        break;
    case INDEX_op_setcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                        ARITH_MOV, args[0], 0, 0);
        break;
    case INDEX_op_setcond2_i32:
        /* See brcond2_i32 comment */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[2], args[4], const_args[4]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
                        ARITH_MOV, args[0], 0, 0);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    case INDEX_op_bswap16_i32:
        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16u_i32:
        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_deposit_i32:
        tcg_out_deposit(s, COND_AL, args[0], args[2],
                        args[3], args[4], const_args[2]);
        break;
    case INDEX_op_extract_i32:
        tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    case INDEX_op_sextract_i32:
        tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;

    case INDEX_op_div_i32:
        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_divu_i32:
        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }
}

static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
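    /* Constraint letters, as parsed by target_parse_constraint: 'r' is
       any core register; 'l' and 's' are registers usable for qemu_ld
       and qemu_st operands; 'i' is any immediate; 'I' an encodable ARM
       immediate; 'K' an immediate whose inversion encodes; 'N' one
       whose negation encodes; 'Z' the constant zero; "0" an operand
       that must match operand 0.  */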
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef s_s = { .args_ct_str = { "s", "s" } };
    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
    static const TCGTargetOpDef r_r_l = { .args_ct_str = { "r", "r", "l" } };
    static const TCGTargetOpDef r_l_l = { .args_ct_str = { "r", "l", "l" } };
    static const TCGTargetOpDef s_s_s = { .args_ct_str = { "s", "s", "s" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
    static const TCGTargetOpDef r_r_rIN
        = { .args_ct_str = { "r", "r", "rIN" } };
    static const TCGTargetOpDef r_r_rIK
        = { .args_ct_str = { "r", "r", "rIK" } };
    static const TCGTargetOpDef r_r_r_r
        = { .args_ct_str = { "r", "r", "r", "r" } };
    static const TCGTargetOpDef r_r_l_l
        = { .args_ct_str = { "r", "r", "l", "l" } };
    static const TCGTargetOpDef s_s_s_s
        = { .args_ct_str = { "s", "s", "s", "s" } };
    static const TCGTargetOpDef br
        = { .args_ct_str = { "r", "rIN" } };
    static const TCGTargetOpDef dep
        = { .args_ct_str = { "r", "0", "rZ" } };
    static const TCGTargetOpDef movc
        = { .args_ct_str = { "r", "r", "rIN", "rIK", "0" } };
    static const TCGTargetOpDef add2
        = { .args_ct_str = { "r", "r", "r", "r", "rIN", "rIK" } };
    static const TCGTargetOpDef sub2
        = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
    static const TCGTargetOpDef br2
        = { .args_ct_str = { "r", "r", "rIN", "rIN" } };
    static const TCGTargetOpDef setc2
        = { .args_ct_str = { "r", "r", "r", "rIN", "rIN" } };

    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_not_i32:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
        return &r_r;

    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_setcond_i32:
        return &r_r_rIN;
    case INDEX_op_and_i32:
    case INDEX_op_andc_i32:
    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
        return &r_r_rIK;
    case INDEX_op_mul_i32:
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return &r_r_r;
    case INDEX_op_mulu2_i32:
    case INDEX_op_muls2_i32:
        return &r_r_r_r;
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
        return &r_r_rI;
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return &r_r_ri;

    case INDEX_op_brcond_i32:
        return &br;
    case INDEX_op_deposit_i32:
        return &dep;
    case INDEX_op_movcond_i32:
        return &movc;
    case INDEX_op_add2_i32:
        return &add2;
    case INDEX_op_sub2_i32:
        return &sub2;
    case INDEX_op_brcond2_i32:
        return &br2;
    case INDEX_op_setcond2_i32:
        return &setc2;

    case INDEX_op_qemu_ld_i32:
        return TARGET_LONG_BITS == 32 ? &r_l : &r_l_l;
    case INDEX_op_qemu_ld_i64:
        return TARGET_LONG_BITS == 32 ? &r_r_l : &r_r_l_l;
    case INDEX_op_qemu_st_i32:
        return TARGET_LONG_BITS == 32 ? &s_s : &s_s_s;
    case INDEX_op_qemu_st_i64:
        return TARGET_LONG_BITS == 32 ? &s_s_s : &s_s_s_s;

    default:
        return NULL;
    }
}

static void tcg_target_init(TCGContext *s)
{
    /* Only probe for the platform and capabilities if we haven't already
       determined maximum values at compile time.  */
#ifndef use_idiv_instructions
    {
        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
    }
#endif
    if (__ARM_ARCH < 7) {
        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
            arm_arch = pl[1] - '0';
        }
    }

    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
}

static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg ret, tcg_target_long arg)
{
    tcg_out_movi32(s, COND_AL, ret, arg);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = INSN_NOP;
    }
}

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
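/* (11 - 4 + 1 + 1) is r4-r11 (8 registers) plus lr: 9 words, 36 bytes.  */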

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)

static void tcg_target_qemu_prologue(TCGContext *s)
{
    int stack_addend;

    /* Calling convention requires us to save r4-r11 and lr.  */
    /* stmdb sp!, { r4 - r11, lr } */
    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);

    /* Reserve callee argument and tcg temp space.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;

    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);

    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;
    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);

    /* ldmia sp!, { r4 - r11, pc } */
    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
}
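
/* The generated prologue and epilogue are, in effect:
 *     stmdb sp!, {r4-r11, lr}
 *     sub   sp, sp, #(FRAME_SIZE - PUSH_SIZE)
 *     mov   <AREG0>, r0            @ r0 holds the env pointer
 *     bx    r1                     @ enter the translated code
 * ...
 *     mov   r0, #0                 @ goto_ptr return path only
 *     add   sp, sp, #(FRAME_SIZE - PUSH_SIZE)
 *     ldmia sp!, {r4-r11, pc}
 */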

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[18];
} DebugFrame;

#define ELF_HOST_MACHINE EM_ARM

/* We're expecting a 2-byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
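/* uleb128 carries 7 value bits per byte with the high bit as a
   continuation flag, so two bytes cover values below 1 << 14; e.g.
   0x1a0 encodes as 0xa0 0x03, matching the (x & 0x7f) | 0x80, x >> 7
   pattern in fde_def_cfa below.  */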

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 13,                         /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* The following must match the stmdb in the prologue.  */
        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}