qemu/tcg/arm/tcg-target.inc.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Andrzej Zaborowski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "tcg-pool.inc.c"

int arm_arch = __ARM_ARCH;

#ifndef use_idiv_instructions
bool use_idiv_instructions;
#endif

/* ??? Ought to think about changing CONFIG_SOFTMMU to always being defined.  */
#ifdef CONFIG_SOFTMMU
# define USING_SOFTMMU 1
#else
# define USING_SOFTMMU 0
#endif

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",
    "%r1",
    "%r2",
    "%r3",
    "%r4",
    "%r5",
    "%r6",
    "%r7",
    "%r8",
    "%r9",
    "%r10",
    "%r11",
    "%r12",
    "%r13",
    "%r14",
    "%pc",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R13,
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R12,
    TCG_REG_R14,
};

static const int tcg_target_call_iarg_regs[4] = {
    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
};
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_R0, TCG_REG_R1
};

#define TCG_REG_TMP  TCG_REG_R12

enum arm_cond_code_e {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* Unsigned greater or equal */
    COND_CC = 0x3,      /* Unsigned less than */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
};

#define TO_CPSR (1 << 20)

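/*
 * Operand-2 shifter encoding, for reference: bits [6:5] select the shift
 * type (LSL/LSR/ASR/ROR) and bit [4] selects a register vs an immediate
 * shift amount, with the immediate amount held in bits [11:7] and the
 * shift register in bits [11:8].  Hence SHIFT_IMM_LSL(0) is 0, i.e. a
 * plain unshifted register operand.
 */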
#define SHIFT_IMM_LSL(im)       (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im)       (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im)       (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im)       (((im) << 7) | 0x60)
#define SHIFT_REG_LSL(rs)       (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs)       (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs)       (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs)       (((rs) << 8) | 0x70)

typedef enum {
    ARITH_AND = 0x0 << 21,
    ARITH_EOR = 0x1 << 21,
    ARITH_SUB = 0x2 << 21,
    ARITH_RSB = 0x3 << 21,
    ARITH_ADD = 0x4 << 21,
    ARITH_ADC = 0x5 << 21,
    ARITH_SBC = 0x6 << 21,
    ARITH_RSC = 0x7 << 21,
    ARITH_TST = 0x8 << 21 | TO_CPSR,
    ARITH_CMP = 0xa << 21 | TO_CPSR,
    ARITH_CMN = 0xb << 21 | TO_CPSR,
    ARITH_ORR = 0xc << 21,
    ARITH_MOV = 0xd << 21,
    ARITH_BIC = 0xe << 21,
    ARITH_MVN = 0xf << 21,

    INSN_CLZ       = 0x016f0f10,
    INSN_RBIT      = 0x06ff0f30,

    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,

    INSN_DMB_ISH   = 0xf57ff05b,
    INSN_DMB_MCR   = 0xee070fba,

    /* Architected nop introduced in v6k.  */
    /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
       also Just So Happened to do nothing on pre-v6k so that we
       don't need to conditionalize it?  */
    INSN_NOP_v6k   = 0xe320f000,
    /* Otherwise the assembler uses mov r0,r0 */
    INSN_NOP_v4    = (COND_AL << 28) | ARITH_MOV,
} ARMInsn;

#define INSN_NOP   (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)

static const uint8_t tcg_cond_to_arm_cond[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_CC,
    [TCG_COND_GEU] = COND_CS,
    [TCG_COND_LEU] = COND_LS,
    [TCG_COND_GTU] = COND_HI,
};

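/*
 * Note the -8 in the branch displacement calculations below: in ARM state
 * the PC reads as the address of the current instruction plus 8 (two
 * instructions ahead), so that bias must be subtracted before a
 * pc-relative offset is encoded.
 */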
static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
}

static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
    tcg_insn_unit insn = atomic_read(code_ptr);
    tcg_debug_assert(offset == sextract32(offset, 0, 24));
    atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
}

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);

    if (type == R_ARM_PC24) {
        reloc_pc24(code_ptr, (tcg_insn_unit *)value);
    } else if (type == R_ARM_PC13) {
        intptr_t diff = value - (uintptr_t)(code_ptr + 2);
        tcg_insn_unit insn = *code_ptr;
        bool u;

        if (diff >= -0xfff && diff <= 0xfff) {
            u = (diff >= 0);
            if (!u) {
                diff = -diff;
            }
        } else {
            int rd = extract32(insn, 12, 4);
            int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd;
            assert(diff >= 0x1000 && diff < 0x100000);
            /* add rt, pc, #high */
            *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD
                           | (TCG_REG_PC << 16) | (rt << 12)
                           | (20 << 7) | (diff >> 12));
            /* ldr rd, [rt, #low] */
            insn = deposit32(insn, 12, 4, rt);
            diff &= 0xfff;
            u = 1;
        }
        insn = deposit32(insn, 23, 1, u);
        insn = deposit32(insn, 0, 12, diff);
        *code_ptr = insn;
    } else {
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_ARM  0x100
#define TCG_CT_CONST_INV  0x200
#define TCG_CT_CONST_NEG  0x400
#define TCG_CT_CONST_ZERO 0x800

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'I':
        ct->ct |= TCG_CT_CONST_ARM;
        break;
    case 'K':
        ct->ct |= TCG_CT_CONST_INV;
        break;
    case 'N': /* The gcc constraint letter is L, already used here.  */
        ct->ct |= TCG_CT_CONST_NEG;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;

    case 'r':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
        break;

    /* qemu_ld address */
    case 'l':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
#ifdef CONFIG_SOFTMMU
        /* r0-r2,lr will be overwritten when reading the tlb entry,
           so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    /* qemu_st address & data */
    case 's':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
           and r0-r1 doing the byte swapping, so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#if defined(CONFIG_SOFTMMU)
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
#if TARGET_LONG_BITS == 64
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    default:
        return NULL;
    }
    return ct_str;
}

static inline uint32_t rotl(uint32_t val, int n)
{
    return (val << n) | (val >> (32 - n));
}

/* ARM immediates for ALU instructions are made of an unsigned 8-bit
   value right-rotated by an even amount between 0 and 30. */
static inline int encode_imm(uint32_t imm)
{
    int shift;

    /* simple case, only lower bits */
    if ((imm & ~0xff) == 0) {
        return 0;
    }
    /* then try a simple even shift */
    shift = ctz32(imm) & ~1;
    if (((imm >> shift) & ~0xff) == 0) {
        return 32 - shift;
    }
    /* now try harder with rotations */
    if ((rotl(imm, 2) & ~0xff) == 0) {
        return 2;
    }
    if ((rotl(imm, 4) & ~0xff) == 0) {
        return 4;
    }
    if ((rotl(imm, 6) & ~0xff) == 0) {
        return 6;
    }
    /* imm can't be encoded */
    return -1;
}
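/*
 * For example, encode_imm(0xff000000) returns 8: rotating the value left
 * by 8 yields the byte 0xff, which the insn then rotates right by 8 to
 * recover the constant.  A value such as 0x101 spans more than one
 * rotated byte, so -1 is returned and callers must fall back to a
 * longer sequence.
 */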

static inline int check_fit_imm(uint32_t imm)
{
    return encode_imm(imm) >= 0;
}

/* Test if a constant matches the constraint.
 * TODO: define constraints for:
 *
 * ldr/str offset:   between -0xfff and 0xfff
 * ldrh/strh offset: between -0xff and 0xff
 * mov operand2:     values represented with x << (2 * y), x < 0x100
 * add, sub, eor...: ditto
 */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct;
    ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else {
        return 0;
    }
}

static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0a000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
{
    /* We take care here not to modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
}

static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
{
    /* We take care here not to modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
}

static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0b000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
}

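/* For BLX (immediate), bit 24 of the encoding is the Thumb H bit; taking
   it from bit 1 of the offset selects the halfword-aligned target.  */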
static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
{
    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
                (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_dat_reg(TCGContext *s,
                int cond, int opc, int rd, int rn, int rm, int shift)
{
    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
                    (rn << 16) | (rd << 12) | shift | rm);
}

static inline void tcg_out_nop(TCGContext *s)
{
    tcg_out32(s, INSN_NOP);
}

static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
{
    /* Simple reg-reg move, optimising out the 'do nothing' case */
    if (rd != rm) {
        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
{
    /* Unless the C portion of QEMU is compiled as thumb, we don't
       actually need true BX semantics; merely a branch to an address
       held in a register.  */
    if (use_armv5t_instructions) {
        tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
    } else {
        tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
    }
}

static inline void tcg_out_dat_imm(TCGContext *s,
                int cond, int opc, int rd, int rn, int im)
{
    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
                    (rn << 16) | (rd << 12) | im);
}

/* Note that this routine is used for both LDR and LDRH formats, so we do
   not wish to include an immediate shift at this point.  */
static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
{
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
              | (w << 21) | (rn << 16) | (rt << 12) | rm);
}
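/* In the memop encodings above and below, "u" selects adding vs
   subtracting the offset, "p" pre- vs post-indexed addressing, and
   "w" base-register writeback.  */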

static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, int imm8, bool p, bool w)
{
    bool u = 1;
    if (imm8 < 0) {
        imm8 = -imm8;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
}

static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                             TCGReg rn, int imm12, bool p, bool w)
{
    bool u = 1;
    if (imm12 < 0) {
        imm12 = -imm12;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | imm12);
}

static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
}

/* Register pre-increment with base writeback.  */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
}

static void tcg_out_movi_pool(TCGContext *s, int cond, int rd, uint32_t arg)
{
    /* The 12-bit range on the ldr insn is sometimes a bit too small.
       In order to get around that we require two insns, one of which
       will usually be a nop, but may be replaced in patch_reloc.  */
    new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0);
    tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0);
    tcg_out_nop(s);
}
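/* When the pool entry lands more than 4KiB away, the R_ARM_PC13 case of
   patch_reloc above rewrites the ldr/nop pair into "add rt, pc, #high"
   followed by "ldr rd, [rt, #low]".  */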

static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
{
    int rot, diff, opc, sh1, sh2;
    uint32_t tt0, tt1, tt2;

    /* Check a single MOV/MVN before anything else.  */
    rot = encode_imm(arg);
    if (rot >= 0) {
        tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
                        rotl(arg, rot) | (rot << 7));
        return;
    }
    rot = encode_imm(~arg);
    if (rot >= 0) {
        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
                        rotl(~arg, rot) | (rot << 7));
        return;
    }

    /* Check for a pc-relative address.  This will usually be the TB,
       or within the TB, which is immediately before the code block.  */
    diff = arg - ((intptr_t)s->code_ptr + 8);
    if (diff >= 0) {
        rot = encode_imm(diff);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
                            rotl(diff, rot) | (rot << 7));
            return;
        }
    } else {
        rot = encode_imm(-diff);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
                            rotl(-diff, rot) | (rot << 7));
            return;
        }
    }

    /* Use movw + movt.  */
    if (use_armv7_instructions) {
        /* movw */
        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
        if (arg & 0xffff0000) {
            /* movt */
            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
        }
        return;
    }

    /* Look for sequences of two insns.  If we have lots of 1's, we can
       shorten the sequence by beginning with mvn and then clearing
       higher bits with eor.  */
    tt0 = arg;
    opc = ARITH_MOV;
    if (ctpop32(arg) > 16) {
        tt0 = ~arg;
        opc = ARITH_MVN;
    }
    sh1 = ctz32(tt0) & ~1;
    tt1 = tt0 & ~(0xff << sh1);
    sh2 = ctz32(tt1) & ~1;
    tt2 = tt1 & ~(0xff << sh2);
    if (tt2 == 0) {
        rot = ((32 - sh1) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, opc, rd, 0, ((tt0 >> sh1) & 0xff) | rot);
        rot = ((32 - sh2) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, ARITH_EOR, rd, rd,
                        ((tt0 >> sh2) & 0xff) | rot);
        return;
    }

    /* Otherwise, drop it into the constant pool.  */
    tcg_out_movi_pool(s, cond, rd, arg);
}
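/*
 * As an example of the two-insn path: 0x12340000 is not a single rotated
 * byte, but decomposes as "mov rd, #0x02340000" followed by
 * "eor rd, rd, #0x10000000", each operand being one rotated byte.
 */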

static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rI" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        tcg_debug_assert(rot >= 0);
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
                            TCGReg dst, TCGReg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIK" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = ~rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opinv;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
                            TCGArg dst, TCGArg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIN" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = -rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opneg;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}
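/* Callers pair these with a matching inverse opcode, e.g. AND with BIC
   for the "rIK" (invertible) form or ADD with SUB for the "rIN"
   (negatable) form; the constraint guarantees that one of the two
   encodings fits.  */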

static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
                                 TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
    if (!use_armv6_instructions && rd == rn) {
        if (rd == rm) {
            /* rd == rn == rm; copy an input to tmp first.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rm = rn = TCG_REG_TMP;
        } else {
            rn = rm;
            rm = rd;
        }
    }
    /* mul */
    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
}

static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* umull */
    tcg_out32(s, (cond << 28) | 0x00800090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* smull */
    tcg_out32(s, (cond << 28) | 0x00c00090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_ext8s(TCGContext *s, int cond,
                                 int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxtb */
        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(24));
    }
}

static inline void tcg_out_ext8u(TCGContext *s, int cond,
                                 int rd, int rn)
{
    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}

static inline void tcg_out_ext16s(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxth */
        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(16));
    }
}

static inline void tcg_out_ext16u(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* uxth */
        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_LSR(16));
    }
}

static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* revsh */
        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

/* Swap the two low bytes, assuming that the two high input bytes and the
   two high output bytes can hold any value.  */
static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev */
        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
        tcg_out_dat_imm(s, cond, ARITH_BIC,
                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_ROR(8));
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
    }
}
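/*
 * The pre-v6 path is the classic 4-insn byte reversal: tmp = rn ^ ror(rn, 16)
 * with bits 23:16 cleared (the 0x800 rotate field encodes #0x00ff0000),
 * then rd = ror(rn, 8) ^ (tmp >> 8), which reverses all four bytes
 * without touching the flags.
 */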

static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len, bool const_a1)
{
    if (const_a1) {
        /* bfi becomes bfc with rn == 15.  */
        a1 = 15;
    }
    /* bfi/bfc */
    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((ofs + len - 1) << 16));
}

static inline void tcg_out_extract(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len)
{
    /* ubfx */
    tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((len - 1) << 16));
}

static inline void tcg_out_sextract(TCGContext *s, int cond, TCGReg rd,
                                    TCGArg a1, int ofs, int len)
{
    /* sbfx */
    tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((len - 1) << 16));
}

static inline void tcg_out_ld32u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld32_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st32(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st32_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld16u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld16u_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld16s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld16s_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st16(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st16_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld8u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld8_12(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_ld8s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_ld8s_8(s, cond, rd, rn, offset);
    }
}

static inline void tcg_out_st8(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else {
        tcg_out_st8_12(s, cond, rd, rn, offset);
    }
}

/* The _goto case is normally between TBs within the same code buffer, and
 * with the code buffer limited to 16MB we wouldn't need the long case.
 * But we also use it for the tail-call to the qemu_ld/st helpers, which
 * does need it.
 */
static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
        tcg_out_b(s, cond, disp);
        return;
    }
    tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
}

/* The call case is mostly used for helpers - so it's not unreasonable
 * for them to be beyond branch range */
static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
        if (addri & 1) {
            /* Use BLX if the target is in Thumb mode */
            if (!use_armv5t_instructions) {
                tcg_abort();
            }
            tcg_out_blx_imm(s, disp);
        } else {
            tcg_out_bl(s, COND_AL, disp);
        }
    } else if (use_armv7_instructions) {
        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
    } else {
        /* ??? Know that movi_pool emits exactly 2 insns.  */
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
        tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri);
    }
}

static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
{
    if (l->has_value) {
        tcg_out_goto(s, cond, l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
        tcg_out_b_noaddr(s, cond);
    }
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    if (use_armv7_instructions) {
        tcg_out32(s, INSN_DMB_ISH);
    } else if (use_armv6_instructions) {
        tcg_out32(s, INSN_DMB_MCR);
    }
}

static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                            const int *const_args)
{
    TCGReg al = args[0];
    TCGReg ah = args[1];
    TCGArg bl = args[2];
    TCGArg bh = args[3];
    TCGCond cond = args[4];
    int const_bl = const_args[2];
    int const_bh = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        /* We perform a conditional comparison.  If the high half is
           equal, then overwrite the flags with the comparison of the
           low half.  The resulting flags cover the whole.  */
        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
        tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
        return cond;

    case TCG_COND_LT:
    case TCG_COND_GE:
        /* We perform a double-word subtraction and examine the result.
           We do not actually need the result of the subtract, so the
           low part "subtract" is a compare.  For the high half we have
           no choice but to compute into a temporary.  */
        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
        tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
                       TCG_REG_TMP, ah, bh, const_bh);
        return cond;

    case TCG_COND_LE:
    case TCG_COND_GT:
        /* Similar, but with swapped arguments, via reversed subtract.  */
        tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
                       TCG_REG_TMP, al, bl, const_bl);
        tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
                       TCG_REG_TMP, ah, bh, const_bh);
        return tcg_swap_cond(cond);

    default:
        g_assert_not_reached();
    }
}
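/* For the signed LT/GE case, e.g. a 64-bit "a < b": the low-half CMP sets
   the borrow consumed by the flag-setting SBC on the high halves, after
   which N and V reflect the full 64-bit subtraction, so COND_LT applies
   unchanged.  */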

#ifdef CONFIG_SOFTMMU
#include "tcg-ldst.inc.c"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LESL] = helper_le_ldul_mmu,

    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BESL] = helper_be_ldul_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argregs 0..3 are real registers, 4+ go on the stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be
 * zero-extended before use) and 64 bit value in a lo:hi register pair.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg < 4) {                                                      \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    } else {                                                               \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    }                                                                      \
    return argreg + 1;                                                     \
}

DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
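/* For a stack argument, EXT_ARG first materializes the (possibly
   zero-extended) value in TCG_REG_TMP and rebinds "arg" to it, so the
   st32 in the macro always stores from a real register;
   tcg_out_arg_reg32 needs no such step.  */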

static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
                                TCGReg arglo, TCGReg arghi)
{
    /* 64 bit arguments must go in even/odd register pairs
     * and in 8-aligned stack slots.
     */
    if (argreg & 1) {
        argreg++;
    }
    if (use_armv6_instructions && argreg >= 4
        && (arglo & 1) == 0 && arghi == arglo + 1) {
        tcg_out_strd_8(s, COND_AL, arglo,
                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
        return argreg + 2;
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, arglo);
        argreg = tcg_out_arg_reg32(s, argreg, arghi);
        return argreg;
    }
}

#define TLB_SHIFT       (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

/* We're expecting to use an 8-bit immediate and to mask.  */
QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
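/* The pre-v7 path in tcg_out_tlb_read masks the TLB index with
   "and r0, tmp, #(CPU_TLB_SIZE - 1)", which must fit in a single
   rotated-byte immediate - hence the 8-bit limit asserted above.  */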

/* Load and compare a TLB entry, leaving the flags set.  Returns the register
   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                               TCGMemOp opc, int mem_index, bool is_load)
{
    TCGReg base = TCG_AREG0;
    int cmp_off =
        (is_load
         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
    int mask_off;
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);

    /* V7 generates the following:
     *   ubfx   r0, addrlo, #TARGET_PAGE_BITS, #CPU_TLB_BITS
     *   add    r2, env, #high
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
     *   ldr    r0, [r2, #cmp]
     *   ldr    r2, [r2, #add]
     *   movw   tmp, #page_align_mask
     *   bic    tmp, addrlo, tmp
     *   cmp    r0, tmp
     *
     * Otherwise we generate:
     *   shr    tmp, addrlo, #TARGET_PAGE_BITS
     *   add    r2, env, #high
     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
     *   ldr    r0, [r2, #cmp]
     *   ldr    r2, [r2, #add]
     *   tst    addrlo, #s_mask
     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
     */
    if (use_armv7_instructions) {
        tcg_out_extract(s, COND_AL, TCG_REG_R0, addrlo,
                        TARGET_PAGE_BITS, CPU_TLB_BITS);
    } else {
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
                        0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
    }

    /* Add portions of the offset until the memory access is in range.
     * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
     * we can use a 12-bit offset.  */
    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
        mask_off = 0xff;
    } else {
        mask_off = 0xfff;
    }
    while (cmp_off > mask_off) {
        int shift = ctz32(cmp_off & ~mask_off) & ~1;
        int rot = ((32 - shift) << 7) & 0xf00;
        int addend = cmp_off & (0xff << shift);
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                        rot | ((cmp_off >> shift) & 0xff));
        base = TCG_REG_R2;
        add_off -= addend;
        cmp_off -= addend;
    }

    if (!use_armv7_instructions) {
        tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                        TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
    }
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));

    /* Load the tlb comparator.  Use ldrd if needed and available,
       but due to how the pointer needs setting up, ldm isn't useful.
       Base arm5 doesn't have ldrd, but armv5te does.  */
    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
    } else {
        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
        if (TARGET_LONG_BITS == 64) {
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
        }
    }

    /* Load the tlb addend.  */
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);

    /* Check alignment.  We don't support inline unaligned accesses,
       but we can easily support overalignment checks.  */
    if (a_bits < s_bits) {
        a_bits = s_bits;
    }

    if (use_armv7_instructions) {
        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
        int rot = encode_imm(mask);

        if (rot >= 0) {
            tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
                            rotl(mask, rot) | (rot << 7));
        } else {
            tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
            tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
                            addrlo, TCG_REG_TMP, 0);
        }
        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
    } else {
        if (a_bits) {
            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
                            (1 << a_bits) - 1);
        }
        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
                        0, TCG_REG_R0, TCG_REG_TMP,
                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
    }

    if (TARGET_LONG_BITS == 64) {
        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
    }

    return TCG_REG_R2;
}

/* Record the context of a call to the out of line helper code for the slow
   path for a load or store, so that we can later generate the correct
   helper code.  */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
                                TCGReg addrhi, tcg_insn_unit *raddr,
                                tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    void *func;

    reloc_pc24(lb->label_ptr[0], s->code_ptr);

    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }
    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* For armv6 we can use the canonical unsigned helpers and minimize
       icache usage.  For pre-armv6, use the signed helpers since we do
       not have a single insn sign-extend.  */
    if (use_armv6_instructions) {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
    } else {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
        if (opc & MO_SIGN) {
            opc = MO_UL;
        }
    }
    tcg_out_call(s, func);

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_SW:
        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    default:
        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_Q:
        if (datalo != TCG_REG_R1) {
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
        } else if (datahi != TCG_REG_R0) {
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        } else {
            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
        }
        break;
    }

    tcg_out_goto(s, COND_AL, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);

    reloc_pc24(lb->label_ptr[0], s->code_ptr);

    argreg = TCG_REG_R0;
    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SIZE) {
    case MO_8:
        argreg = tcg_out_arg_reg8(s, argreg, datalo);
        break;
    case MO_16:
        argreg = tcg_out_arg_reg16(s, argreg, datalo);
        break;
    case MO_32:
    default:
        argreg = tcg_out_arg_reg32(s, argreg, datalo);
        break;
    case MO_64:
        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
        break;
    }

    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* Tail-call to the helper, which will return to the fast path.  */
    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#endif /* SOFTMMU */

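/* In the MO_Q cases below, dl always receives the word at the lower
   address; for a byte-swapped access the dl/dh aliases map that word to
   datahi, so after the two bswap32s the register pair holds a fully
   byte-reversed 64-bit value.  */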
1494static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1495                                         TCGReg datalo, TCGReg datahi,
1496                                         TCGReg addrlo, TCGReg addend)
1497{
1498    TCGMemOp bswap = opc & MO_BSWAP;
1499
1500    switch (opc & MO_SSIZE) {
1501    case MO_UB:
1502        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1503        break;
1504    case MO_SB:
1505        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1506        break;
1507    case MO_UW:
1508        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1509        if (bswap) {
1510            tcg_out_bswap16(s, COND_AL, datalo, datalo);
1511        }
1512        break;
1513    case MO_SW:
1514        if (bswap) {
1515            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1516            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1517        } else {
1518            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1519        }
1520        break;
1521    case MO_UL:
1522    default:
1523        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1524        if (bswap) {
1525            tcg_out_bswap32(s, COND_AL, datalo, datalo);
1526        }
1527        break;
1528    case MO_Q:
1529        {
1530            TCGReg dl = (bswap ? datahi : datalo);
1531            TCGReg dh = (bswap ? datalo : datahi);
1532
1533            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
1534            if (USING_SOFTMMU && use_armv6_instructions
1535                && (dl & 1) == 0 && dh == dl + 1) {
1536                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1537            } else if (dl != addend) {
1538                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1539                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1540            } else {
1541                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1542                                addend, addrlo, SHIFT_IMM_LSL(0));
1543                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1544                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1545            }
1546            if (bswap) {
1547                tcg_out_bswap32(s, COND_AL, dl, dl);
1548                tcg_out_bswap32(s, COND_AL, dh, dh);
1549            }
1550        }
1551        break;
1552    }
1553}
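
/* In the MO_Q byte-swapped path above, the two words are deliberately
   loaded crosswise (dl = datahi, dh = datalo): the guest's high word sits
   at the lower address, so loading it into datahi and then bswap32-ing
   each half in place completes a full 64-bit byte swap without needing a
   scratch register.  */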
1554
1555static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
1556                                          TCGReg datalo, TCGReg datahi,
1557                                          TCGReg addrlo)
1558{
1559    TCGMemOp bswap = opc & MO_BSWAP;
1560
1561    switch (opc & MO_SSIZE) {
1562    case MO_UB:
1563        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1564        break;
1565    case MO_SB:
1566        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1567        break;
1568    case MO_UW:
1569        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1570        if (bswap) {
1571            tcg_out_bswap16(s, COND_AL, datalo, datalo);
1572        }
1573        break;
1574    case MO_SW:
1575        if (bswap) {
1576            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1577            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1578        } else {
1579            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1580        }
1581        break;
1582    case MO_UL:
1583    default:
1584        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1585        if (bswap) {
1586            tcg_out_bswap32(s, COND_AL, datalo, datalo);
1587        }
1588        break;
1589    case MO_Q:
1590        {
1591            TCGReg dl = (bswap ? datahi : datalo);
1592            TCGReg dh = (bswap ? datalo : datahi);
1593
1594            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
1595            if (USING_SOFTMMU && use_armv6_instructions
1596                && (dl & 1) == 0 && dh == dl + 1) {
1597                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1598            } else if (dl == addrlo) {
1599                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1600                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1601            } else {
1602                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1603                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1604            }
1605            if (bswap) {
1606                tcg_out_bswap32(s, COND_AL, dl, dl);
1607                tcg_out_bswap32(s, COND_AL, dh, dh);
1608            }
1609        }
1610        break;
1611    }
1612}
1613
1614static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1615{
1616    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1617    TCGMemOpIdx oi;
1618    TCGMemOp opc;
1619#ifdef CONFIG_SOFTMMU
1620    int mem_index;
1621    TCGReg addend;
1622    tcg_insn_unit *label_ptr;
1623#endif
1624
1625    datalo = *args++;
1626    datahi = (is64 ? *args++ : 0);
1627    addrlo = *args++;
1628    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1629    oi = *args++;
1630    opc = get_memop(oi);
1631
1632#ifdef CONFIG_SOFTMMU
1633    mem_index = get_mmuidx(oi);
1634    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
1635
1636    /* This is a conditional BL only to load a pointer within this opcode into
1637       LR for the slow path.  We will not be using the value for a tail call.  */

1638    label_ptr = s->code_ptr;
1639    tcg_out_bl_noaddr(s, COND_NE);
1640
1641    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1642
1643    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1644                        s->code_ptr, label_ptr);
1645#else /* !CONFIG_SOFTMMU */
1646    if (guest_base) {
1647        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1648        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1649    } else {
1650        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1651    }
1652#endif
1653}
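
/* A sketch of the softmmu load emitted above (actual registers vary):
       <tlb compare, sets Z on hit>      @ tcg_out_tlb_read
       blne  slow_path                   @ patched via label_ptr
       ldr   data, [addrlo, addend]      @ tcg_out_qemu_ld_index
       ...                               @ raddr: execution resumes here
   slow_path:
       <marshal args, call helper, move result into data, b raddr>
   The BL is conditional purely so that a TLB miss loads LR with the
   in-opcode return address used by the slow path.  */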
1654
1655static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1656                                         TCGReg datalo, TCGReg datahi,
1657                                         TCGReg addrlo, TCGReg addend)
1658{
1659    TCGMemOp bswap = opc & MO_BSWAP;
1660
1661    switch (opc & MO_SIZE) {
1662    case MO_8:
1663        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1664        break;
1665    case MO_16:
1666        if (bswap) {
1667            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1668            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1669        } else {
1670            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1671        }
1672        break;
1673    case MO_32:
1674    default:
1675        if (bswap) {
1676            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1677            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1678        } else {
1679            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1680        }
1681        break;
1682    case MO_64:
1683        /* Avoid strd for user-only emulation, to handle unaligned.  */
1684        if (bswap) {
1685            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1686            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1687            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1688            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1689        } else if (USING_SOFTMMU && use_armv6_instructions
1690                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1691            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1692        } else {
1693            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1694            tcg_out_st32_12(s, cond, datahi, addend, 4);
1695        }
1696        break;
1697    }
1698}
1699
1700static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1701                                          TCGReg datalo, TCGReg datahi,
1702                                          TCGReg addrlo)
1703{
1704    TCGMemOp bswap = opc & MO_BSWAP;
1705
1706    switch (opc & MO_SIZE) {
1707    case MO_8:
1708        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1709        break;
1710    case MO_16:
1711        if (bswap) {
1712            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1713            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1714        } else {
1715            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1716        }
1717        break;
1718    case MO_32:
1719    default:
1720        if (bswap) {
1721            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1722            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1723        } else {
1724            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1725        }
1726        break;
1727    case MO_64:
1728        /* Avoid strd for user-only emulation, to handle unaligned.  */
1729        if (bswap) {
1730            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1731            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1732            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1733            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1734        } else if (USING_SOFTMMU && use_armv6_instructions
1735                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1736            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1737        } else {
1738            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1739            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1740        }
1741        break;
1742    }
1743}
1744
1745static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1746{
1747    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1748    TCGMemOpIdx oi;
1749    TCGMemOp opc;
1750#ifdef CONFIG_SOFTMMU
1751    int mem_index;
1752    TCGReg addend;
1753    tcg_insn_unit *label_ptr;
1754#endif
1755
1756    datalo = *args++;
1757    datahi = (is64 ? *args++ : 0);
1758    addrlo = *args++;
1759    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1760    oi = *args++;
1761    opc = get_memop(oi);
1762
1763#ifdef CONFIG_SOFTMMU
1764    mem_index = get_mmuidx(oi);
1765    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
1766
1767    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1768
1769    /* The conditional call must come last, as we're going to return here.  */
1770    label_ptr = s->code_ptr;
1771    tcg_out_bl_noaddr(s, COND_NE);
1772
1773    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1774                        s->code_ptr, label_ptr);
1775#else /* !CONFIG_SOFTMMU */
1776    if (guest_base) {
1777        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1778        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1779                              datahi, addrlo, TCG_REG_TMP);
1780    } else {
1781        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1782    }
1783#endif
1784}
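
/* The store sequence differs from the load above: the store itself is
   predicated COND_EQ (TLB hit) and the conditional BL comes last, so the
   return address captured in LR lands immediately after this opcode and
   the slow path simply resumes the main instruction stream.  */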
1785
1786static tcg_insn_unit *tb_ret_addr;
1787
1788static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1789                const TCGArg *args, const int *const_args)
1790{
1791    TCGArg a0, a1, a2, a3, a4, a5;
1792    int c;
1793
1794    switch (opc) {
1795    case INDEX_op_exit_tb:
1796        /* Reuse the zeroing that exists for goto_ptr.  */
1797        a0 = args[0];
1798        if (a0 == 0) {
1799            tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
1800        } else {
1801            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1802            tcg_out_goto(s, COND_AL, tb_ret_addr);
1803        }
1804        break;
1805    case INDEX_op_goto_tb:
1806        {
1807            /* Indirect jump method */
1808            intptr_t ptr, dif, dil;
1809            TCGReg base = TCG_REG_PC;
1810
1811            tcg_debug_assert(s->tb_jmp_insn_offset == 0);
1812            ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
1813            dif = ptr - ((intptr_t)s->code_ptr + 8);
1814            dil = sextract32(dif, 0, 12);
1815            if (dif != dil) {
1816                /* The TB is close, but outside the 12 bits addressable by
1817                   the load.  We can extend this to 20 bits with a sub of a
1818                   shifted immediate from pc.  In the vastly unlikely event
1819                   the code requires more than 1MB, we'll use 2 insns and
1820                   be no worse off.  */
1821                base = TCG_REG_R0;
1822                tcg_out_movi32(s, COND_AL, base, ptr - dil);
1823            }
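            /* Worked example: if dif = 0x1890, the low 12 bits sign-extend
               to dil = -0x770, so base is loaded with ptr + 0x770 and the
               ldr below uses offset -0x770, still reaching exactly ptr.  */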
1824            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
1825            set_jmp_reset_offset(s, args[0]);
1826        }
1827        break;
1828    case INDEX_op_goto_ptr:
1829        tcg_out_bx(s, COND_AL, args[0]);
1830        break;
1831    case INDEX_op_br:
1832        tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
1833        break;
1834
1835    case INDEX_op_ld8u_i32:
1836        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1837        break;
1838    case INDEX_op_ld8s_i32:
1839        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1840        break;
1841    case INDEX_op_ld16u_i32:
1842        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1843        break;
1844    case INDEX_op_ld16s_i32:
1845        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1846        break;
1847    case INDEX_op_ld_i32:
1848        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1849        break;
1850    case INDEX_op_st8_i32:
1851        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1852        break;
1853    case INDEX_op_st16_i32:
1854        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1855        break;
1856    case INDEX_op_st_i32:
1857        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1858        break;
1859
1860    case INDEX_op_movcond_i32:
1861        /* Constraints mean that v2 is always in the same register as dest,
1862         * so we only need to do "if condition passed, move v1 to dest".
1863         */
1864        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1865                        args[1], args[2], const_args[2]);
1866        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1867                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
1868        break;
1869    case INDEX_op_add_i32:
1870        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1871                        args[0], args[1], args[2], const_args[2]);
1872        break;
1873    case INDEX_op_sub_i32:
1874        if (const_args[1]) {
1875            if (const_args[2]) {
1876                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1877            } else {
1878                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1879                               args[0], args[2], args[1], 1);
1880            }
1881        } else {
1882            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1883                            args[0], args[1], args[2], const_args[2]);
1884        }
1885        break;
1886    case INDEX_op_and_i32:
1887        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1888                        args[0], args[1], args[2], const_args[2]);
1889        break;
1890    case INDEX_op_andc_i32:
1891        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1892                        args[0], args[1], args[2], const_args[2]);
1893        break;
1894    case INDEX_op_or_i32:
1895        c = ARITH_ORR;
1896        goto gen_arith;
1897    case INDEX_op_xor_i32:
1898        c = ARITH_EOR;
1899        /* Fall through.  */
1900    gen_arith:
1901        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1902        break;
1903    case INDEX_op_add2_i32:
1904        a0 = args[0], a1 = args[1], a2 = args[2];
1905        a3 = args[3], a4 = args[4], a5 = args[5];
1906        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1907            a0 = TCG_REG_TMP;
1908        }
1909        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1910                        a0, a2, a4, const_args[4]);
1911        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1912                        a1, a3, a5, const_args[5]);
1913        tcg_out_mov_reg(s, COND_AL, args[0], a0);
1914        break;
1915    case INDEX_op_sub2_i32:
1916        a0 = args[0], a1 = args[1], a2 = args[2];
1917        a3 = args[3], a4 = args[4], a5 = args[5];
1918        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1919            a0 = TCG_REG_TMP;
1920        }
1921        if (const_args[2]) {
1922            if (const_args[4]) {
1923                tcg_out_movi32(s, COND_AL, a0, a4);
1924                a4 = a0;
1925            }
1926            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1927        } else {
1928            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1929                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1930        }
1931        if (const_args[3]) {
1932            if (const_args[5]) {
1933                tcg_out_movi32(s, COND_AL, a1, a5);
1934                a5 = a1;
1935            }
1936            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1937        } else {
1938            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1939                            a1, a3, a5, const_args[5]);
1940        }
1941        tcg_out_mov_reg(s, COND_AL, args[0], a0);
1942        break;
1943    case INDEX_op_neg_i32:
1944        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1945        break;
1946    case INDEX_op_not_i32:
1947        tcg_out_dat_reg(s, COND_AL,
1948                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1949        break;
1950    case INDEX_op_mul_i32:
1951        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1952        break;
1953    case INDEX_op_mulu2_i32:
1954        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1955        break;
1956    case INDEX_op_muls2_i32:
1957        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1958        break;
1959    /* XXX: Perhaps args[2] & 0x1f is wrong */
1960    case INDEX_op_shl_i32:
1961        c = const_args[2] ?
1962                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1963        goto gen_shift32;
1964    case INDEX_op_shr_i32:
1965        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1966                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1967        goto gen_shift32;
1968    case INDEX_op_sar_i32:
1969        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1970                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1971        goto gen_shift32;
1972    case INDEX_op_rotr_i32:
1973        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1974                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1975        /* Fall through.  */
1976    gen_shift32:
1977        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1978        break;
1979
1980    case INDEX_op_rotl_i32:
1981        if (const_args[2]) {
1982            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1983                            ((0x20 - args[2]) & 0x1f) ?
1984                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1985                            SHIFT_IMM_LSL(0));
1986        } else {
1987            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1988            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1989                            SHIFT_REG_ROR(TCG_REG_TMP));
1990        }
1991        break;
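
    /* Note: ARM has no rotate-left, so rotl above is lowered to rotr by
       (32 - n); e.g. rotl(x, 8) == rotr(x, 24).  The (0x20 - args[2]) &
       0x1f masking maps a rotation by zero onto SHIFT_IMM_LSL(0), i.e. a
       plain mov, and the register case computes 32 - n into TMP first.  */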
1992
1993    case INDEX_op_ctz_i32:
1994        tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
1995        a1 = TCG_REG_TMP;
1996        goto do_clz;
1997
1998    case INDEX_op_clz_i32:
1999        a1 = args[1];
2000    do_clz:
2001        a0 = args[0];
2002        a2 = args[2];
2003        c = const_args[2];
2004        if (c && a2 == 32) {
2005            tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
2006            break;
2007        }
2008        tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
2009        tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
2010        if (c || a0 != a2) {
2011            tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
2012        }
2013        break;
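
    /* ctz relies on the identity ctz(x) == clz(rbit(x)); e.g. for
       x = 0x8, rbit gives 0x10000000 and clz of that is 3.  The cmp and
       COND_EQ move supply args[2] as the result for a zero input, since
       the TCG clz/ctz opcodes take an explicit zero-input value.  */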
2014
2015    case INDEX_op_brcond_i32:
2016        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
2017                       args[0], args[1], const_args[1]);
2018        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
2019                           arg_label(args[3]));
2020        break;
2021    case INDEX_op_setcond_i32:
2022        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
2023                        args[1], args[2], const_args[2]);
2024        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
2025                        ARITH_MOV, args[0], 0, 1);
2026        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
2027                        ARITH_MOV, args[0], 0, 0);
2028        break;
2029
2030    case INDEX_op_brcond2_i32:
2031        c = tcg_out_cmp2(s, args, const_args);
2032        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
2033        break;
2034    case INDEX_op_setcond2_i32:
2035        c = tcg_out_cmp2(s, args + 1, const_args + 1);
2036        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
2037        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
2038                        ARITH_MOV, args[0], 0, 0);
2039        break;
2040
2041    case INDEX_op_qemu_ld_i32:
2042        tcg_out_qemu_ld(s, args, 0);
2043        break;
2044    case INDEX_op_qemu_ld_i64:
2045        tcg_out_qemu_ld(s, args, 1);
2046        break;
2047    case INDEX_op_qemu_st_i32:
2048        tcg_out_qemu_st(s, args, 0);
2049        break;
2050    case INDEX_op_qemu_st_i64:
2051        tcg_out_qemu_st(s, args, 1);
2052        break;
2053
2054    case INDEX_op_bswap16_i32:
2055        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
2056        break;
2057    case INDEX_op_bswap32_i32:
2058        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
2059        break;
2060
2061    case INDEX_op_ext8s_i32:
2062        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
2063        break;
2064    case INDEX_op_ext16s_i32:
2065        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
2066        break;
2067    case INDEX_op_ext16u_i32:
2068        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
2069        break;
2070
2071    case INDEX_op_deposit_i32:
2072        tcg_out_deposit(s, COND_AL, args[0], args[2],
2073                        args[3], args[4], const_args[2]);
2074        break;
2075    case INDEX_op_extract_i32:
2076        tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
2077        break;
2078    case INDEX_op_sextract_i32:
2079        tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
2080        break;
2081
2082    case INDEX_op_div_i32:
2083        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
2084        break;
2085    case INDEX_op_divu_i32:
2086        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
2087        break;
2088
2089    case INDEX_op_mb:
2090        tcg_out_mb(s, args[0]);
2091        break;
2092
2093    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2094    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2095    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2096    default:
2097        tcg_abort();
2098    }
2099}
2100
2101static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2102{
2103    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2104    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2105    static const TCGTargetOpDef s_s = { .args_ct_str = { "s", "s" } };
2106    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2107    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2108    static const TCGTargetOpDef r_r_l = { .args_ct_str = { "r", "r", "l" } };
2109    static const TCGTargetOpDef r_l_l = { .args_ct_str = { "r", "l", "l" } };
2110    static const TCGTargetOpDef s_s_s = { .args_ct_str = { "s", "s", "s" } };
2111    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2112    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
2113    static const TCGTargetOpDef r_r_rIN
2114        = { .args_ct_str = { "r", "r", "rIN" } };
2115    static const TCGTargetOpDef r_r_rIK
2116        = { .args_ct_str = { "r", "r", "rIK" } };
2117    static const TCGTargetOpDef r_r_r_r
2118        = { .args_ct_str = { "r", "r", "r", "r" } };
2119    static const TCGTargetOpDef r_r_l_l
2120        = { .args_ct_str = { "r", "r", "l", "l" } };
2121    static const TCGTargetOpDef s_s_s_s
2122        = { .args_ct_str = { "s", "s", "s", "s" } };
2123    static const TCGTargetOpDef br
2124        = { .args_ct_str = { "r", "rIN" } };
2125    static const TCGTargetOpDef dep
2126        = { .args_ct_str = { "r", "0", "rZ" } };
2127    static const TCGTargetOpDef movc
2128        = { .args_ct_str = { "r", "r", "rIN", "rIK", "0" } };
2129    static const TCGTargetOpDef add2
2130        = { .args_ct_str = { "r", "r", "r", "r", "rIN", "rIK" } };
2131    static const TCGTargetOpDef sub2
2132        = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
2133    static const TCGTargetOpDef br2
2134        = { .args_ct_str = { "r", "r", "rI", "rI" } };
2135    static const TCGTargetOpDef setc2
2136        = { .args_ct_str = { "r", "r", "r", "rI", "rI" } };
2137
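    /* A rough key to the constraint letters used above (see
       target_parse_constraint earlier in this file): "r" is any core
       register; "l" and "s" are the qemu_ld/qemu_st classes, which under
       softmmu exclude the registers clobbered by the TLB lookup; "I"
       accepts a directly encodable ARM immediate, "K" one that encodes
       when inverted (MVN/BIC forms), "N" one that encodes when negated
       (ADD/SUB and CMP/CMN pairing); "0" ties an input to output 0 and
       "rZ" additionally admits the constant zero.  */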
2138    switch (op) {
2139    case INDEX_op_goto_ptr:
2140        return &r;
2141
2142    case INDEX_op_ld8u_i32:
2143    case INDEX_op_ld8s_i32:
2144    case INDEX_op_ld16u_i32:
2145    case INDEX_op_ld16s_i32:
2146    case INDEX_op_ld_i32:
2147    case INDEX_op_st8_i32:
2148    case INDEX_op_st16_i32:
2149    case INDEX_op_st_i32:
2150    case INDEX_op_neg_i32:
2151    case INDEX_op_not_i32:
2152    case INDEX_op_bswap16_i32:
2153    case INDEX_op_bswap32_i32:
2154    case INDEX_op_ext8s_i32:
2155    case INDEX_op_ext16s_i32:
2156    case INDEX_op_ext16u_i32:
2157    case INDEX_op_extract_i32:
2158    case INDEX_op_sextract_i32:
2159        return &r_r;
2160
2161    case INDEX_op_add_i32:
2162    case INDEX_op_sub_i32:
2163    case INDEX_op_setcond_i32:
2164        return &r_r_rIN;
2165    case INDEX_op_and_i32:
2166    case INDEX_op_andc_i32:
2167    case INDEX_op_clz_i32:
2168    case INDEX_op_ctz_i32:
2169        return &r_r_rIK;
2170    case INDEX_op_mul_i32:
2171    case INDEX_op_div_i32:
2172    case INDEX_op_divu_i32:
2173        return &r_r_r;
2174    case INDEX_op_mulu2_i32:
2175    case INDEX_op_muls2_i32:
2176        return &r_r_r_r;
2177    case INDEX_op_or_i32:
2178    case INDEX_op_xor_i32:
2179        return &r_r_rI;
2180    case INDEX_op_shl_i32:
2181    case INDEX_op_shr_i32:
2182    case INDEX_op_sar_i32:
2183    case INDEX_op_rotl_i32:
2184    case INDEX_op_rotr_i32:
2185        return &r_r_ri;
2186
2187    case INDEX_op_brcond_i32:
2188        return &br;
2189    case INDEX_op_deposit_i32:
2190        return &dep;
2191    case INDEX_op_movcond_i32:
2192        return &movc;
2193    case INDEX_op_add2_i32:
2194        return &add2;
2195    case INDEX_op_sub2_i32:
2196        return &sub2;
2197    case INDEX_op_brcond2_i32:
2198        return &br2;
2199    case INDEX_op_setcond2_i32:
2200        return &setc2;
2201
2202    case INDEX_op_qemu_ld_i32:
2203        return TARGET_LONG_BITS == 32 ? &r_l : &r_l_l;
2204    case INDEX_op_qemu_ld_i64:
2205        return TARGET_LONG_BITS == 32 ? &r_r_l : &r_r_l_l;
2206    case INDEX_op_qemu_st_i32:
2207        return TARGET_LONG_BITS == 32 ? &s_s : &s_s_s;
2208    case INDEX_op_qemu_st_i64:
2209        return TARGET_LONG_BITS == 32 ? &s_s_s : &s_s_s_s;
2210
2211    default:
2212        return NULL;
2213    }
2214}
2215
2216static void tcg_target_init(TCGContext *s)
2217{
2218    /* Only probe for the platform and capabilities if we haven't already
2219       determined maximum values at compile time.  */
2220#ifndef use_idiv_instructions
2221    {
2222        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2223        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
2224    }
2225#endif
2226    if (__ARM_ARCH < 7) {
2227        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
2228        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
2229            arm_arch = pl[1] - '0';
2230        }
2231    }
2232
2233    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
2234
2235    tcg_target_call_clobber_regs = 0;
2236    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
2237    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
2238    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
2239    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
2240    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
2241    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
2242
2243    s->reserved_regs = 0;
2244    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2245    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2246    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2247}
2248
2249static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2250                              TCGReg arg1, intptr_t arg2)
2251{
2252    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2253}
2254
2255static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2256                              TCGReg arg1, intptr_t arg2)
2257{
2258    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2259}
2260
2261static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
2262                               TCGReg base, intptr_t ofs)
2263{
2264    return false;
2265}
2266
2267static inline void tcg_out_mov(TCGContext *s, TCGType type,
2268                               TCGReg ret, TCGReg arg)
2269{
2270    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2271}
2272
2273static inline void tcg_out_movi(TCGContext *s, TCGType type,
2274                                TCGReg ret, tcg_target_long arg)
2275{
2276    tcg_out_movi32(s, COND_AL, ret, arg);
2277}
2278
2279static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2280{
2281    int i;
2282    for (i = 0; i < count; ++i) {
2283        p[i] = INSN_NOP;
2284    }
2285}
2286
2287/* Compute frame size via macros, to share between tcg_target_qemu_prologue
2288   and tcg_register_jit.  */
2289
2290#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
2291
2292#define FRAME_SIZE \
2293    ((PUSH_SIZE \
2294      + TCG_STATIC_CALL_ARGS_SIZE \
2295      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2296      + TCG_TARGET_STACK_ALIGN - 1) \
2297     & -TCG_TARGET_STACK_ALIGN)
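
/* For a concrete sense of the arithmetic (macro values assumed, not
   guaranteed): PUSH_SIZE is 9 registers * 4 bytes = 36, and with
   TCG_STATIC_CALL_ARGS_SIZE = 128 and CPU_TEMP_BUF_NLONGS = 128 this
   gives FRAME_SIZE = (36 + 128 + 512 + 7) & -8 = 680, leaving the
   prologue an explicit stack_addend of 680 - 36 = 644 bytes.  */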
2298
2299static void tcg_target_qemu_prologue(TCGContext *s)
2300{
2301    int stack_addend;
2302
2303    /* Calling convention requires us to save r4-r11 and lr.  */
2304    /* stmdb sp!, { r4 - r11, lr } */
2305    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
2306
2307    /* Reserve callee argument and tcg temp space.  */
2308    stack_addend = FRAME_SIZE - PUSH_SIZE;
2309
2310    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2311                   TCG_REG_CALL_STACK, stack_addend, 1);
2312    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2313                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2314
2315    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2316
2317    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2318
2319    /*
2320     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2321     * and fall through to the rest of the epilogue.
2322     */
2323    s->code_gen_epilogue = s->code_ptr;
2324    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
2325
2326    /* TB epilogue */
2327    tb_ret_addr = s->code_ptr;
2328    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2329                   TCG_REG_CALL_STACK, stack_addend, 1);
2330
2331    /* ldmia sp!, { r4 - r11, pc } */
2332    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
2333}
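
/* Taken together, prologue and epilogue behave as (sketch):
       stmdb sp!, {r4-r11, lr}       @ save callee-saved regs and lr
       sub   sp, sp, #stack_addend   @ call args + temp buffer
       mov   AREG0, r0               @ first C argument: env
       bx    r1                      @ second C argument: TB code pointer
   code_gen_epilogue:
       mov   r0, #0                  @ return value for the goto_ptr path
   tb_ret_addr:
       add   sp, sp, #stack_addend
       ldmia sp!, {r4-r11, pc}       @ restore and return to the caller  */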
2334
2335typedef struct {
2336    DebugFrameHeader h;
2337    uint8_t fde_def_cfa[4];
2338    uint8_t fde_reg_ofs[18];
2339} DebugFrame;
2340
2341#define ELF_HOST_MACHINE EM_ARM
2342
2343/* We're expecting a 2-byte uleb128 encoded value.  */
2344QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
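
/* As a worked example of that encoding: a FRAME_SIZE of 680 (0x2a8)
   splits into (680 & 0x7f) | 0x80 = 0xa8 followed by 680 >> 7 = 0x05,
   exactly the two-byte pattern built in fde_def_cfa below.  */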
2345
2346static const DebugFrame debug_frame = {
2347    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2348    .h.cie.id = -1,
2349    .h.cie.version = 1,
2350    .h.cie.code_align = 1,
2351    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
2352    .h.cie.return_column = 14,
2353
2354    /* Total FDE size does not include the "len" member.  */
2355    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2356
2357    .fde_def_cfa = {
2358        12, 13,                         /* DW_CFA_def_cfa sp, ... */
2359        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2360        (FRAME_SIZE >> 7)
2361    },
2362    .fde_reg_ofs = {
2363        /* The following must match the stmdb in the prologue.  */
2364        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
2365        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
2366        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
2367        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
2368        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
2369        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
2370        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
2371        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
2372        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
2373    }
2374};
2375
2376void tcg_register_jit(void *buf, size_t buf_size)
2377{
2378    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2379}
2380