/* qemu/tcg/arm/tcg-target.inc.c */
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Andrzej Zaborowski
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
#include "elf.h"
#include "tcg-pool.inc.c"

/* Host ARM architecture version, from the compiler's target macro.  */
int arm_arch = __ARM_ARCH;

#ifndef use_idiv_instructions
/* Whether the host supports sdiv/udiv.
   NOTE(review): not assigned in this chunk; presumably probed at init
   when not fixed at compile time -- confirm against tcg_target_init.  */
bool use_idiv_instructions;
#endif

/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
#ifdef CONFIG_SOFTMMU
# define USING_SOFTMMU 1
#else
# define USING_SOFTMMU 0
#endif
  40
#ifdef CONFIG_DEBUG_TCG
/* Register names used in TCG debug output, indexed by register number.  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",
    "%r1",
    "%r2",
    "%r3",
    "%r4",
    "%r5",
    "%r6",
    "%r7",
    "%r8",
    "%r9",
    "%r10",
    "%r11",
    "%r12",
    "%r13",
    "%r14",
    "%pc",
};
#endif
  61
/* Register allocation preference order: call-saved registers first,
   so that allocated values survive helper calls; the argument and
   scratch registers come last.  */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R13,
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R12,
    TCG_REG_R14,
};

/* AAPCS calling convention: first four integer arguments in r0-r3.  */
static const int tcg_target_call_iarg_regs[4] = {
    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
};
/* Return value in r0, or r0/r1 for a 64-bit result.  */
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_R0, TCG_REG_R1
};
  86
/* Scratch register reserved for the code generator's own use.  */
#define TCG_REG_TMP  TCG_REG_R12

/* ARM condition codes, placed in insn bits 28-31.  */
enum arm_cond_code_e {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* Unsigned greater or equal */
    COND_CC = 0x3,      /* Unsigned less than */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */
    COND_GE = 0xa,      /* Signed greater or equal */
    COND_LT = 0xb,      /* Signed less than */
    COND_GT = 0xc,      /* Signed greater than */
    COND_LE = 0xd,      /* Signed less or equal */
    COND_AL = 0xe,      /* Always */
};

/* S bit: the insn also updates the CPSR condition flags.  */
#define TO_CPSR (1 << 20)

/* Operand-2 shifter encodings: immediate shift amount in bits 7-11,
   shift-by-register (rs) in bits 8-11.  */
#define SHIFT_IMM_LSL(im)       (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im)       (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im)       (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im)       (((im) << 7) | 0x60)
#define SHIFT_REG_LSL(rs)       (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs)       (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs)       (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs)       (((rs) << 8) | 0x70)
 117
/* ARM instruction encodings.  The ARITH_* values are data-processing
   opcodes occupying bits 21-24; the INSN_* values are complete 32-bit
   templates minus the condition field (bits 28-31).  */
typedef enum {
    ARITH_AND = 0x0 << 21,
    ARITH_EOR = 0x1 << 21,
    ARITH_SUB = 0x2 << 21,
    ARITH_RSB = 0x3 << 21,
    ARITH_ADD = 0x4 << 21,
    ARITH_ADC = 0x5 << 21,
    ARITH_SBC = 0x6 << 21,
    ARITH_RSC = 0x7 << 21,
    /* Compare-class opcodes always set the flags.  */
    ARITH_TST = 0x8 << 21 | TO_CPSR,
    ARITH_CMP = 0xa << 21 | TO_CPSR,
    ARITH_CMN = 0xb << 21 | TO_CPSR,
    ARITH_ORR = 0xc << 21,
    ARITH_MOV = 0xd << 21,
    ARITH_BIC = 0xe << 21,
    ARITH_MVN = 0xf << 21,

    INSN_CLZ       = 0x016f0f10,
    INSN_RBIT      = 0x06ff0f30,

    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,

    INSN_DMB_ISH   = 0xf57ff05b,
    INSN_DMB_MCR   = 0xee070fba,

    /* Architected nop introduced in v6k.  */
    /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
       also Just So Happened to do nothing on pre-v6k so that we
       don't need to conditionalize it?  */
    INSN_NOP_v6k   = 0xe320f000,
    /* Otherwise the assembler uses mov r0,r0 */
    INSN_NOP_v4    = (COND_AL << 28) | ARITH_MOV,
} ARMInsn;

#define INSN_NOP   (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)
 175
/* Map TCG comparison conditions to ARM condition codes.  */
static const uint8_t tcg_cond_to_arm_cond[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_CC,
    [TCG_COND_GEU] = COND_CS,
    [TCG_COND_LEU] = COND_LS,
    [TCG_COND_GTU] = COND_HI,
};
 189
 190static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 191{
 192    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
 193    if (offset == sextract32(offset, 0, 24)) {
 194        *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
 195        return true;
 196    }
 197    return false;
 198}
 199
/* Resolve a relocation recorded against code_ptr.
 * R_ARM_PC24: 24-bit branch displacement (see reloc_pc24).
 * R_ARM_PC13: pc-relative ldr/str 12-bit offset, widened to a
 *             two-insn add+ldr sequence when out of range.
 * Returns false if the target cannot be reached at all.
 */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);

    if (type == R_ARM_PC24) {
        return reloc_pc24(code_ptr, (tcg_insn_unit *)value);
    } else if (type == R_ARM_PC13) {
        /* The PC reads as the address of code_ptr plus two insns.  */
        intptr_t diff = value - (uintptr_t)(code_ptr + 2);
        tcg_insn_unit insn = *code_ptr;
        bool u;

        if (diff >= -0xfff && diff <= 0xfff) {
            /* Fits directly; U bit selects add vs subtract of offset.  */
            u = (diff >= 0);
            if (!u) {
                diff = -diff;
            }
        } else {
            /* Too far for one insn.  Overwrite this slot -- which
               tcg_out_movi_pool reserved as ldr + nop -- with
               "add rt, pc, #high" followed by "ldr rd, [rt, #low]".  */
            int rd = extract32(insn, 12, 4);
            int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd;

            /* Negative or >= 1MB displacements cannot be encoded
               in the add-immediate high part.  */
            if (diff < 0x1000 || diff >= 0x100000) {
                return false;
            }

            /* add rt, pc, #high -- rotation 10 (20 << 7) places the
               8-bit immediate at bits 12-19.  */
            *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD
                           | (TCG_REG_PC << 16) | (rt << 12)
                           | (20 << 7) | (diff >> 12));
            /* ldr rd, [rt, #low] */
            insn = deposit32(insn, 12, 4, rt);
            diff &= 0xfff;
            u = 1;
        }
        insn = deposit32(insn, 23, 1, u);
        insn = deposit32(insn, 0, 12, diff);
        *code_ptr = insn;
    } else {
        g_assert_not_reached();
    }
    return true;
}
 242
#define TCG_CT_CONST_ARM  0x100   /* valid operand-2 immediate */
#define TCG_CT_CONST_INV  0x200   /* ~X is a valid operand-2 immediate */
#define TCG_CT_CONST_NEG  0x400   /* -X is a valid operand-2 immediate */
#define TCG_CT_CONST_ZERO 0x800   /* the constant zero */

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'I':
        ct->ct |= TCG_CT_CONST_ARM;
        break;
    case 'K':
        ct->ct |= TCG_CT_CONST_INV;
        break;
    case 'N': /* The gcc constraint letter is L, already used here.  */
        ct->ct |= TCG_CT_CONST_NEG;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;

    case 'r':
        /* Any general register.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
        break;

    /* qemu_ld address */
    case 'l':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
#ifdef CONFIG_SOFTMMU
        /* r0-r2,lr will be overwritten when reading the tlb entry,
           so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    /* qemu_st address & data */
    case 's':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffff;
        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
           and r0-r1 doing the byte swapping, so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#if defined(CONFIG_SOFTMMU)
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
#if TARGET_LONG_BITS == 64
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    default:
        return NULL;
    }
    return ct_str;
}
 309
 310static inline uint32_t rotl(uint32_t val, int n)
 311{
 312  return (val << n) | (val >> (32 - n));
 313}
 314
 315/* ARM immediates for ALU instructions are made of an unsigned 8-bit
 316   right-rotated by an even amount between 0 and 30. */
 317static inline int encode_imm(uint32_t imm)
 318{
 319    int shift;
 320
 321    /* simple case, only lower bits */
 322    if ((imm & ~0xff) == 0)
 323        return 0;
 324    /* then try a simple even shift */
 325    shift = ctz32(imm) & ~1;
 326    if (((imm >> shift) & ~0xff) == 0)
 327        return 32 - shift;
 328    /* now try harder with rotations */
 329    if ((rotl(imm, 2) & ~0xff) == 0)
 330        return 2;
 331    if ((rotl(imm, 4) & ~0xff) == 0)
 332        return 4;
 333    if ((rotl(imm, 6) & ~0xff) == 0)
 334        return 6;
 335    /* imm can't be encoded */
 336    return -1;
 337}
 338
 339static inline int check_fit_imm(uint32_t imm)
 340{
 341    return encode_imm(imm) >= 0;
 342}
 343
 344/* Test if a constant matches the constraint.
 345 * TODO: define constraints for:
 346 *
 347 * ldr/str offset:   between -0xfff and 0xfff
 348 * ldrh/strh offset: between -0xff and 0xff
 349 * mov operand2:     values represented with x << (2 * y), x < 0x100
 350 * add, sub, eor...: ditto
 351 */
 352static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
 353                                         const TCGArgConstraint *arg_ct)
 354{
 355    int ct;
 356    ct = arg_ct->ct;
 357    if (ct & TCG_CT_CONST) {
 358        return 1;
 359    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
 360        return 1;
 361    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
 362        return 1;
 363    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
 364        return 1;
 365    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
 366        return 1;
 367    } else {
 368        return 0;
 369    }
 370}
 371
/* Branch (B) by a byte offset relative to this insn; the -8 accounts
   for the ARM PC reading two instructions ahead.  */
static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0a000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

/* Branch and link (BL) by a byte offset relative to this insn.  */
static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0b000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

/* Branch and link to the address held in rn (BLX rn).  */
static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
}

/* Unconditional BLX (immediate); bit 24 carries bit 1 of the byte
   offset, allowing halfword-aligned (Thumb) targets.  */
static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
{
    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
                (((offset - 8) >> 2) & 0x00ffffff));
}
 394
/* Data-processing insn, register operand-2 form; shift is one of the
   SHIFT_IMM_* / SHIFT_REG_* encodings.  */
static inline void tcg_out_dat_reg(TCGContext *s,
                int cond, int opc, int rd, int rn, int rm, int shift)
{
    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
                    (rn << 16) | (rd << 12) | shift | rm);
}

/* Emit a nop: the v6k architected nop, or mov r0,r0 on older cores.  */
static inline void tcg_out_nop(TCGContext *s)
{
    tcg_out32(s, INSN_NOP);
}
 406
 407static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
 408{
 409    /* Simple reg-reg move, optimising out the 'do nothing' case */
 410    if (rd != rm) {
 411        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
 412    }
 413}
 414
/* Branch to the address held in rn.  */
static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
{
    /* Unless the C portion of QEMU is compiled as thumb, we don't
       actually need true BX semantics; merely a branch to an address
       held in a register.  */
    if (use_armv5t_instructions) {
        /* bx rn */
        tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
    } else {
        /* Pre-v5t fallback: mov pc, rn.  */
        tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
    }
}
 426
/* Data-processing insn, immediate operand-2 form; im holds the 8-bit
   value with its 4-bit rotation already merged in (bits 0-11).  */
static inline void tcg_out_dat_imm(TCGContext *s,
                int cond, int opc, int rd, int rn, int im)
{
    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
                    (rn << 16) | (rd << 12) | im);
}
 433
/* Note that this routine is used for both LDR and LDRH formats, so we do
   not wish to include an immediate shift at this point.  */
/* u: add (1) / subtract (0) the offset; p: pre-index; w: writeback.  */
static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
{
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
              | (w << 21) | (rn << 16) | (rt << 12) | rm);
}
 442
 443static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
 444                            TCGReg rn, int imm8, bool p, bool w)
 445{
 446    bool u = 1;
 447    if (imm8 < 0) {
 448        imm8 = -imm8;
 449        u = 0;
 450    }
 451    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
 452              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
 453}
 454
 455static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
 456                             TCGReg rn, int imm12, bool p, bool w)
 457{
 458    bool u = 1;
 459    if (imm12 < 0) {
 460        imm12 = -imm12;
 461        u = 0;
 462    }
 463    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
 464              (rn << 16) | (rt << 12) | imm12);
 465}
 466
/*
 * Thin wrappers over tcg_out_memop_{12,8,r}: 32-bit and doubleword
 * loads/stores with immediate or register offsets, pre-indexed,
 * offset added (u=1), no writeback unless noted.
 */
static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
}

/* Doubleword load with base writeback.  */
static inline void tcg_out_ldrd_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
}

/* Register pre-increment with base writeback.  */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
}
 533
/*
 * Halfword (16-bit) load/store wrappers: unsigned and sign-extending
 * loads, with immediate (8-bit split) or register offsets.
 */
static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
}

/* Sign-extending halfword load, immediate offset.  */
static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
}

/* Sign-extending halfword load, register offset.  */
static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
}
 569
/*
 * Byte (8-bit) load/store wrappers: unsigned and sign-extending
 * loads, with immediate or register offsets.
 */
static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
}

/* Sign-extending byte load, immediate offset.  */
static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
}

/* Sign-extending byte load, register offset.  */
static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
}
 605
/* Load a constant into rd from the constant pool, via a pc-relative
   ldr recorded as an R_ARM_PC13 relocation.  */
static void tcg_out_movi_pool(TCGContext *s, int cond, int rd, uint32_t arg)
{
    /* The 12-bit range on the ldr insn is sometimes a bit too small.
       In order to get around that we require two insns, one of which
       will usually be a nop, but may be replaced in patch_reloc.  */
    new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0);
    tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0);
    tcg_out_nop(s);
}
 615
/* Load the 32-bit constant arg into rd, using the cheapest available
   sequence: single mov/mvn, pc-relative add/sub, movw/movt (v7),
   a two-insn mov/mvn + eor pair, or finally a constant-pool load.  */
static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
{
    int rot, diff, opc, sh1, sh2;
    uint32_t tt0, tt1, tt2;

    /* Check a single MOV/MVN before anything else.  */
    rot = encode_imm(arg);
    if (rot >= 0) {
        tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
                        rotl(arg, rot) | (rot << 7));
        return;
    }
    rot = encode_imm(~arg);
    if (rot >= 0) {
        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
                        rotl(~arg, rot) | (rot << 7));
        return;
    }

    /* Check for a pc-relative address.  This will usually be the TB,
       or within the TB, which is immediately before the code block.  */
    diff = arg - ((intptr_t)s->code_ptr + 8);
    if (diff >= 0) {
        rot = encode_imm(diff);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
                            rotl(diff, rot) | (rot << 7));
            return;
        }
    } else {
        rot = encode_imm(-diff);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
                            rotl(-diff, rot) | (rot << 7));
            return;
        }
    }

    /* Use movw + movt.  */
    if (use_armv7_instructions) {
        /* movw */
        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
        if (arg & 0xffff0000) {
            /* movt */
            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
        }
        return;
    }

    /* Look for sequences of two insns.  If we have lots of 1's, we can
       shorten the sequence by beginning with mvn and then clearing
       higher bits with eor.  */
    tt0 = arg;
    opc = ARITH_MOV;
    if (ctpop32(arg) > 16) {
        tt0 = ~arg;
        opc = ARITH_MVN;
    }
    /* Peel off the two lowest even-aligned byte fields; if nothing
       remains, mov/mvn + eor covers the whole constant.  */
    sh1 = ctz32(tt0) & ~1;
    tt1 = tt0 & ~(0xff << sh1);
    sh2 = ctz32(tt1) & ~1;
    tt2 = tt1 & ~(0xff << sh2);
    if (tt2 == 0) {
        rot = ((32 - sh1) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, opc, rd,  0, ((tt0 >> sh1) & 0xff) | rot);
        rot = ((32 - sh2) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, ARITH_EOR, rd, rd,
                        ((tt0 >> sh2) & 0xff) | rot);
        return;
    }

    /* Otherwise, drop it into the constant pool.  */
    tcg_out_movi_pool(s, cond, rd, arg);
}
 692
 693static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
 694                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
 695{
 696    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
 697     * rhs must satisfy the "rI" constraint.
 698     */
 699    if (rhs_is_const) {
 700        int rot = encode_imm(rhs);
 701        tcg_debug_assert(rot >= 0);
 702        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
 703    } else {
 704        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
 705    }
 706}
 707
/* Emit either the reg,imm or reg,reg form of a data-processing insn.
 * rhs must satisfy the "rIK" constraint: if the constant is not
 * directly encodable, its bitwise inverse is, and opinv is the opcode
 * variant taking the inverted operand (e.g. and/bic, mov/mvn).
 */
static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
                            TCGReg dst, TCGReg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            /* Fall back to the inverted constant with opinv.  */
            rhs = ~rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opinv;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}
 728
/* Emit either the reg,imm or reg,reg form of a data-processing insn.
 * rhs must satisfy the "rIN" constraint: if the constant is not
 * directly encodable, its negation is, and opneg is the opcode
 * variant taking the negated operand (e.g. add/sub).
 */
static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
                            TCGArg dst, TCGArg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            /* Fall back to the negated constant with opneg.  */
            rhs = -rhs;
            rot = encode_imm(rhs);
            tcg_debug_assert(rot >= 0);
            opc = opneg;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}
 749
/* 32-bit multiply: rd = rn * rm.  Multiplication is commutative, so
   operands may be swapped to dodge the pre-v6 rd == rn restriction.  */
static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
                                 TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
    if (!use_armv6_instructions && rd == rn) {
        if (rd == rm) {
            /* rd == rn == rm; copy an input to tmp first.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rm = rn = TCG_REG_TMP;
        } else {
            /* Swap the operands instead.  */
            rn = rm;
            rm = rd;
        }
    }
    /* mul */
    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
}
 767
/* Unsigned 64-bit multiply: rd1:rd0 = rn * rm.  */
static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            /* Both inputs clash with the outputs; copy one to tmp.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            /* Commutative: swap the operands instead.  */
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* umull */
    tcg_out32(s, (cond << 28) | 0x00800090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}
 786
/* Signed 64-bit multiply: rd1:rd0 = rn * rm.  */
static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            /* Both inputs clash with the outputs; copy one to tmp.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            /* Commutative: swap the operands instead.  */
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* smull */
    tcg_out32(s, (cond << 28) | 0x00c00090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}
 805
/* sdiv rd, rn, rm.
   NOTE(review): presumably only emitted when use_idiv_instructions is
   set -- the guard is not visible in this chunk; confirm at callers.  */
static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

/* udiv rd, rn, rm; same availability caveat as tcg_out_sdiv.  */
static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}
 815
/* Sign-extend the low 8 bits of rn into rd.  */
static inline void tcg_out_ext8s(TCGContext *s, int cond,
                                 int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxtb */
        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        /* Pre-v6: shift left then arithmetic shift right.  */
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(24));
    }
}

/* Zero-extend the low 8 bits of rn into rd.  */
static inline void tcg_out_ext8u(TCGContext *s, int cond,
                                 int rd, int rn)
{
    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}

/* Sign-extend the low 16 bits of rn into rd.  */
static inline void tcg_out_ext16s(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxth */
        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        /* Pre-v6: shift left then arithmetic shift right.  */
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(16));
    }
}

/* Zero-extend the low 16 bits of rn into rd.  */
static inline void tcg_out_ext16u(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* uxth */
        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        /* Pre-v6: shift left then logical shift right.  */
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_LSR(16));
    }
}
 863
/* Byte-swap the low 16 bits of rn, sign-extending into rd.
   NOTE(review): the pre-v6 sequence appears to assume the high 16
   input bits are a sign/zero extension -- confirm at callers.  */
static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* revsh */
        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        /* tmp = low byte sign-extended into bits 8-15.  */
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
        /* rd = tmp | (second byte moved down to bits 0-7).  */
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

/* Byte-swap the low 16 bits of rn, zero-extending into rd.
   NOTE(review): same high-bits assumption as tcg_out_bswap16s.  */
static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        /* tmp = low byte moved up to bits 8-15, zero elsewhere.  */
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
        /* rd = tmp | (second byte moved down to bits 0-7).  */
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}
 893
/* Swap the two low bytes, assuming the two high input bytes and the
   two high output bytes may hold any value (i.e. the result is only
   meaningful in its low 16 bits -- suitable before a 16-bit store).  */
static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        /* tmp = second byte isolated in bits 0-7.  */
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
        /* rd = tmp | (low byte moved up to bits 8-15).  */
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
    }
}
 909
/* Byte-swap all 32 bits of RN into RD.  */
static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev */
        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
    } else {
        /* Classic 4-insn ARM byte-reverse:
           TMP = RN ^ ror(RN, 16)   -- each byte xored with its swap partner */
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
        /* Clear bits [23:16]; 0xff | 0x800 encodes imm 0xff ror 16.  */
        tcg_out_dat_imm(s, cond, ARITH_BIC,
                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
        /* RD = ror(RN, 8), then fix up the two middle bytes:
           RD ^= TMP >> 8 yields the fully reversed word.  */
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_ROR(8));
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
    }
}
 926
/* Emit a bitfield insert: rd<ofs+len-1:ofs> = a1, other bits preserved.
   With const_a1 the value is a constant (presumably constrained to zero
   by the backend's operand constraints -- confirm against the constraint
   table) and the bfc form is used instead.  */
static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len, bool const_a1)
{
    if (const_a1) {
        /* bfi becomes bfc with rn == 15.  */
        a1 = 15;
    }
    /* bfi/bfc: lsb in bits [11:7], msb (ofs+len-1) in bits [20:16].  */
    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((ofs + len - 1) << 16));
}
 938
/* Emit an unsigned bitfield extract: rd = a1<ofs+len-1:ofs>, zero-extended.  */
static inline void tcg_out_extract(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len)
{
    /* ubfx: lsb in bits [11:7], width-1 in bits [20:16].  */
    tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((len - 1) << 16));
}
 946
/* Emit a signed bitfield extract: rd = a1<ofs+len-1:ofs>, sign-extended.  */
static inline void tcg_out_sextract(TCGContext *s, int cond, TCGReg rd,
                                    TCGArg a1, int ofs, int len)
{
    /* sbfx: lsb in bits [11:7], width-1 in bits [20:16].  */
    tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((len - 1) << 16));
}
 954
 955static inline void tcg_out_ld32u(TCGContext *s, int cond,
 956                int rd, int rn, int32_t offset)
 957{
 958    if (offset > 0xfff || offset < -0xfff) {
 959        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 960        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
 961    } else
 962        tcg_out_ld32_12(s, cond, rd, rn, offset);
 963}
 964
 965static inline void tcg_out_st32(TCGContext *s, int cond,
 966                int rd, int rn, int32_t offset)
 967{
 968    if (offset > 0xfff || offset < -0xfff) {
 969        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 970        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
 971    } else
 972        tcg_out_st32_12(s, cond, rd, rn, offset);
 973}
 974
 975static inline void tcg_out_ld16u(TCGContext *s, int cond,
 976                int rd, int rn, int32_t offset)
 977{
 978    if (offset > 0xff || offset < -0xff) {
 979        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 980        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
 981    } else
 982        tcg_out_ld16u_8(s, cond, rd, rn, offset);
 983}
 984
 985static inline void tcg_out_ld16s(TCGContext *s, int cond,
 986                int rd, int rn, int32_t offset)
 987{
 988    if (offset > 0xff || offset < -0xff) {
 989        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
 990        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
 991    } else
 992        tcg_out_ld16s_8(s, cond, rd, rn, offset);
 993}
 994
 995static inline void tcg_out_st16(TCGContext *s, int cond,
 996                int rd, int rn, int32_t offset)
 997{
 998    if (offset > 0xff || offset < -0xff) {
 999        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
1000        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
1001    } else
1002        tcg_out_st16_8(s, cond, rd, rn, offset);
1003}
1004
1005static inline void tcg_out_ld8u(TCGContext *s, int cond,
1006                int rd, int rn, int32_t offset)
1007{
1008    if (offset > 0xfff || offset < -0xfff) {
1009        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
1010        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
1011    } else
1012        tcg_out_ld8_12(s, cond, rd, rn, offset);
1013}
1014
1015static inline void tcg_out_ld8s(TCGContext *s, int cond,
1016                int rd, int rn, int32_t offset)
1017{
1018    if (offset > 0xff || offset < -0xff) {
1019        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
1020        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
1021    } else
1022        tcg_out_ld8s_8(s, cond, rd, rn, offset);
1023}
1024
1025static inline void tcg_out_st8(TCGContext *s, int cond,
1026                int rd, int rn, int32_t offset)
1027{
1028    if (offset > 0xfff || offset < -0xfff) {
1029        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
1030        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
1031    } else
1032        tcg_out_st8_12(s, cond, rd, rn, offset);
1033}
1034
/* The _goto case is normally between TBs within the same code buffer, and
 * with the code buffer limited to 16MB we wouldn't need the long case.
 * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
 */
static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    /* A plain B works only for an ARM-mode target (bit 0 clear) within
       branch range; disp is biased by 8 for the ARM pipeline PC offset.  */
    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
        tcg_out_b(s, cond, disp);
        return;
    }
    /* Otherwise load the full address into PC from the constant pool.  */
    tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
}
1050
/* The call case is mostly used for helpers - so it's not unreasonable
 * for them to be beyond branch range */
static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
{
    intptr_t addri = (intptr_t)addr;
    ptrdiff_t disp = tcg_pcrel_diff(s, addr);

    /* In range for a direct branch-and-link?  (8-byte PC pipeline bias.)  */
    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
        if (addri & 1) {
            /* Use BLX if the target is in Thumb mode */
            if (!use_armv5t_instructions) {
                tcg_abort();
            }
            tcg_out_blx_imm(s, disp);
        } else {
            tcg_out_bl(s, COND_AL, disp);
        }
    } else if (use_armv7_instructions) {
        /* movw/movt the address and branch via register.  */
        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
    } else {
        /* ??? Know that movi_pool emits exactly 2 insns.  */
        /* Manufacture the return address in LR (PC reads as insn+8,
           so +4 skips the two movi_pool insns), then jump via PC load.  */
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
        tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri);
    }
}
1077
1078static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
1079{
1080    if (l->has_value) {
1081        tcg_out_goto(s, cond, l->u.value_ptr);
1082    } else {
1083        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
1084        tcg_out_b(s, cond, 0);
1085    }
1086}
1087
/* Emit a memory barrier.  The TCGArg (barrier type) is ignored: the
   strongest available barrier is always used.  Pre-armv6 emits nothing.  */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    if (use_armv7_instructions) {
        tcg_out32(s, INSN_DMB_ISH);
    } else if (use_armv6_instructions) {
        tcg_out32(s, INSN_DMB_MCR);
    }
}
1096
/* Emit the comparison for a 64-bit (double-word) setcond/brcond built
   from 32-bit halves: args = { al, ah, bl, bh, cond }.  Leaves the CPU
   flags set; returns the condition the caller should test (which may be
   swapped relative to the input).  Clobbers TCG_REG_TMP.  */
static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                            const int *const_args)
{
    TCGReg al = args[0];
    TCGReg ah = args[1];
    TCGArg bl = args[2];
    TCGArg bh = args[3];
    TCGCond cond = args[4];
    int const_bl = const_args[2];
    int const_bh = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        /* We perform a conditional comparision.  If the high half is
           equal, then overwrite the flags with the comparison of the
           low half.  The resulting flags cover the whole.  */
        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
        tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
        return cond;

    case TCG_COND_LT:
    case TCG_COND_GE:
        /* We perform a double-word subtraction and examine the result.
           We do not actually need the result of the subtract, so the
           low part "subtract" is a compare.  For the high half we have
           no choice but to compute into a temporary.  */
        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
        tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
                       TCG_REG_TMP, ah, bh, const_bh);
        return cond;

    case TCG_COND_LE:
    case TCG_COND_GT:
        /* Similar, but with swapped arguments, via reversed subtract.  */
        tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
                       TCG_REG_TMP, al, bl, const_bl);
        tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
                       TCG_REG_TMP, ah, bh, const_bh);
        return tcg_swap_cond(cond);

    default:
        g_assert_not_reached();
    }
}
1146
1147#ifdef CONFIG_SOFTMMU
1148#include "tcg-ldst.inc.c"
1149
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 *
 * Indexed by MemOp & (MO_BSWAP | MO_SSIZE).  The signed 32-bit entries
 * alias the unsigned helper: a 32-bit result needs no further extension
 * on a 32-bit host.
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LESL] = helper_le_ldul_mmu,

    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BESL] = helper_be_ldul_mmu,
};
1169
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 *
 * Indexed by MemOp & (MO_BSWAP | MO_SIZE); stores have no signed forms.
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
1182
/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argreg 0..3 is real registers, 4+ on stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 *
 * MOV_ARG places the value directly in a register argument; EXT_ARG
 * (an expression, possibly empty) normalizes ARG into TCG_REG_TMP
 * before it is spilled to the outgoing stack-argument area.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg < 4) {                                                      \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    } else {                                                               \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    }                                                                      \
    return argreg + 1;                                                     \
}
1206
/* 32-bit immediate argument: materialized via movi32.  */
DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
/* 8-bit register argument: zero-extended into place (or TMP for stack).  */
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
/* 16-bit register argument: zero-extended into place (or TMP for stack).  */
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
/* 32-bit register argument: plain move, stored as-is when on the stack.  */
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1214
/* Marshal a 64-bit helper argument held in the arglo:arghi register pair.
   Returns the next free argreg slot.  */
static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
                                TCGReg arglo, TCGReg arghi)
{
    /* 64 bit arguments must go in even/odd register pairs
     * and in 8-aligned stack slots.
     */
    if (argreg & 1) {
        argreg++;
    }
    /* On the stack with a suitably aligned even/odd source pair,
       a single strd stores both words at once.  */
    if (use_armv6_instructions && argreg >= 4
        && (arglo & 1) == 0 && arghi == arglo + 1) {
        tcg_out_strd_8(s, COND_AL, arglo,
                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
        return argreg + 2;
    } else {
        /* Otherwise marshal the two halves independently.  */
        argreg = tcg_out_arg_reg32(s, argreg, arglo);
        argreg = tcg_out_arg_reg32(s, argreg, arghi);
        return argreg;
    }
}
1235
/* log2 of the span covered by one softmmu TLB (entries * entry size).  */
#define TLB_SHIFT       (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

/* We expect tlb_mask to be before tlb_table.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
                  offsetof(CPUArchState, tlb_mask));

/* We expect to use a 20-bit unsigned offset from ENV.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
                  > 0xfffff);
1245
/* Load and compare a TLB entry, leaving the flags set.  Returns the register
   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.
   On exit the Z flag is set iff the TLB hit (the caller branches to the
   slow path on COND_NE).  */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                               TCGMemOp opc, int mem_index, bool is_load)
{
    int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
                   : offsetof(CPUTLBEntry, addr_write));
    int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
    int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);

    /* The ldr immediate offset is only 12 bits.  If the mask/table offsets
       exceed it, pre-add the high parts of the offsets (which must be
       encodable as rotated 8-bit immediates) into scratch base registers.  */
    if (table_off > 0xfff) {
        int mask_hi = mask_off & ~0xfff;
        int table_hi = table_off & ~0xfff;
        int rot;

        table_base = TCG_REG_R2;
        if (mask_hi == table_hi) {
            /* Same high part: share one adjusted base.  */
            mask_base = table_base;
        } else if (mask_hi) {
            mask_base = TCG_REG_TMP;
            rot = encode_imm(mask_hi);
            assert(rot >= 0);
            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, mask_base, TCG_AREG0,
                            rotl(mask_hi, rot) | (rot << 7));
        }
        rot = encode_imm(table_hi);
        assert(rot >= 0);
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, table_base, TCG_AREG0,
                        rotl(table_hi, rot) | (rot << 7));

        mask_off -= mask_hi;
        table_off -= table_hi;
    }

    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
    tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP, mask_base, mask_off);
    tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R2, table_base, table_off);

    /* Extract the tlb index from the address into TMP.  */
    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, addrlo,
                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));

    /*
     * Add the tlb_table pointer, creating the CPUTLBEntry address in R2.
     * Load the tlb comparator into R0/R1 and the fast path addend into R2.
     */
    if (cmp_off == 0) {
        /* Comparator is at offset 0: fold the add into a writeback load
           (ldrd for a 64-bit comparator when available).  */
        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
        } else {
            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
        }
    } else {
        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
                        TCG_REG_R2, TCG_REG_R2, TCG_REG_TMP, 0);
        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
        } else {
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
        }
    }
    /* Without ldrd, load the high comparator word separately.  */
    if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
        tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
    }

    /* Load the tlb addend.  */
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2,
                    offsetof(CPUTLBEntry, addend));

    /* Check alignment.  We don't support inline unaligned acceses,
       but we can easily support overalignment checks.  */
    if (a_bits < s_bits) {
        a_bits = s_bits;
    }

    if (use_armv7_instructions) {
        /* Clear the page-offset/alignment bits of the address and compare
           against the comparator in one go.  */
        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
        int rot = encode_imm(mask);

        if (rot >= 0) {
            tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
                            rotl(mask, rot) | (rot << 7));
        } else {
            tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
            tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
                            addrlo, TCG_REG_TMP, 0);
        }
        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
    } else {
        /* Pre-v7: test alignment bits, then conditionally compare the
           page number using a shifted operand.  */
        if (a_bits) {
            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
                            (1 << a_bits) - 1);
        }
        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
                        0, TCG_REG_R0, TCG_REG_TMP,
                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
    }

    /* For 64-bit guests, also compare the high address word.  */
    if (TARGET_LONG_BITS == 64) {
        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
    }

    return TCG_REG_R2;
}
1354
1355/* Record the context of a call to the out of line helper code for the slow
1356   path for a load or store, so that we can later generate the correct
1357   helper code.  */
1358static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1359                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1360                                TCGReg addrhi, tcg_insn_unit *raddr,
1361                                tcg_insn_unit *label_ptr)
1362{
1363    TCGLabelQemuLdst *label = new_ldst_label(s);
1364
1365    label->is_ld = is_ld;
1366    label->oi = oi;
1367    label->datalo_reg = datalo;
1368    label->datahi_reg = datahi;
1369    label->addrlo_reg = addrlo;
1370    label->addrhi_reg = addrhi;
1371    label->raddr = raddr;
1372    label->label_ptr[0] = label_ptr;
1373}
1374
/* Emit the slow-path code for a qemu_ld: patch the fast-path branch to
   reach here, marshal (env, addr, oi, retaddr) into the calling
   convention, call the load helper, move/extend the result into the
   destination register(s), and jump back to the fast path.  */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    void *func;

    /* Resolve the fast-path branch to point at this slow path.  */
    bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr);
    tcg_debug_assert(ok);

    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }
    argreg = tcg_out_arg_imm32(s, argreg, oi);
    /* R14 still holds the fast-path return address from the conditional BL.  */
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* For armv6 we can use the canonical unsigned helpers and minimize
       icache usage.  For pre-armv6, use the signed helpers since we do
       not have a single insn sign-extend.  */
    if (use_armv6_instructions) {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
    } else {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
        if (opc & MO_SIGN) {
            /* Helper already sign-extended; treat the result as plain.  */
            opc = MO_UL;
        }
    }
    tcg_out_call(s, func);

    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_SW:
        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    default:
        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_Q:
        /* Move R1:R0 into datahi:datalo without clobbering either half;
           fall back to TMP when the registers fully overlap crosswise.  */
        if (datalo != TCG_REG_R1) {
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
        } else if (datahi != TCG_REG_R0) {
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        } else {
            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
        }
        break;
    }

    /* Return to the instruction following the fast-path sequence.  */
    tcg_out_goto(s, COND_AL, lb->raddr);
}
1436
/* Emit the slow-path code for a qemu_st: patch the fast-path branch to
   reach here, marshal (env, addr, val, oi, retaddr), then tail-call the
   store helper, which returns directly to the fast path.  */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);

    /* Resolve the fast-path branch to point at this slow path.  */
    bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr);
    tcg_debug_assert(ok);

    argreg = TCG_REG_R0;
    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }

    /* Marshal the value, zero-extending sub-word sizes as the helper
       signatures require.  */
    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SIZE) {
    case MO_8:
        argreg = tcg_out_arg_reg8(s, argreg, datalo);
        break;
    case MO_16:
        argreg = tcg_out_arg_reg16(s, argreg, datalo);
        break;
    case MO_32:
    default:
        argreg = tcg_out_arg_reg32(s, argreg, datalo);
        break;
    case MO_64:
        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
        break;
    }

    argreg = tcg_out_arg_imm32(s, argreg, oi);
    /* R14 still holds the fast-path return address from the conditional BL.  */
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* Tail-call to the helper, which will return to the fast path.  */
    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
1478#endif /* SOFTMMU */
1479
/* Emit the fast-path guest load from [addrlo + addend] into datalo
   (and datahi for 64-bit), applying byte-swapping per the memop.  */
static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_SB:
        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_UW:
        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        /* Swapped signed halfword: load unsigned, then swap+sign-extend.  */
        if (bswap) {
            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            /* For a cross-endian load, load into the opposite halves,
               then swap each word in place.  */
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
            } else if (dl != addend) {
                /* Writeback load forms the sum in addend; needs dl != addend.  */
                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
            } else {
                /* Fall back to an explicit add into TMP.  */
                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
                                addend, addrlo, SHIFT_IMM_LSL(0));
                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}
1540
/* Emit a guest load from [addrlo] directly (user-only, guest_base == 0),
   applying byte-swapping per the memop.  */
static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_SB:
        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_UW:
        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        /* Swapped signed halfword: load unsigned, then swap+sign-extend.  */
        if (bswap) {
            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            /* For a cross-endian load, load into the opposite halves,
               then swap each word in place.  */
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
            } else if (dl == addrlo) {
                /* Load the half that overwrites the address register last.  */
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
            } else {
                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}
1599
/* Generate code for the qemu_ld opcode: unpack the TCG args, probe the
   softmmu TLB (or apply guest_base for user-only), and emit the fast-path
   load plus slow-path bookkeeping.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    /* Args: datalo, [datahi,] addrlo, [addrhi,] oi.  */
    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);

    /* This a conditional BL only to load a pointer within this opcode into LR
       for the slow path.  We will not be using the value for a tail call.  */
    label_ptr = s->code_ptr;
    tcg_out_bl(s, COND_NE, 0);

    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);

    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (guest_base) {
        /* Use guest_base as the index register.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}
1640
/* Emit the fast-path guest store of datalo (and datahi for 64-bit) to
   [addrlo + addend], applying byte-swapping per the memop.  Swapped
   values are staged through R0.  */
static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
        }
        break;
    case MO_32:
    default:
        if (bswap) {
            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
        }
        break;
    case MO_64:
        /* Avoid strd for user-only emulation, to handle unaligned.  */
        if (bswap) {
            /* Swapped words also go out in reversed order: hi first.  */
            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
        } else if (USING_SOFTMMU && use_armv6_instructions
                   && (datalo & 1) == 0 && datahi == datalo + 1) {
            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
        } else {
            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
            tcg_out_st32_12(s, cond, datahi, addend, 4);
        }
        break;
    }
}
1685
/* Emit a guest store to [addrlo] directly (user-only, guest_base == 0),
   applying byte-swapping per the memop.  Swapped values are staged
   through R0.  */
static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo)
{
    TCGMemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
        } else {
            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_32:
    default:
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
        } else {
            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
        }
        break;
    case MO_64:
        /* Avoid strd for user-only emulation, to handle unaligned.  */
        if (bswap) {
            /* Swapped words also go out in reversed order: hi first.  */
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
        } else if (USING_SOFTMMU && use_armv6_instructions
                   && (datalo & 1) == 0 && datahi == datalo + 1) {
            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
        } else {
            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
        }
        break;
    }
}
1730
/*
 * Emit a complete guest store for qemu_st_i32/qemu_st_i64.  ARGS is laid
 * out as: data-lo, [data-hi,] addr-lo, [addr-hi,] memop-index.  With
 * CONFIG_SOFTMMU a TLB lookup is emitted first and a slow-path call is
 * registered for the miss case; otherwise the store goes straight to
 * host memory (offset by guest_base when set).
 */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    /* tcg_out_tlb_read leaves the comparison result in the flags;
       COND_EQ below means "TLB hit".  */
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);

    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);

    /* The conditional call must come last, as we're going to return here.  */
    label_ptr = s->code_ptr;
    tcg_out_bl(s, COND_NE, 0);

    /* Record the slow path so the helper call can be patched in later.  */
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
                              datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}
1771
1772static tcg_insn_unit *tb_ret_addr;
1773
/*
 * Emit host code for one TCG opcode.  OPC selects the operation, ARGS
 * holds the operands, and CONST_ARGS flags which entries of ARGS are
 * immediates (as permitted by the constraints in tcg_target_op_def).
 */
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                const TCGArg *args, const int *const_args)
{
    TCGArg a0, a1, a2, a3, a4, a5;
    int c;

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        a0 = args[0];
        if (a0 == 0) {
            tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
        } else {
            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
            tcg_out_goto(s, COND_AL, tb_ret_addr);
        }
        break;
    case INDEX_op_goto_tb:
        {
            /* Indirect jump method */
            intptr_t ptr, dif, dil;
            TCGReg base = TCG_REG_PC;

            /* Direct jumps are not used on this target.  */
            tcg_debug_assert(s->tb_jmp_insn_offset == 0);
            ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
            /* pc reads as the current insn + 8 on ARM.  */
            dif = ptr - ((intptr_t)s->code_ptr + 8);
            dil = sextract32(dif, 0, 12);
            if (dif != dil) {
                /* The TB is close, but outside the 12 bits addressable by
                   the load.  We can extend this to 20 bits with a sub of a
                   shifted immediate from pc.  In the vastly unlikely event
                   the code requires more than 1MB, we'll use 2 insns and
                   be no worse off.  */
                base = TCG_REG_R0;
                tcg_out_movi32(s, COND_AL, base, ptr - dil);
            }
            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
            set_jmp_reset_offset(s, args[0]);
        }
        break;
    case INDEX_op_goto_ptr:
        tcg_out_bx(s, COND_AL, args[0]);
        break;
    case INDEX_op_br:
        tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
        break;

    case INDEX_op_ld8u_i32:
        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16u_i32:
        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_movcond_i32:
        /* Constraints mean that v2 is always in the same register as dest,
         * so we only need to do "if condition passed, move v1 to dest".
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
        break;
    case INDEX_op_add_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_sub_i32:
        if (const_args[1]) {
            if (const_args[2]) {
                /* Both operands constant: fold at translation time.  */
                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
            } else {
                /* const - reg: reverse subtract.  */
                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
                               args[0], args[2], args[1], 1);
            }
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
                            args[0], args[1], args[2], const_args[2]);
        }
        break;
    case INDEX_op_and_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_andc_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_or_i32:
        c = ARITH_ORR;
        goto gen_arith;
    case INDEX_op_xor_i32:
        c = ARITH_EOR;
        /* Fall through.  */
    gen_arith:
        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_add2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
        /* Avoid clobbering a high-part input before it is consumed.  */
        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
                        a0, a2, a4, const_args[4]);
        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
                        a1, a3, a5, const_args[5]);
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_sub2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
        /* Avoid clobbering a high-part input before it is consumed.  */
        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        if (const_args[2]) {
            if (const_args[4]) {
                tcg_out_movi32(s, COND_AL, a0, a4);
                a4 = a0;
            }
            /* const - x: reverse subtract, setting flags for the borrow.  */
            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
        }
        if (const_args[3]) {
            if (const_args[5]) {
                tcg_out_movi32(s, COND_AL, a1, a5);
                a5 = a1;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
        } else {
            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
                            a1, a3, a5, const_args[5]);
        }
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_neg_i32:
        /* neg = 0 - x, via reverse subtract from immediate 0.  */
        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
        break;
    case INDEX_op_not_i32:
        tcg_out_dat_reg(s, COND_AL,
                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
        break;
    case INDEX_op_mul_i32:
        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    case INDEX_op_muls2_i32:
        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    /* XXX: Perhaps args[2] & 0x1f is wrong */
    case INDEX_op_shl_i32:
        c = const_args[2] ?
                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
        goto gen_shift32;
    case INDEX_op_shr_i32:
        /* An immediate LSR of 0 encodes a shift of 32, so a zero count
           must be emitted as LSL #0 (a plain move) instead.  */
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
        goto gen_shift32;
    case INDEX_op_sar_i32:
        /* Likewise, an immediate ASR of 0 encodes a shift of 32.  */
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
        goto gen_shift32;
    case INDEX_op_rotr_i32:
        /* Likewise, an immediate ROR of 0 encodes RRX, not rotate-by-0.  */
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
        /* Fall through.  */
    gen_shift32:
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
        break;

    case INDEX_op_rotl_i32:
        /* No rotate-left insn; implement as rotate-right by (32 - n).  */
        if (const_args[2]) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            ((0x20 - args[2]) & 0x1f) ?
                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
                            SHIFT_IMM_LSL(0));
        } else {
            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            SHIFT_REG_ROR(TCG_REG_TMP));
        }
        break;

    case INDEX_op_ctz_i32:
        /* ctz(x) = clz(bit-reverse(x)); reverse into the scratch reg.  */
        tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
        a1 = TCG_REG_TMP;
        goto do_clz;

    case INDEX_op_clz_i32:
        a1 = args[1];
    do_clz:
        a0 = args[0];
        a2 = args[2];
        c = const_args[2];
        if (c && a2 == 32) {
            /* CLZ already returns 32 for a zero input.  */
            tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
            break;
        }
        /* Otherwise test for zero and substitute the fallback value.  */
        tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
        tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
        if (c || a0 != a2) {
            tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                       args[0], args[1], const_args[1]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                           arg_label(args[3]));
        break;
    case INDEX_op_setcond_i32:
        /* Compare, then two conditional moves: 1 if the condition held,
           0 otherwise.  */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                        ARITH_MOV, args[0], 0, 0);
        break;

    case INDEX_op_brcond2_i32:
        c = tcg_out_cmp2(s, args, const_args);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
        break;
    case INDEX_op_setcond2_i32:
        c = tcg_out_cmp2(s, args + 1, const_args + 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
                        ARITH_MOV, args[0], 0, 0);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    case INDEX_op_bswap16_i32:
        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16u_i32:
        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_deposit_i32:
        tcg_out_deposit(s, COND_AL, args[0], args[2],
                        args[3], args[4], const_args[2]);
        break;
    case INDEX_op_extract_i32:
        tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    case INDEX_op_sextract_i32:
        tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;

    case INDEX_op_div_i32:
        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_divu_i32:
        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }
}
2086
/*
 * Return the operand-constraint set for OP, or NULL if the opcode is
 * not implemented by this backend.  Each string constrains one operand;
 * the letters are parsed by the target's constraint decoder ("r" general
 * register, "I"/"K"/"N" immediate variants, "0" alias of operand 0,
 * and "l"/"s" — used only by the qemu_ld/qemu_st ops below —
 * presumably reserve registers needed by the slow path; see the
 * constraint parsing elsewhere in this file).
 */
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef s_s = { .args_ct_str = { "s", "s" } };
    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
    static const TCGTargetOpDef r_r_l = { .args_ct_str = { "r", "r", "l" } };
    static const TCGTargetOpDef r_l_l = { .args_ct_str = { "r", "l", "l" } };
    static const TCGTargetOpDef s_s_s = { .args_ct_str = { "s", "s", "s" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
    static const TCGTargetOpDef r_r_rIN
        = { .args_ct_str = { "r", "r", "rIN" } };
    static const TCGTargetOpDef r_r_rIK
        = { .args_ct_str = { "r", "r", "rIK" } };
    static const TCGTargetOpDef r_r_r_r
        = { .args_ct_str = { "r", "r", "r", "r" } };
    static const TCGTargetOpDef r_r_l_l
        = { .args_ct_str = { "r", "r", "l", "l" } };
    static const TCGTargetOpDef s_s_s_s
        = { .args_ct_str = { "s", "s", "s", "s" } };
    static const TCGTargetOpDef br
        = { .args_ct_str = { "r", "rIN" } };
    static const TCGTargetOpDef dep
        = { .args_ct_str = { "r", "0", "rZ" } };
    static const TCGTargetOpDef movc
        = { .args_ct_str = { "r", "r", "rIN", "rIK", "0" } };
    static const TCGTargetOpDef add2
        = { .args_ct_str = { "r", "r", "r", "r", "rIN", "rIK" } };
    static const TCGTargetOpDef sub2
        = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
    static const TCGTargetOpDef br2
        = { .args_ct_str = { "r", "r", "rI", "rI" } };
    static const TCGTargetOpDef setc2
        = { .args_ct_str = { "r", "r", "r", "rI", "rI" } };

    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_not_i32:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
        return &r_r;

    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_setcond_i32:
        return &r_r_rIN;
    case INDEX_op_and_i32:
    case INDEX_op_andc_i32:
    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
        return &r_r_rIK;
    case INDEX_op_mul_i32:
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return &r_r_r;
    case INDEX_op_mulu2_i32:
    case INDEX_op_muls2_i32:
        return &r_r_r_r;
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
        return &r_r_rI;
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return &r_r_ri;

    case INDEX_op_brcond_i32:
        return &br;
    case INDEX_op_deposit_i32:
        return &dep;
    case INDEX_op_movcond_i32:
        return &movc;
    case INDEX_op_add2_i32:
        return &add2;
    case INDEX_op_sub2_i32:
        return &sub2;
    case INDEX_op_brcond2_i32:
        return &br2;
    case INDEX_op_setcond2_i32:
        return &setc2;

    /* Loads/stores take an extra address word for 64-bit guests.  */
    case INDEX_op_qemu_ld_i32:
        return TARGET_LONG_BITS == 32 ? &r_l : &r_l_l;
    case INDEX_op_qemu_ld_i64:
        return TARGET_LONG_BITS == 32 ? &r_r_l : &r_r_l_l;
    case INDEX_op_qemu_st_i32:
        return TARGET_LONG_BITS == 32 ? &s_s : &s_s_s;
    case INDEX_op_qemu_st_i64:
        return TARGET_LONG_BITS == 32 ? &s_s_s : &s_s_s_s;

    default:
        return NULL;
    }
}
2201
/*
 * One-time backend initialization: probe host CPU capabilities and set
 * up the register sets (available, call-clobbered, reserved).
 */
static void tcg_target_init(TCGContext *s)
{
    /* Only probe for the platform and capabilities if we haven't already
       determined maximum values at compile time.  */
#ifndef use_idiv_instructions
    {
        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
    }
#endif
    if (__ARM_ARCH < 7) {
        /* AT_PLATFORM is a string such as "v6l"; extract the digit.  */
        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
            arm_arch = pl[1] - '0';
        }
    }

    /* All 16 core registers are usable for 32-bit values.  */
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;

    /* r0-r3, r12 (ip) and r14 (lr) are clobbered by the AAPCS.  */
    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);

    /* Never allocate the stack pointer, the temp register, or pc.  */
    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
}
2234
/* Load a TCG value from [ARG1 + ARG2] into ARG; TYPE is always I32 here.  */
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
}
2240
/* Store the TCG value in ARG to [ARG1 + ARG2]; TYPE is always I32 here.  */
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
}
2246
/* Store-immediate hook: this backend does not implement it, so always
   return false and let the generic code materialize VAL in a register.  */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}
2252
/* Register-to-register move (MOV ret, arg).  */
static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
}
2258
/* Load the constant ARG into register RET.  */
static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg ret, tcg_target_long arg)
{
    tcg_out_movi32(s, COND_AL, ret, arg);
}
2264
2265static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2266{
2267    int i;
2268    for (i = 0; i < count; ++i) {
2269        p[i] = INSN_NOP;
2270    }
2271}
2272
/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

/* Bytes pushed by the prologue: r4-r11 (8 regs) plus lr.  */
#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))

/* Total frame: saved regs + outgoing call args + temp buffer, rounded
   up to the stack alignment.  */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)
2284
/*
 * Emit the prologue/epilogue trampoline: save callee-saved registers,
 * reserve the frame, load the CPU env pointer, and jump into generated
 * code.  Also records the goto_ptr epilogue entry and tb_ret_addr.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int stack_addend;

    /* Calling convention requires us to save r4-r11 and lr.  */
    /* stmdb sp!, { r4 - r11, lr } */
    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);

    /* Reserve callee argument and tcg temp space.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;

    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* First C argument (r0) is the CPU env pointer.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);

    /* Second C argument (r1) is the address of the TB to execute.  */
    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;
    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);

    /* ldmia sp!, { r4 - r11, pc } -- pops straight into pc to return.  */
    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
}
2320
/* DWARF call-frame information describing the prologue above, so host
   debuggers/unwinders can walk through JIT-generated frames.  */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[18];
} DebugFrame;

#define ELF_HOST_MACHINE EM_ARM

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2331
/* Pre-built CIE+FDE pair; the register save offsets below must stay in
   sync with the stmdb emitted in tcg_target_qemu_prologue.  */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 14,            /* lr */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 13,                         /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* The following must match the stmdb in the prologue.  */
        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
    }
};
2361
/* Register the JIT code buffer and its unwind info with the host.  */
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
2366