qemu/tcg/sparc/tcg-target.inc.c
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "tcg-pool.inc.c"
  26
  27#ifdef CONFIG_DEBUG_TCG
  28static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
  29    "%g0",
  30    "%g1",
  31    "%g2",
  32    "%g3",
  33    "%g4",
  34    "%g5",
  35    "%g6",
  36    "%g7",
  37    "%o0",
  38    "%o1",
  39    "%o2",
  40    "%o3",
  41    "%o4",
  42    "%o5",
  43    "%o6",
  44    "%o7",
  45    "%l0",
  46    "%l1",
  47    "%l2",
  48    "%l3",
  49    "%l4",
  50    "%l5",
  51    "%l6",
  52    "%l7",
  53    "%i0",
  54    "%i1",
  55    "%i2",
  56    "%i3",
  57    "%i4",
  58    "%i5",
  59    "%i6",
  60    "%i7",
  61};
  62#endif
  63
  64#ifdef __arch64__
  65# define SPARC64 1
  66#else
  67# define SPARC64 0
  68#endif
  69
   70/* Note that on sparcv8plus, only the %g and %o registers can hold
   71   64-bit quantities; the kernel saves these manually in full 64-bit
   72   slots.  The %i and %l registers are saved by the register window
   73   mechanism, which only allocates space for 32 bits.  Given that a
   74   window spill/fill can happen on any signal, we must consider the
   75   high bits of the %i and %l registers garbage at all times.  */
  76#if SPARC64
  77# define ALL_64  0xffffffffu
  78#else
  79# define ALL_64  0xffffu
  80#endif
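     /* ALL_64 is a TCGRegSet mask with bit N standing for register number N:
        0xffff covers registers 0-15 (%g0-%g7 and %o0-%o7), the only ones that
        can hold 64-bit values under sparcv8plus, while 0xffffffff covers all
        32 integer registers on a true 64-bit host.  */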
  81
  82/* Define some temporary registers.  T2 is used for constant generation.  */
  83#define TCG_REG_T1  TCG_REG_G1
  84#define TCG_REG_T2  TCG_REG_O7
  85
  86#ifndef CONFIG_SOFTMMU
  87# define TCG_GUEST_BASE_REG TCG_REG_I5
  88#endif
  89
  90#define TCG_REG_TB  TCG_REG_I1
  91#define USE_REG_TB  (sizeof(void *) > 4)
  92
  93static const int tcg_target_reg_alloc_order[] = {
  94    TCG_REG_L0,
  95    TCG_REG_L1,
  96    TCG_REG_L2,
  97    TCG_REG_L3,
  98    TCG_REG_L4,
  99    TCG_REG_L5,
 100    TCG_REG_L6,
 101    TCG_REG_L7,
 102
 103    TCG_REG_I0,
 104    TCG_REG_I1,
 105    TCG_REG_I2,
 106    TCG_REG_I3,
 107    TCG_REG_I4,
 108    TCG_REG_I5,
 109
 110    TCG_REG_G2,
 111    TCG_REG_G3,
 112    TCG_REG_G4,
 113    TCG_REG_G5,
 114
 115    TCG_REG_O0,
 116    TCG_REG_O1,
 117    TCG_REG_O2,
 118    TCG_REG_O3,
 119    TCG_REG_O4,
 120    TCG_REG_O5,
 121};
 122
 123static const int tcg_target_call_iarg_regs[6] = {
 124    TCG_REG_O0,
 125    TCG_REG_O1,
 126    TCG_REG_O2,
 127    TCG_REG_O3,
 128    TCG_REG_O4,
 129    TCG_REG_O5,
 130};
 131
 132static const int tcg_target_call_oarg_regs[] = {
 133    TCG_REG_O0,
 134    TCG_REG_O1,
 135    TCG_REG_O2,
 136    TCG_REG_O3,
 137};
 138
 139#define INSN_OP(x)  ((x) << 30)
 140#define INSN_OP2(x) ((x) << 22)
 141#define INSN_OP3(x) ((x) << 19)
 142#define INSN_OPF(x) ((x) << 5)
 143#define INSN_RD(x)  ((x) << 25)
 144#define INSN_RS1(x) ((x) << 14)
 145#define INSN_RS2(x) (x)
 146#define INSN_ASI(x) ((x) << 5)
 147
 148#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
 149#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
 150#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
 151#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
 152#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
 153#define INSN_COND(x) ((x) << 25)
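     /* Each INSN_* macro places one field of the 32-bit instruction word, so
        the emitters below simply OR the pieces together.  For example,
        ARITH_ADD | INSN_RD(8) | INSN_RS1(9) | INSN_RS2(10) is 0x9002400a,
        i.e. "add %o1, %o2, %o0".  */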
 154
 155#define COND_N     0x0
 156#define COND_E     0x1
 157#define COND_LE    0x2
 158#define COND_L     0x3
 159#define COND_LEU   0x4
 160#define COND_CS    0x5
 161#define COND_NEG   0x6
 162#define COND_VS    0x7
 163#define COND_A     0x8
 164#define COND_NE    0x9
 165#define COND_G     0xa
 166#define COND_GE    0xb
 167#define COND_GU    0xc
 168#define COND_CC    0xd
 169#define COND_POS   0xe
 170#define COND_VC    0xf
 171#define BA         (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
 172
 173#define RCOND_Z    1
 174#define RCOND_LEZ  2
 175#define RCOND_LZ   3
 176#define RCOND_NZ   5
 177#define RCOND_GZ   6
 178#define RCOND_GEZ  7
 179
 180#define MOVCC_ICC  (1 << 18)
 181#define MOVCC_XCC  (1 << 18 | 1 << 12)
 182
 183#define BPCC_ICC   0
 184#define BPCC_XCC   (2 << 20)
 185#define BPCC_PT    (1 << 19)
 186#define BPCC_PN    0
 187#define BPCC_A     (1 << 29)
 188
 189#define BPR_PT     BPCC_PT
 190
 191#define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00))
 192#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
 193#define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01))
 194#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05))
 195#define ARITH_OR   (INSN_OP(2) | INSN_OP3(0x02))
 196#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12))
 197#define ARITH_ORN  (INSN_OP(2) | INSN_OP3(0x06))
 198#define ARITH_XOR  (INSN_OP(2) | INSN_OP3(0x03))
 199#define ARITH_SUB  (INSN_OP(2) | INSN_OP3(0x04))
 200#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
 201#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08))
 202#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c))
 203#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
 204#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b))
 205#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
 206#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
 207#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
 208#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
 209#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
 210#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
 211#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
 212
 213#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11))
 214#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16))
 215
 216#define SHIFT_SLL  (INSN_OP(2) | INSN_OP3(0x25))
 217#define SHIFT_SRL  (INSN_OP(2) | INSN_OP3(0x26))
 218#define SHIFT_SRA  (INSN_OP(2) | INSN_OP3(0x27))
 219
 220#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12))
 221#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12))
 222#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12))
 223
 224#define RDY        (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0))
 225#define WRY        (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0))
 226#define JMPL       (INSN_OP(2) | INSN_OP3(0x38))
 227#define RETURN     (INSN_OP(2) | INSN_OP3(0x39))
 228#define SAVE       (INSN_OP(2) | INSN_OP3(0x3c))
 229#define RESTORE    (INSN_OP(2) | INSN_OP3(0x3d))
 230#define SETHI      (INSN_OP(0) | INSN_OP2(0x4))
 231#define CALL       INSN_OP(1)
 232#define LDUB       (INSN_OP(3) | INSN_OP3(0x01))
 233#define LDSB       (INSN_OP(3) | INSN_OP3(0x09))
 234#define LDUH       (INSN_OP(3) | INSN_OP3(0x02))
 235#define LDSH       (INSN_OP(3) | INSN_OP3(0x0a))
 236#define LDUW       (INSN_OP(3) | INSN_OP3(0x00))
 237#define LDSW       (INSN_OP(3) | INSN_OP3(0x08))
 238#define LDX        (INSN_OP(3) | INSN_OP3(0x0b))
 239#define STB        (INSN_OP(3) | INSN_OP3(0x05))
 240#define STH        (INSN_OP(3) | INSN_OP3(0x06))
 241#define STW        (INSN_OP(3) | INSN_OP3(0x04))
 242#define STX        (INSN_OP(3) | INSN_OP3(0x0e))
 243#define LDUBA      (INSN_OP(3) | INSN_OP3(0x11))
 244#define LDSBA      (INSN_OP(3) | INSN_OP3(0x19))
 245#define LDUHA      (INSN_OP(3) | INSN_OP3(0x12))
 246#define LDSHA      (INSN_OP(3) | INSN_OP3(0x1a))
 247#define LDUWA      (INSN_OP(3) | INSN_OP3(0x10))
 248#define LDSWA      (INSN_OP(3) | INSN_OP3(0x18))
 249#define LDXA       (INSN_OP(3) | INSN_OP3(0x1b))
 250#define STBA       (INSN_OP(3) | INSN_OP3(0x15))
 251#define STHA       (INSN_OP(3) | INSN_OP3(0x16))
 252#define STWA       (INSN_OP(3) | INSN_OP3(0x14))
 253#define STXA       (INSN_OP(3) | INSN_OP3(0x1e))
 254
 255#define MEMBAR     (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13))
 256
 257#define NOP        (SETHI | INSN_RD(TCG_REG_G0) | 0)
 258
 259#ifndef ASI_PRIMARY_LITTLE
 260#define ASI_PRIMARY_LITTLE 0x88
 261#endif
 262
 263#define LDUH_LE    (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
 264#define LDSH_LE    (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
 265#define LDUW_LE    (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
 266#define LDSW_LE    (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
 267#define LDX_LE     (LDXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
 268
 269#define STH_LE     (STHA  | INSN_ASI(ASI_PRIMARY_LITTLE))
 270#define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
 271#define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
 272
 273#ifndef use_vis3_instructions
 274bool use_vis3_instructions;
 275#endif
 276
 277static inline int check_fit_i64(int64_t val, unsigned int bits)
 278{
 279    return val == sextract64(val, 0, bits);
 280}
 281
 282static inline int check_fit_i32(int32_t val, unsigned int bits)
 283{
 284    return val == sextract32(val, 0, bits);
 285}
 286
 287#define check_fit_tl    check_fit_i64
 288#if SPARC64
 289# define check_fit_ptr  check_fit_i64
 290#else
 291# define check_fit_ptr  check_fit_i32
 292#endif
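     /* check_fit_*(val, bits) tests whether VAL is representable as a
        sign-extended BITS-wide immediate: check_fit_i32(4095, 13) is true
        (the range is -4096..4095) while check_fit_i32(4096, 13) is false.  */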
 293
 294static void patch_reloc(tcg_insn_unit *code_ptr, int type,
 295                        intptr_t value, intptr_t addend)
 296{
 297    uint32_t insn = *code_ptr;
 298    intptr_t pcrel;
 299
 300    value += addend;
 301    pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr);
 302
 303    switch (type) {
 304    case R_SPARC_WDISP16:
 305        assert(check_fit_ptr(pcrel >> 2, 16));
 306        insn &= ~INSN_OFF16(-1);
 307        insn |= INSN_OFF16(pcrel);
 308        break;
 309    case R_SPARC_WDISP19:
 310        assert(check_fit_ptr(pcrel >> 2, 19));
 311        insn &= ~INSN_OFF19(-1);
 312        insn |= INSN_OFF19(pcrel);
 313        break;
 314    case R_SPARC_13:
 315        /* Note that we're abusing this reloc type for our own needs.  */
 316        if (!check_fit_ptr(value, 13)) {
 317            int adj = (value > 0 ? 0xff8 : -0x1000);
 318            value -= adj;
 319            assert(check_fit_ptr(value, 13));
 320            *code_ptr++ = (ARITH_ADD | INSN_RD(TCG_REG_T2)
 321                           | INSN_RS1(TCG_REG_TB) | INSN_IMM13(adj));
 322            insn ^= INSN_RS1(TCG_REG_TB) ^ INSN_RS1(TCG_REG_T2);
 323        }
 324        insn &= ~INSN_IMM13(-1);
 325        insn |= INSN_IMM13(value);
 326        break;
 327    case R_SPARC_32:
 328        /* Note that we're abusing this reloc type for our own needs.  */
 329        code_ptr[0] = deposit32(code_ptr[0], 0, 22, value >> 10);
 330        code_ptr[1] = deposit32(code_ptr[1], 0, 10, value);
 331        return;
 332    default:
 333        g_assert_not_reached();
 334    }
 335
 336    *code_ptr = insn;
 337}
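     /* In the R_SPARC_13 case above, an offset that has outgrown 13 bits is
        handled by using the spare NOP slot that tcg_out_movi_int emits after
        each constant-pool load: the pair is rewritten as an ADD of the
        overflow into TCG_REG_T2 followed by the original load rebased onto
        TCG_REG_T2.  */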
 338
 339/* parse target specific constraints */
 340static const char *target_parse_constraint(TCGArgConstraint *ct,
 341                                           const char *ct_str, TCGType type)
 342{
 343    switch (*ct_str++) {
 344    case 'r':
 345        ct->ct |= TCG_CT_REG;
 346        ct->u.regs = 0xffffffff;
 347        break;
 348    case 'R':
 349        ct->ct |= TCG_CT_REG;
 350        ct->u.regs = ALL_64;
 351        break;
 352    case 'A': /* qemu_ld/st address constraint */
 353        ct->ct |= TCG_CT_REG;
 354        ct->u.regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
 355    reserve_helpers:
 356        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
 357        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
 358        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
 359        break;
 360    case 's': /* qemu_st data 32-bit constraint */
 361        ct->ct |= TCG_CT_REG;
 362        ct->u.regs = 0xffffffff;
 363        goto reserve_helpers;
 364    case 'S': /* qemu_st data 64-bit constraint */
 365        ct->ct |= TCG_CT_REG;
 366        ct->u.regs = ALL_64;
 367        goto reserve_helpers;
 368    case 'I':
 369        ct->ct |= TCG_CT_CONST_S11;
 370        break;
 371    case 'J':
 372        ct->ct |= TCG_CT_CONST_S13;
 373        break;
 374    case 'Z':
 375        ct->ct |= TCG_CT_CONST_ZERO;
 376        break;
 377    default:
 378        return NULL;
 379    }
 380    return ct_str;
 381}
 382
 383/* test if a constant matches the constraint */
 384static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
 385                                         const TCGArgConstraint *arg_ct)
 386{
 387    int ct = arg_ct->ct;
 388
 389    if (ct & TCG_CT_CONST) {
 390        return 1;
 391    }
 392
 393    if (type == TCG_TYPE_I32) {
 394        val = (int32_t)val;
 395    }
 396
 397    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
 398        return 1;
 399    } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
 400        return 1;
 401    } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
 402        return 1;
 403    } else {
 404        return 0;
 405    }
 406}
 407
 408static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1,
 409                                 TCGReg rs2, int op)
 410{
 411    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2));
 412}
 413
 414static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1,
 415                                  int32_t offset, int op)
 416{
 417    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset));
 418}
 419
 420static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
 421                           int32_t val2, int val2const, int op)
 422{
 423    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1)
 424              | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
 425}
 426
 427static inline void tcg_out_mov(TCGContext *s, TCGType type,
 428                               TCGReg ret, TCGReg arg)
 429{
 430    if (ret != arg) {
 431        tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
 432    }
 433}
 434
 435static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
 436{
 437    tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
 438}
 439
 440static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
 441{
 442    tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
 443}
 444
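     /* Build an arbitrary constant in RET, using the cheapest pattern that
        applies: a single OR for 13-bit immediates; SETHI plus an OR of the
        low 10 bits for 32-bit or zero-extended values (for 0x12345678,
        SETHI loads 0x12345400 and the OR supplies 0x278); SETHI plus XOR
        for sign-extended 32-bit values; a small constant shifted left;
        outside the prologue, a load from the constant pool; and as a last
        resort a full 64-bit build from two 32-bit halves.  */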
 445static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
 446                             tcg_target_long arg, bool in_prologue)
 447{
 448    tcg_target_long hi, lo = (int32_t)arg;
 449    tcg_target_long test, lsb;
 450
 451    /* Make sure we test 32-bit constants for imm13 properly.  */
 452    if (type == TCG_TYPE_I32) {
 453        arg = lo;
 454    }
 455
 456    /* A 13-bit constant sign-extended to 64-bits.  */
 457    if (check_fit_tl(arg, 13)) {
 458        tcg_out_movi_imm13(s, ret, arg);
 459        return;
 460    }
 461
 462    /* A 32-bit constant, or 32-bit zero-extended to 64-bits.  */
 463    if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
 464        tcg_out_sethi(s, ret, arg);
 465        if (arg & 0x3ff) {
 466            tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
 467        }
 468        return;
 469    }
 470
 471    /* A 32-bit constant sign-extended to 64-bits.  */
 472    if (arg == lo) {
 473        tcg_out_sethi(s, ret, ~arg);
 474        tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
 475        return;
 476    }
 477
 478    /* A 21-bit constant, shifted.  */
 479    lsb = ctz64(arg);
 480    test = (tcg_target_long)arg >> lsb;
 481    if (check_fit_tl(test, 13)) {
 482        tcg_out_movi_imm13(s, ret, test);
 483        tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX);
 484        return;
 485    } else if (lsb > 10 && test == extract64(test, 0, 21)) {
 486        tcg_out_sethi(s, ret, test << 10);
 487        tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX);
 488        return;
 489    }
 490
 491    if (!in_prologue) {
 492        if (USE_REG_TB) {
 493            intptr_t diff = arg - (uintptr_t)s->code_gen_ptr;
 494            if (check_fit_ptr(diff, 13)) {
 495                tcg_out_arithi(s, ret, TCG_REG_TB, diff, ARITH_ADD);
 496            } else {
 497                new_pool_label(s, arg, R_SPARC_13, s->code_ptr,
 498                               -(intptr_t)s->code_gen_ptr);
 499                tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB));
 500                /* May be used to extend the 13-bit range in patch_reloc.  */
 501                tcg_out32(s, NOP);
 502            }
 503        } else {
 504            new_pool_label(s, arg, R_SPARC_32, s->code_ptr, 0);
 505            tcg_out_sethi(s, ret, 0);
 506            tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) | INSN_IMM13(0));
 507        }
 508        return;
 509    }
 510
 511    /* A 64-bit constant decomposed into 2 32-bit pieces.  */
 512    if (check_fit_i32(lo, 13)) {
 513        hi = (arg - lo) >> 32;
 514        tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
 515        tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
 516        tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
 517    } else {
 518        hi = arg >> 32;
 519        tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
 520        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
 521        tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
 522        tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
 523    }
 524}
 525
 526static inline void tcg_out_movi(TCGContext *s, TCGType type,
 527                                TCGReg ret, tcg_target_long arg)
 528{
 529    tcg_out_movi_int(s, type, ret, arg, false);
 530}
 531
 532static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
 533                                   TCGReg a2, int op)
 534{
 535    tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
 536}
 537
 538static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
 539                         intptr_t offset, int op)
 540{
 541    if (check_fit_ptr(offset, 13)) {
 542        tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
 543                  INSN_IMM13(offset));
 544    } else {
 545        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
 546        tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
 547    }
 548}
 549
 550static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
 551                              TCGReg arg1, intptr_t arg2)
 552{
 553    tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
 554}
 555
 556static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
 557                              TCGReg arg1, intptr_t arg2)
 558{
 559    tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
 560}
 561
 562static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 563                               TCGReg base, intptr_t ofs)
 564{
 565    if (val == 0) {
 566        tcg_out_st(s, type, TCG_REG_G0, base, ofs);
 567        return true;
 568    }
 569    return false;
 570}
 571
 572static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg)
 573{
 574    intptr_t diff = arg - (uintptr_t)s->code_gen_ptr;
 575    if (USE_REG_TB && check_fit_ptr(diff, 13)) {
 576        tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff);
 577        return;
 578    }
 579    tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
 580    tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
 581}
 582
 583static inline void tcg_out_sety(TCGContext *s, TCGReg rs)
 584{
 585    tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
 586}
 587
 588static inline void tcg_out_rdy(TCGContext *s, TCGReg rd)
 589{
 590    tcg_out32(s, RDY | INSN_RD(rd));
 591}
 592
 593static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
 594                          int32_t val2, int val2const, int uns)
 595{
 596    /* Load Y with the sign/zero extension of RS1 to 64-bits.  */
 597    if (uns) {
 598        tcg_out_sety(s, TCG_REG_G0);
 599    } else {
 600        tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
 601        tcg_out_sety(s, TCG_REG_T1);
 602    }
 603
 604    tcg_out_arithc(s, rd, rs1, val2, val2const,
 605                   uns ? ARITH_UDIV : ARITH_SDIV);
 606}
 607
 608static inline void tcg_out_nop(TCGContext *s)
 609{
 610    tcg_out32(s, NOP);
 611}
 612
 613static const uint8_t tcg_cond_to_bcond[] = {
 614    [TCG_COND_EQ] = COND_E,
 615    [TCG_COND_NE] = COND_NE,
 616    [TCG_COND_LT] = COND_L,
 617    [TCG_COND_GE] = COND_GE,
 618    [TCG_COND_LE] = COND_LE,
 619    [TCG_COND_GT] = COND_G,
 620    [TCG_COND_LTU] = COND_CS,
 621    [TCG_COND_GEU] = COND_CC,
 622    [TCG_COND_LEU] = COND_LEU,
 623    [TCG_COND_GTU] = COND_GU,
 624};
 625
 626static const uint8_t tcg_cond_to_rcond[] = {
 627    [TCG_COND_EQ] = RCOND_Z,
 628    [TCG_COND_NE] = RCOND_NZ,
 629    [TCG_COND_LT] = RCOND_LZ,
 630    [TCG_COND_GT] = RCOND_GZ,
 631    [TCG_COND_LE] = RCOND_LEZ,
 632    [TCG_COND_GE] = RCOND_GEZ
 633};
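     /* BPr and MOVr compare a register against zero and exist only for
        signed conditions, which is why the unsigned TCG conditions have no
        entry in this table.  */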
 634
 635static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
 636{
 637    tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
 638}
 639
 640static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l)
 641{
 642    int off19;
 643
 644    if (l->has_value) {
 645        off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
 646    } else {
 647        /* Make sure to preserve destinations during retranslation.  */
 648        off19 = *s->code_ptr & INSN_OFF19(-1);
 649        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0);
 650    }
 651    tcg_out_bpcc0(s, scond, flags, off19);
 652}
 653
 654static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
 655{
 656    tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
 657}
 658
 659static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
 660                               int32_t arg2, int const_arg2, TCGLabel *l)
 661{
 662    tcg_out_cmp(s, arg1, arg2, const_arg2);
 663    tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l);
 664    tcg_out_nop(s);
 665}
 666
 667static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret,
 668                          int32_t v1, int v1const)
 669{
 670    tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
 671              | INSN_RS1(tcg_cond_to_bcond[cond])
 672              | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
 673}
 674
 675static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
 676                                TCGReg c1, int32_t c2, int c2const,
 677                                int32_t v1, int v1const)
 678{
 679    tcg_out_cmp(s, c1, c2, c2const);
 680    tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
 681}
 682
 683static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
 684                               int32_t arg2, int const_arg2, TCGLabel *l)
 685{
 686    /* For 64-bit signed comparisons vs zero, we can avoid the compare.  */
 687    if (arg2 == 0 && !is_unsigned_cond(cond)) {
 688        int off16;
 689
 690        if (l->has_value) {
 691            off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
 692        } else {
 693            /* Make sure to preserve destinations during retranslation.  */
 694            off16 = *s->code_ptr & INSN_OFF16(-1);
 695            tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
 696        }
 697        tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
 698                  | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
 699    } else {
 700        tcg_out_cmp(s, arg1, arg2, const_arg2);
 701        tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
 702    }
 703    tcg_out_nop(s);
 704}
 705
 706static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
 707                         int32_t v1, int v1const)
 708{
 709    tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
 710              | (tcg_cond_to_rcond[cond] << 10)
 711              | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
 712}
 713
 714static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
 715                                TCGReg c1, int32_t c2, int c2const,
 716                                int32_t v1, int v1const)
 717{
 718    /* For 64-bit signed comparisons vs zero, we can avoid the compare.
 719       Note that the immediate range is one bit smaller, so we must check
 720       for that as well.  */
 721    if (c2 == 0 && !is_unsigned_cond(cond)
 722        && (!v1const || check_fit_i32(v1, 10))) {
 723        tcg_out_movr(s, cond, ret, c1, v1, v1const);
 724    } else {
 725        tcg_out_cmp(s, c1, c2, c2const);
 726        tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
 727    }
 728}
 729
 730static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
 731                                TCGReg c1, int32_t c2, int c2const)
 732{
 733    /* For 32-bit comparisons, we can play games with ADDC/SUBC.  */
 734    switch (cond) {
 735    case TCG_COND_LTU:
 736    case TCG_COND_GEU:
 737        /* The result of the comparison is in the carry bit.  */
 738        break;
 739
 740    case TCG_COND_EQ:
 741    case TCG_COND_NE:
 742        /* For equality, we can transform to inequality vs zero.  */
 743        if (c2 != 0) {
 744            tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR);
 745            c2 = TCG_REG_T1;
 746        } else {
 747            c2 = c1;
 748        }
 749        c1 = TCG_REG_G0, c2const = 0;
 750        cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
 751        break;
 752
 753    case TCG_COND_GTU:
 754    case TCG_COND_LEU:
 755        /* If we don't need to load a constant into a register, we can
 756           swap the operands on GTU/LEU.  There's no benefit to loading
 757           the constant into a temporary register.  */
 758        if (!c2const || c2 == 0) {
 759            TCGReg t = c1;
 760            c1 = c2;
 761            c2 = t;
 762            c2const = 0;
 763            cond = tcg_swap_cond(cond);
 764            break;
 765        }
 766        /* FALLTHRU */
 767
 768    default:
 769        tcg_out_cmp(s, c1, c2, c2const);
 770        tcg_out_movi_imm13(s, ret, 0);
 771        tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
 772        return;
 773    }
 774
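         /* At this point the condition is LTU or GEU.  SUBCC leaves the
            unsigned borrow in the icc carry bit, so ADDC %g0 + 0 + C
            materializes the LTU result directly, while SUBC %g0 - (-1) - C
            computes 1 - C, the GEU result.  */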
 775    tcg_out_cmp(s, c1, c2, c2const);
 776    if (cond == TCG_COND_LTU) {
 777        tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC);
 778    } else {
 779        tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC);
 780    }
 781}
 782
 783static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
 784                                TCGReg c1, int32_t c2, int c2const)
 785{
 786    if (use_vis3_instructions) {
 787        switch (cond) {
 788        case TCG_COND_NE:
 789            if (c2 != 0) {
 790                break;
 791            }
 792            c2 = c1, c2const = 0, c1 = TCG_REG_G0;
 793            /* FALLTHRU */
 794        case TCG_COND_LTU:
 795            tcg_out_cmp(s, c1, c2, c2const);
 796            tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
 797            return;
 798        default:
 799            break;
 800        }
 801    }
 802
 803    /* For 64-bit signed comparisons vs zero, we can avoid the compare
 804       if the input does not overlap the output.  */
 805    if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
 806        tcg_out_movi_imm13(s, ret, 0);
 807        tcg_out_movr(s, cond, ret, c1, 1, 1);
 808    } else {
 809        tcg_out_cmp(s, c1, c2, c2const);
 810        tcg_out_movi_imm13(s, ret, 0);
 811        tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
 812    }
 813}
 814
 815static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh,
 816                                TCGReg al, TCGReg ah, int32_t bl, int blconst,
 817                                int32_t bh, int bhconst, int opl, int oph)
 818{
 819    TCGReg tmp = TCG_REG_T1;
 820
 821    /* Note that the low parts are fully consumed before tmp is set.  */
 822    if (rl != ah && (bhconst || rl != bh)) {
 823        tmp = rl;
 824    }
 825
 826    tcg_out_arithc(s, tmp, al, bl, blconst, opl);
 827    tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
 828    tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
 829}
 830
 831static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
 832                                TCGReg al, TCGReg ah, int32_t bl, int blconst,
 833                                int32_t bh, int bhconst, bool is_sub)
 834{
 835    TCGReg tmp = TCG_REG_T1;
 836
 837    /* Note that the low parts are fully consumed before tmp is set.  */
 838    if (rl != ah && (bhconst || rl != bh)) {
 839        tmp = rl;
 840    }
 841
 842    tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC);
 843
 844    if (use_vis3_instructions && !is_sub) {
 845        /* Note that ADDXC doesn't accept immediates.  */
 846        if (bhconst && bh != 0) {
 847           tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh);
 848           bh = TCG_REG_T2;
 849        }
 850        tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
 851    } else if (bh == TCG_REG_G0) {
 852        /* If we have a zero, we can perform the operation in two insns,
 853           with the arithmetic first, and a conditional move into place.  */
 854        if (rh == ah) {
 855            tcg_out_arithi(s, TCG_REG_T2, ah, 1,
 856                           is_sub ? ARITH_SUB : ARITH_ADD);
 857            tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0);
 858        } else {
 859            tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD);
 860            tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
 861        }
 862    } else {
 863        /* Otherwise adjust BH as if there is carry into T2 ... */
 864        if (bhconst) {
 865            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1));
 866        } else {
 867            tcg_out_arithi(s, TCG_REG_T2, bh, 1,
 868                           is_sub ? ARITH_SUB : ARITH_ADD);
 869        }
 870        /* ... smoosh T2 back to original BH if carry is clear ... */
 871        tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst);
 872        /* ... and finally perform the arithmetic with the new operand.  */
 873        tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD);
 874    }
 875
 876    tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
 877}
 878
 879static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest,
 880                                 bool in_prologue)
 881{
 882    ptrdiff_t disp = tcg_pcrel_diff(s, dest);
 883
 884    if (disp == (int32_t)disp) {
 885        tcg_out32(s, CALL | (uint32_t)disp >> 2);
 886    } else {
 887        uintptr_t desti = (uintptr_t)dest;
 888        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
 889                         desti & ~0xfff, in_prologue);
 890        tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL);
 891    }
 892}
 893
 894static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
 895{
 896    tcg_out_call_nodelay(s, dest, false);
 897    tcg_out_nop(s);
 898}
 899
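     /* The MEMBAR mmask bits (#LoadLoad, #StoreLoad, #LoadStore and
        #StoreStore in bits 0-3) line up with TCG_MO_LD_LD, TCG_MO_ST_LD,
        TCG_MO_LD_ST and TCG_MO_ST_ST, so the TCG_MO_ALL-masked argument
        below can be ORed straight into the instruction.  */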
 900static void tcg_out_mb(TCGContext *s, TCGArg a0)
 901{
 902    /* Note that the TCG memory order constants mirror the Sparc MEMBAR.  */
 903    tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
 904}
 905
 906#ifdef CONFIG_SOFTMMU
 907static tcg_insn_unit *qemu_ld_trampoline[16];
 908static tcg_insn_unit *qemu_st_trampoline[16];
 909
 910static void emit_extend(TCGContext *s, TCGReg r, int op)
 911{
 912    /* Emit zero extend of 8, 16 or 32 bit data as
 913     * required by the MO_* value op; do nothing for 64 bit.
 914     */
 915    switch (op & MO_SIZE) {
 916    case MO_8:
 917        tcg_out_arithi(s, r, r, 0xff, ARITH_AND);
 918        break;
 919    case MO_16:
 920        tcg_out_arithi(s, r, r, 16, SHIFT_SLL);
 921        tcg_out_arithi(s, r, r, 16, SHIFT_SRL);
 922        break;
 923    case MO_32:
 924        if (SPARC64) {
 925            tcg_out_arith(s, r, r, 0, SHIFT_SRL);
 926        }
 927        break;
 928    case MO_64:
 929        break;
 930    }
 931}
 932
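     /* One trampoline is built per memop for the softmmu slow path.  Each
        finishes marshalling the helper arguments (env in %o0, the original
        return address in the next free %o register, or on the stack when
        those run out) and then tail-calls the C helper: the delay slot of
        the call copies that return address back into %o7, so the helper
        returns directly to the translated code.  */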
 933static void build_trampolines(TCGContext *s)
 934{
 935    static void * const qemu_ld_helpers[16] = {
 936        [MO_UB]   = helper_ret_ldub_mmu,
 937        [MO_SB]   = helper_ret_ldsb_mmu,
 938        [MO_LEUW] = helper_le_lduw_mmu,
 939        [MO_LESW] = helper_le_ldsw_mmu,
 940        [MO_LEUL] = helper_le_ldul_mmu,
 941        [MO_LEQ]  = helper_le_ldq_mmu,
 942        [MO_BEUW] = helper_be_lduw_mmu,
 943        [MO_BESW] = helper_be_ldsw_mmu,
 944        [MO_BEUL] = helper_be_ldul_mmu,
 945        [MO_BEQ]  = helper_be_ldq_mmu,
 946    };
 947    static void * const qemu_st_helpers[16] = {
 948        [MO_UB]   = helper_ret_stb_mmu,
 949        [MO_LEUW] = helper_le_stw_mmu,
 950        [MO_LEUL] = helper_le_stl_mmu,
 951        [MO_LEQ]  = helper_le_stq_mmu,
 952        [MO_BEUW] = helper_be_stw_mmu,
 953        [MO_BEUL] = helper_be_stl_mmu,
 954        [MO_BEQ]  = helper_be_stq_mmu,
 955    };
 956
 957    int i;
 958    TCGReg ra;
 959
 960    for (i = 0; i < 16; ++i) {
 961        if (qemu_ld_helpers[i] == NULL) {
 962            continue;
 963        }
 964
 965        /* May as well align the trampoline.  */
 966        while ((uintptr_t)s->code_ptr & 15) {
 967            tcg_out_nop(s);
 968        }
 969        qemu_ld_trampoline[i] = s->code_ptr;
 970
 971        if (SPARC64 || TARGET_LONG_BITS == 32) {
 972            ra = TCG_REG_O3;
 973        } else {
 974            /* Install the high part of the address.  */
 975            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
 976            ra = TCG_REG_O4;
 977        }
 978
 979        /* Set the retaddr operand.  */
 980        tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
 981        /* Set the env operand.  */
 982        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
 983        /* Tail call.  */
 984        tcg_out_call_nodelay(s, qemu_ld_helpers[i], true);
 985        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
 986    }
 987
 988    for (i = 0; i < 16; ++i) {
 989        if (qemu_st_helpers[i] == NULL) {
 990            continue;
 991        }
 992
 993        /* May as well align the trampoline.  */
 994        while ((uintptr_t)s->code_ptr & 15) {
 995            tcg_out_nop(s);
 996        }
 997        qemu_st_trampoline[i] = s->code_ptr;
 998
 999        if (SPARC64) {
1000            emit_extend(s, TCG_REG_O2, i);
1001            ra = TCG_REG_O4;
1002        } else {
1003            ra = TCG_REG_O1;
1004            if (TARGET_LONG_BITS == 64) {
1005                /* Install the high part of the address.  */
1006                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
1007                ra += 2;
1008            } else {
1009                ra += 1;
1010            }
1011            if ((i & MO_SIZE) == MO_64) {
1012                /* Install the high part of the data.  */
1013                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
1014                ra += 2;
1015            } else {
1016                emit_extend(s, ra, i);
1017                ra += 1;
1018            }
1019            /* Skip the oi argument.  */
1020            ra += 1;
1021        }
 1022
1023        /* Set the retaddr operand.  */
1024        if (ra >= TCG_REG_O6) {
1025            tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
1026                       TCG_TARGET_CALL_STACK_OFFSET);
1027            ra = TCG_REG_G1;
1028        }
1029        tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
1030        /* Set the env operand.  */
1031        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
1032        /* Tail call.  */
1033        tcg_out_call_nodelay(s, qemu_st_helpers[i], true);
1034        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
1035    }
1036}
1037#endif
1038
1039/* Generate global QEMU prologue and epilogue code */
1040static void tcg_target_qemu_prologue(TCGContext *s)
1041{
1042    int tmp_buf_size, frame_size;
1043
1044    /* The TCG temp buffer is at the top of the frame, immediately
1045       below the frame pointer.  */
1046    tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
1047    tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
1048                  tmp_buf_size);
1049
1050    /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
1051       otherwise the minimal frame usable by callees.  */
1052    frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
1053    frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
1054    frame_size += TCG_TARGET_STACK_ALIGN - 1;
1055    frame_size &= -TCG_TARGET_STACK_ALIGN;
1056    tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
1057              INSN_IMM13(-frame_size));
1058
1059#ifndef CONFIG_SOFTMMU
1060    if (guest_base != 0) {
1061        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
1062        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
1063    }
1064#endif
1065
1066    /* We choose TCG_REG_TB such that no move is required.  */
1067    if (USE_REG_TB) {
1068        QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1);
1069        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
1070    }
1071
1072    tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL);
1073    /* delay slot */
1074    tcg_out_nop(s);
1075
1076    /* Epilogue for goto_ptr.  */
1077    s->code_gen_epilogue = s->code_ptr;
1078    tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1079    /* delay slot */
1080    tcg_out_movi_imm13(s, TCG_REG_O0, 0);
1081
1082#ifdef CONFIG_SOFTMMU
1083    build_trampolines(s);
1084#endif
1085}
1086
1087static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
1088{
1089    int i;
1090    for (i = 0; i < count; ++i) {
1091        p[i] = NOP;
1092    }
1093}
1094
1095#if defined(CONFIG_SOFTMMU)
1096/* Perform the TLB load and compare.
1097
1098   Inputs:
 1099   ADDR contains the complete guest virtual address.
 1100
 1101   MEM_INDEX and OPC are the memory context and memop of the access.
1102
1103   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1104   This should be offsetof addr_read or addr_write.
1105
1106   The result of the TLB comparison is in %[ix]cc.  The sanitized address
1107   is in the returned register, maybe %o0.  The TLB addend is in %o1.  */
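     /* Roughly, the sequence emitted below for a 64-bit guest is:
            srl    addr, TARGET_PAGE_BITS, %o1
            set    page_mask, %g1
            and    %o1, CPU_TLB_SIZE - 1, %o1
            and    addr, %g1, %o0
            sll    %o1, CPU_TLB_ENTRY_BITS, %o1
            add    env, %o1, %o1
            ldx    [%o1 + comparator_offset], %o2
            ldx    [%o1 + addend_offset], %o1
            subcc  %o0, %o2, %g0
        leaving the hit/miss result in the condition codes ("set" stands for
        the movi expansion of the page-mask constant).  */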
1108
1109static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
1110                               TCGMemOp opc, int which)
1111{
1112    const TCGReg r0 = TCG_REG_O0;
1113    const TCGReg r1 = TCG_REG_O1;
1114    const TCGReg r2 = TCG_REG_O2;
1115    unsigned s_bits = opc & MO_SIZE;
1116    unsigned a_bits = get_alignment_bits(opc);
1117    int tlb_ofs;
1118
1119    /* Shift the page number down.  */
1120    tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL);
1121
1122    /* Mask out the page offset, except for the required alignment.
1123       We don't support unaligned accesses.  */
1124    if (a_bits < s_bits) {
1125        a_bits = s_bits;
1126    }
1127    tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1,
1128                 TARGET_PAGE_MASK | ((1 << a_bits) - 1));
1129
1130    /* Mask the tlb index.  */
1131    tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND);
 1132
1133    /* Mask page, part 2.  */
1134    tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND);
1135
1136    /* Shift the tlb index into place.  */
1137    tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL);
1138
1139    /* Relative to the current ENV.  */
1140    tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
1141
1142    /* Find a base address that can load both tlb comparator and addend.  */
1143    tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
1144    if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
1145        if (tlb_ofs & ~0x3ff) {
1146            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff);
1147            tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD);
1148        }
1149        tlb_ofs &= 0x3ff;
1150    }
1151
1152    /* Load the tlb comparator and the addend.  */
1153    tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
1154    tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
1155
1156    /* subcc arg0, arg2, %g0 */
1157    tcg_out_cmp(s, r0, r2, 0);
1158
1159    /* If the guest address must be zero-extended, do so now.  */
1160    if (SPARC64 && TARGET_LONG_BITS == 32) {
1161        tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
1162        return r0;
1163    }
1164    return addr;
1165}
1166#endif /* CONFIG_SOFTMMU */
1167
1168static const int qemu_ld_opc[16] = {
1169    [MO_UB]   = LDUB,
1170    [MO_SB]   = LDSB,
1171
1172    [MO_BEUW] = LDUH,
1173    [MO_BESW] = LDSH,
1174    [MO_BEUL] = LDUW,
1175    [MO_BESL] = LDSW,
1176    [MO_BEQ]  = LDX,
1177
1178    [MO_LEUW] = LDUH_LE,
1179    [MO_LESW] = LDSH_LE,
1180    [MO_LEUL] = LDUW_LE,
1181    [MO_LESL] = LDSW_LE,
1182    [MO_LEQ]  = LDX_LE,
1183};
1184
1185static const int qemu_st_opc[16] = {
1186    [MO_UB]   = STB,
1187
1188    [MO_BEUW] = STH,
1189    [MO_BEUL] = STW,
1190    [MO_BEQ]  = STX,
1191
1192    [MO_LEUW] = STH_LE,
1193    [MO_LEUL] = STW_LE,
1194    [MO_LEQ]  = STX_LE,
1195};
1196
1197static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1198                            TCGMemOpIdx oi, bool is_64)
1199{
1200    TCGMemOp memop = get_memop(oi);
1201#ifdef CONFIG_SOFTMMU
1202    unsigned memi = get_mmuidx(oi);
1203    TCGReg addrz, param;
1204    tcg_insn_unit *func;
1205    tcg_insn_unit *label_ptr;
1206
1207    addrz = tcg_out_tlb_load(s, addr, memi, memop,
1208                             offsetof(CPUTLBEntry, addr_read));
1209
1210    /* The fast path is exactly one insn.  Thus we can perform the
1211       entire TLB Hit in the (annulled) delay slot of the branch
1212       over the TLB Miss case.  */
1213
1214    /* beq,a,pt %[xi]cc, label0 */
1215    label_ptr = s->code_ptr;
1216    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1217                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1218    /* delay slot */
1219    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
1220                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
1221
1222    /* TLB Miss.  */
1223
1224    param = TCG_REG_O1;
1225    if (!SPARC64 && TARGET_LONG_BITS == 64) {
1226        /* Skip the high-part; we'll perform the extract in the trampoline.  */
1227        param++;
1228    }
1229    tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
1230
1231    /* We use the helpers to extend SB and SW data, leaving the case
1232       of SL needing explicit extending below.  */
1233    if ((memop & MO_SSIZE) == MO_SL) {
1234        func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1235    } else {
1236        func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
1237    }
1238    tcg_debug_assert(func != NULL);
1239    tcg_out_call_nodelay(s, func, false);
1240    /* delay slot */
1241    tcg_out_movi(s, TCG_TYPE_I32, param, oi);
1242
 1243    /* Recall that all of the helpers return 64-bit results,
 1244       which complicates things for sparcv8plus.  */
1245    if (SPARC64) {
1246        /* We let the helper sign-extend SB and SW, but leave SL for here.  */
1247        if (is_64 && (memop & MO_SSIZE) == MO_SL) {
1248            tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
1249        } else {
1250            tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
1251        }
1252    } else {
1253        if ((memop & MO_SIZE) == MO_64) {
1254            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
1255            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
1256            tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
1257        } else if (is_64) {
1258            /* Re-extend from 32-bit rather than reassembling when we
1259               know the high register must be an extension.  */
1260            tcg_out_arithi(s, data, TCG_REG_O1, 0,
1261                           memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
1262        } else {
1263            tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
1264        }
1265    }
1266
1267    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1268#else
1269    if (SPARC64 && TARGET_LONG_BITS == 32) {
1270        tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1271        addr = TCG_REG_T1;
1272    }
1273    tcg_out_ldst_rr(s, data, addr,
1274                    (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
1275                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
1276#endif /* CONFIG_SOFTMMU */
1277}
1278
1279static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1280                            TCGMemOpIdx oi)
1281{
1282    TCGMemOp memop = get_memop(oi);
1283#ifdef CONFIG_SOFTMMU
1284    unsigned memi = get_mmuidx(oi);
1285    TCGReg addrz, param;
1286    tcg_insn_unit *func;
1287    tcg_insn_unit *label_ptr;
1288
1289    addrz = tcg_out_tlb_load(s, addr, memi, memop,
1290                             offsetof(CPUTLBEntry, addr_write));
1291
1292    /* The fast path is exactly one insn.  Thus we can perform the entire
1293       TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
1294    /* beq,a,pt %[xi]cc, label0 */
1295    label_ptr = s->code_ptr;
1296    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1297                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1298    /* delay slot */
1299    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
1300                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1301
1302    /* TLB Miss.  */
1303
1304    param = TCG_REG_O1;
1305    if (!SPARC64 && TARGET_LONG_BITS == 64) {
1306        /* Skip the high-part; we'll perform the extract in the trampoline.  */
1307        param++;
1308    }
1309    tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
1310    if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
1311        /* Skip the high-part; we'll perform the extract in the trampoline.  */
1312        param++;
1313    }
1314    tcg_out_mov(s, TCG_TYPE_REG, param++, data);
1315
1316    func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1317    tcg_debug_assert(func != NULL);
1318    tcg_out_call_nodelay(s, func, false);
1319    /* delay slot */
1320    tcg_out_movi(s, TCG_TYPE_I32, param, oi);
1321
1322    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1323#else
1324    if (SPARC64 && TARGET_LONG_BITS == 32) {
1325        tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1326        addr = TCG_REG_T1;
1327    }
1328    tcg_out_ldst_rr(s, data, addr,
1329                    (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
1330                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1331#endif /* CONFIG_SOFTMMU */
1332}
1333
1334static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1335                       const TCGArg args[TCG_MAX_OP_ARGS],
1336                       const int const_args[TCG_MAX_OP_ARGS])
1337{
1338    TCGArg a0, a1, a2;
1339    int c, c2;
1340
1341    /* Hoist the loads of the most common arguments.  */
1342    a0 = args[0];
1343    a1 = args[1];
1344    a2 = args[2];
1345    c2 = const_args[2];
1346
1347    switch (opc) {
1348    case INDEX_op_exit_tb:
1349        if (check_fit_ptr(a0, 13)) {
1350            tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1351            tcg_out_movi_imm13(s, TCG_REG_O0, a0);
1352            break;
1353        } else if (USE_REG_TB) {
1354            intptr_t tb_diff = a0 - (uintptr_t)s->code_gen_ptr;
1355            if (check_fit_ptr(tb_diff, 13)) {
1356                tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1357                /* Note that TCG_REG_TB has been unwound to O1.  */
1358                tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD);
1359                break;
1360            }
1361        }
1362        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
1363        tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1364        tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
1365        break;
1366    case INDEX_op_goto_tb:
1367        if (s->tb_jmp_insn_offset) {
1368            /* direct jump method */
1369            if (USE_REG_TB) {
1370                /* make sure the patch is 8-byte aligned.  */
1371                if ((intptr_t)s->code_ptr & 4) {
1372                    tcg_out_nop(s);
1373                }
1374                s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1375                tcg_out_sethi(s, TCG_REG_T1, 0);
1376                tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR);
1377                tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL);
1378                tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD);
1379            } else {
1380                s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1381                tcg_out32(s, CALL);
1382                tcg_out_nop(s);
1383            }
1384        } else {
1385            /* indirect jump method */
1386            tcg_out_ld_ptr(s, TCG_REG_TB,
1387                           (uintptr_t)(s->tb_jmp_target_addr + a0));
1388            tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL);
1389            tcg_out_nop(s);
1390        }
1391        set_jmp_reset_offset(s, a0);
1392
1393        /* For the unlinked path of goto_tb, we need to reset
1394           TCG_REG_TB to the beginning of this TB.  */
1395        if (USE_REG_TB) {
1396            c = -tcg_current_code_size(s);
1397            if (check_fit_i32(c, 13)) {
1398                tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD);
1399            } else {
1400                tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c);
1401                tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB,
1402                              TCG_REG_T1, ARITH_ADD);
1403            }
1404        }
1405        break;
1406    case INDEX_op_goto_ptr:
1407        tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL);
1408        if (USE_REG_TB) {
1409            tcg_out_arith(s, TCG_REG_TB, a0, TCG_REG_G0, ARITH_OR);
1410        } else {
1411            tcg_out_nop(s);
1412        }
1413        break;
1414    case INDEX_op_br:
1415        tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
1416        tcg_out_nop(s);
1417        break;
1418
1419#define OP_32_64(x)                             \
1420        glue(glue(case INDEX_op_, x), _i32):    \
1421        glue(glue(case INDEX_op_, x), _i64)
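         /* For example, OP_32_64(ld8u) expands to
            "case INDEX_op_ld8u_i32:  case INDEX_op_ld8u_i64:", so each use
            below handles both the 32-bit and 64-bit opcode with one body.  */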
1422
1423    OP_32_64(ld8u):
1424        tcg_out_ldst(s, a0, a1, a2, LDUB);
1425        break;
1426    OP_32_64(ld8s):
1427        tcg_out_ldst(s, a0, a1, a2, LDSB);
1428        break;
1429    OP_32_64(ld16u):
1430        tcg_out_ldst(s, a0, a1, a2, LDUH);
1431        break;
1432    OP_32_64(ld16s):
1433        tcg_out_ldst(s, a0, a1, a2, LDSH);
1434        break;
1435    case INDEX_op_ld_i32:
1436    case INDEX_op_ld32u_i64:
1437        tcg_out_ldst(s, a0, a1, a2, LDUW);
1438        break;
1439    OP_32_64(st8):
1440        tcg_out_ldst(s, a0, a1, a2, STB);
1441        break;
1442    OP_32_64(st16):
1443        tcg_out_ldst(s, a0, a1, a2, STH);
1444        break;
1445    case INDEX_op_st_i32:
1446    case INDEX_op_st32_i64:
1447        tcg_out_ldst(s, a0, a1, a2, STW);
1448        break;
1449    OP_32_64(add):
1450        c = ARITH_ADD;
1451        goto gen_arith;
1452    OP_32_64(sub):
1453        c = ARITH_SUB;
1454        goto gen_arith;
1455    OP_32_64(and):
1456        c = ARITH_AND;
1457        goto gen_arith;
1458    OP_32_64(andc):
1459        c = ARITH_ANDN;
1460        goto gen_arith;
1461    OP_32_64(or):
1462        c = ARITH_OR;
1463        goto gen_arith;
1464    OP_32_64(orc):
1465        c = ARITH_ORN;
1466        goto gen_arith;
1467    OP_32_64(xor):
1468        c = ARITH_XOR;
1469        goto gen_arith;
1470    case INDEX_op_shl_i32:
1471        c = SHIFT_SLL;
1472    do_shift32:
1473        /* Limit immediate shift count lest we create an illegal insn.  */
1474        tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
1475        break;
1476    case INDEX_op_shr_i32:
1477        c = SHIFT_SRL;
1478        goto do_shift32;
1479    case INDEX_op_sar_i32:
1480        c = SHIFT_SRA;
1481        goto do_shift32;
1482    case INDEX_op_mul_i32:
1483        c = ARITH_UMUL;
1484        goto gen_arith;
1485
1486    OP_32_64(neg):
1487        c = ARITH_SUB;
1488        goto gen_arith1;
1489    OP_32_64(not):
1490        c = ARITH_ORN;
1491        goto gen_arith1;
1492
1493    case INDEX_op_div_i32:
1494        tcg_out_div32(s, a0, a1, a2, c2, 0);
1495        break;
1496    case INDEX_op_divu_i32:
1497        tcg_out_div32(s, a0, a1, a2, c2, 1);
1498        break;
1499
1500    case INDEX_op_brcond_i32:
1501        tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3]));
1502        break;
1503    case INDEX_op_setcond_i32:
1504        tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
1505        break;
1506    case INDEX_op_movcond_i32:
1507        tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1508        break;
1509
1510    case INDEX_op_add2_i32:
1511        tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1512                            args[4], const_args[4], args[5], const_args[5],
1513                            ARITH_ADDCC, ARITH_ADDC);
1514        break;
1515    case INDEX_op_sub2_i32:
1516        tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1517                            args[4], const_args[4], args[5], const_args[5],
1518                            ARITH_SUBCC, ARITH_SUBC);
1519        break;
1520    case INDEX_op_mulu2_i32:
1521        c = ARITH_UMUL;
1522        goto do_mul2;
1523    case INDEX_op_muls2_i32:
1524        c = ARITH_SMUL;
1525    do_mul2:
1526        /* The 32-bit multiply insns produce a full 64-bit result.  If the
1527           destination register can hold it, we can avoid the slower RDY.  */
1528        tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
1529        if (SPARC64 || a0 <= TCG_REG_O7) {
1530            tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1531        } else {
1532            tcg_out_rdy(s, a1);
1533        }
1534        break;
1535
1536    case INDEX_op_qemu_ld_i32:
1537        tcg_out_qemu_ld(s, a0, a1, a2, false);
1538        break;
1539    case INDEX_op_qemu_ld_i64:
1540        tcg_out_qemu_ld(s, a0, a1, a2, true);
1541        break;
1542    case INDEX_op_qemu_st_i32:
1543    case INDEX_op_qemu_st_i64:
1544        tcg_out_qemu_st(s, a0, a1, a2);
1545        break;
1546
1547    case INDEX_op_ld32s_i64:
1548        tcg_out_ldst(s, a0, a1, a2, LDSW);
1549        break;
1550    case INDEX_op_ld_i64:
1551        tcg_out_ldst(s, a0, a1, a2, LDX);
1552        break;
1553    case INDEX_op_st_i64:
1554        tcg_out_ldst(s, a0, a1, a2, STX);
1555        break;
1556    case INDEX_op_shl_i64:
1557        c = SHIFT_SLLX;
1558    do_shift64:
1559        /* Limit immediate shift count lest we create an illegal insn.  */
1560        tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
1561        break;
1562    case INDEX_op_shr_i64:
1563        c = SHIFT_SRLX;
1564        goto do_shift64;
1565    case INDEX_op_sar_i64:
1566        c = SHIFT_SRAX;
1567        goto do_shift64;
1568    case INDEX_op_mul_i64:
1569        c = ARITH_MULX;
1570        goto gen_arith;
1571    case INDEX_op_div_i64:
1572        c = ARITH_SDIVX;
1573        goto gen_arith;
1574    case INDEX_op_divu_i64:
1575        c = ARITH_UDIVX;
1576        goto gen_arith;
1577    case INDEX_op_ext_i32_i64:
1578    case INDEX_op_ext32s_i64:
1579        tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
1580        break;
1581    case INDEX_op_extu_i32_i64:
1582    case INDEX_op_ext32u_i64:
1583        tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
1584        break;
1585    case INDEX_op_extrl_i64_i32:
1586        tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
1587        break;
1588    case INDEX_op_extrh_i64_i32:
1589        tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX);
1590        break;
1591
1592    case INDEX_op_brcond_i64:
1593        tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3]));
1594        break;
1595    case INDEX_op_setcond_i64:
1596        tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
1597        break;
1598    case INDEX_op_movcond_i64:
1599        tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1600        break;
1601    case INDEX_op_add2_i64:
1602        tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1603                            const_args[4], args[5], const_args[5], false);
1604        break;
1605    case INDEX_op_sub2_i64:
1606        tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1607                            const_args[4], args[5], const_args[5], true);
1608        break;
1609    case INDEX_op_muluh_i64:
1610        tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI);
1611        break;
1612
1613    gen_arith:
1614        tcg_out_arithc(s, a0, a1, a2, c2, c);
1615        break;
1616
1617    gen_arith1:
1618        tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
1619        break;
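        /* Informative: gen_arith1 is reached from the unary ops earlier in
           this switch; with %g0 (always zero) as the first source a
           two-operand insn acts as the unary form, e.g. "sub %g0, x, d" for
           neg and "orn %g0, x, d" for not (a sketch of intent only).  */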
1620
1621    case INDEX_op_mb:
1622        tcg_out_mb(s, a0);
1623        break;
1624
1625    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
1626    case INDEX_op_mov_i64:
1627    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
1628    case INDEX_op_movi_i64:
1629    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
1630    default:
1631        tcg_abort();
1632    }
1633}
1634
1635static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
1636{
1637    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
1638    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
1639    static const TCGTargetOpDef R_r = { .args_ct_str = { "R", "r" } };
1640    static const TCGTargetOpDef r_R = { .args_ct_str = { "r", "R" } };
1641    static const TCGTargetOpDef R_R = { .args_ct_str = { "R", "R" } };
1642    static const TCGTargetOpDef r_A = { .args_ct_str = { "r", "A" } };
1643    static const TCGTargetOpDef R_A = { .args_ct_str = { "R", "A" } };
1644    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
1645    static const TCGTargetOpDef RZ_r = { .args_ct_str = { "RZ", "r" } };
1646    static const TCGTargetOpDef sZ_A = { .args_ct_str = { "sZ", "A" } };
1647    static const TCGTargetOpDef SZ_A = { .args_ct_str = { "SZ", "A" } };
1648    static const TCGTargetOpDef rZ_rJ = { .args_ct_str = { "rZ", "rJ" } };
1649    static const TCGTargetOpDef RZ_RJ = { .args_ct_str = { "RZ", "RJ" } };
1650    static const TCGTargetOpDef R_R_R = { .args_ct_str = { "R", "R", "R" } };
1651    static const TCGTargetOpDef r_rZ_rJ
1652        = { .args_ct_str = { "r", "rZ", "rJ" } };
1653    static const TCGTargetOpDef R_RZ_RJ
1654        = { .args_ct_str = { "R", "RZ", "RJ" } };
1655    static const TCGTargetOpDef r_r_rZ_rJ
1656        = { .args_ct_str = { "r", "r", "rZ", "rJ" } };
1657    static const TCGTargetOpDef movc_32
1658        = { .args_ct_str = { "r", "rZ", "rJ", "rI", "0" } };
1659    static const TCGTargetOpDef movc_64
1660        = { .args_ct_str = { "R", "RZ", "RJ", "RI", "0" } };
1661    static const TCGTargetOpDef add2_32
1662        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rJ", "rJ" } };
1663    static const TCGTargetOpDef add2_64
1664        = { .args_ct_str = { "R", "R", "RZ", "RZ", "RJ", "RI" } };
1665
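    /* Reading aid, not normative: in this backend a lower-case "r" allows any
       register usable for 32-bit values, while upper-case "R" restricts to
       registers that can hold 64-bit values (ALL_64).  "Z" additionally
       admits the constant zero, "J" a signed 13-bit and "I" a signed 11-bit
       immediate.  "A", "s" and "S" are the qemu_ld/st address and store-data
       constraints, and a literal "0" ties that operand to the same register
       as operand 0 (the output), as in movc_32/movc_64.  */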
1666    switch (op) {
1667    case INDEX_op_goto_ptr:
1668        return &r;
1669
1670    case INDEX_op_ld8u_i32:
1671    case INDEX_op_ld8s_i32:
1672    case INDEX_op_ld16u_i32:
1673    case INDEX_op_ld16s_i32:
1674    case INDEX_op_ld_i32:
1675    case INDEX_op_neg_i32:
1676    case INDEX_op_not_i32:
1677        return &r_r;
1678
1679    case INDEX_op_st8_i32:
1680    case INDEX_op_st16_i32:
1681    case INDEX_op_st_i32:
1682        return &rZ_r;
1683
1684    case INDEX_op_add_i32:
1685    case INDEX_op_mul_i32:
1686    case INDEX_op_div_i32:
1687    case INDEX_op_divu_i32:
1688    case INDEX_op_sub_i32:
1689    case INDEX_op_and_i32:
1690    case INDEX_op_andc_i32:
1691    case INDEX_op_or_i32:
1692    case INDEX_op_orc_i32:
1693    case INDEX_op_xor_i32:
1694    case INDEX_op_shl_i32:
1695    case INDEX_op_shr_i32:
1696    case INDEX_op_sar_i32:
1697    case INDEX_op_setcond_i32:
1698        return &r_rZ_rJ;
1699
1700    case INDEX_op_brcond_i32:
1701        return &rZ_rJ;
1702    case INDEX_op_movcond_i32:
1703        return &movc_32;
1704    case INDEX_op_add2_i32:
1705    case INDEX_op_sub2_i32:
1706        return &add2_32;
1707    case INDEX_op_mulu2_i32:
1708    case INDEX_op_muls2_i32:
1709        return &r_r_rZ_rJ;
1710
1711    case INDEX_op_ld8u_i64:
1712    case INDEX_op_ld8s_i64:
1713    case INDEX_op_ld16u_i64:
1714    case INDEX_op_ld16s_i64:
1715    case INDEX_op_ld32u_i64:
1716    case INDEX_op_ld32s_i64:
1717    case INDEX_op_ld_i64:
1718    case INDEX_op_ext_i32_i64:
1719    case INDEX_op_extu_i32_i64:
1720        return &R_r;
1721
1722    case INDEX_op_st8_i64:
1723    case INDEX_op_st16_i64:
1724    case INDEX_op_st32_i64:
1725    case INDEX_op_st_i64:
1726        return &RZ_r;
1727
1728    case INDEX_op_add_i64:
1729    case INDEX_op_mul_i64:
1730    case INDEX_op_div_i64:
1731    case INDEX_op_divu_i64:
1732    case INDEX_op_sub_i64:
1733    case INDEX_op_and_i64:
1734    case INDEX_op_andc_i64:
1735    case INDEX_op_or_i64:
1736    case INDEX_op_orc_i64:
1737    case INDEX_op_xor_i64:
1738    case INDEX_op_shl_i64:
1739    case INDEX_op_shr_i64:
1740    case INDEX_op_sar_i64:
1741    case INDEX_op_setcond_i64:
1742        return &R_RZ_RJ;
1743
1744    case INDEX_op_neg_i64:
1745    case INDEX_op_not_i64:
1746    case INDEX_op_ext32s_i64:
1747    case INDEX_op_ext32u_i64:
1748        return &R_R;
1749
1750    case INDEX_op_extrl_i64_i32:
1751    case INDEX_op_extrh_i64_i32:
1752        return &r_R;
1753
1754    case INDEX_op_brcond_i64:
1755        return &RZ_RJ;
1756    case INDEX_op_movcond_i64:
1757        return &movc_64;
1758    case INDEX_op_add2_i64:
1759    case INDEX_op_sub2_i64:
1760        return &add2_64;
1761    case INDEX_op_muluh_i64:
1762        return &R_R_R;
1763
1764    case INDEX_op_qemu_ld_i32:
1765        return &r_A;
1766    case INDEX_op_qemu_ld_i64:
1767        return &R_A;
1768    case INDEX_op_qemu_st_i32:
1769        return &sZ_A;
1770    case INDEX_op_qemu_st_i64:
1771        return &SZ_A;
1772
1773    default:
1774        return NULL;
1775    }
1776}
1777
1778static void tcg_target_init(TCGContext *s)
1779{
1780    /* Only probe for the platform and capabilities if we haven't already
1781       determined maximum values at compile time.  */
1782#ifndef use_vis3_instructions
1783    {
1784        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
1785        use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
1786    }
1787#endif
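    /* Assumed context (informative): the VIS3 flag probed here gates the use
       of VIS3-only instructions in this backend, e.g. the UMULXHI emitted for
       muluh_i64 in tcg_out_op above.  */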
1788
1789    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
1790    tcg_target_available_regs[TCG_TYPE_I64] = ALL_64;
1791
1792    tcg_target_call_clobber_regs = 0;
1793    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1);
1794    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G2);
1795    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G3);
1796    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G4);
1797    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G5);
1798    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G6);
1799    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G7);
1800    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O0);
1801    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O1);
1802    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O2);
1803    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O3);
1804    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O4);
1805    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O5);
1806    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O6);
1807    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O7);
1808
1809    s->reserved_regs = 0;
1810    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
1811    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
1812    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
1813    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
1814    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
1815    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
1816    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
1817    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
1818}
1819
1820#if SPARC64
1821# define ELF_HOST_MACHINE  EM_SPARCV9
1822#else
1823# define ELF_HOST_MACHINE  EM_SPARC32PLUS
1824# define ELF_HOST_FLAGS    EF_SPARC_32PLUS
1825#endif
1826
1827typedef struct {
1828    DebugFrameHeader h;
1829    uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
1830    uint8_t fde_win_save;
1831    uint8_t fde_ret_save[3];
1832} DebugFrame;
1833
1834static const DebugFrame debug_frame = {
1835    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1836    .h.cie.id = -1,
1837    .h.cie.version = 1,
1838    .h.cie.code_align = 1,
1839    .h.cie.data_align = -sizeof(void *) & 0x7f,
1840    .h.cie.return_column = 15,            /* o7 */
1841
1842    /* Total FDE size does not include the "len" member.  */
1843    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1844
1845    .fde_def_cfa = {
1846#if SPARC64
1847        12, 30,                         /* DW_CFA_def_cfa i6, 2047 */
1848        (2047 & 0x7f) | 0x80, (2047 >> 7)
1849#else
1850        13, 30                          /* DW_CFA_def_cfa_register i6 */
1851#endif
1852    },
1853    .fde_win_save = 0x2d,               /* DW_CFA_GNU_window_save */
1854    .fde_ret_save = { 9, 15, 31 },      /* DW_CFA_register o7, i7 */
1855};
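/* Worked example of the encodings above (informative only): DW_CFA_def_cfa is
   opcode 0x0c (12) and DW_CFA_def_cfa_register is 0x0d (13); DWARF column 30
   is %i6.  The SPARC64 offset 2047 is ULEB128-encoded in two bytes,
   (2047 & 0x7f) | 0x80 = 0xff followed by 2047 >> 7 = 0x0f.  fde_ret_save is
   DW_CFA_register (0x09) mapping column 15 (%o7) to column 31 (%i7).  */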
1856
1857void tcg_register_jit(void *buf, size_t buf_size)
1858{
1859    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1860}
1861
1862void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1863                              uintptr_t addr)
1864{
1865    intptr_t tb_disp = addr - tc_ptr;
1866    intptr_t br_disp = addr - jmp_addr;
1867    tcg_insn_unit i1, i2;
1868
1869    /* We can reach the entire address space for ILP32.
1870       For LP64, the code_gen_buffer can't be larger than 2GB.  */
1871    tcg_debug_assert(tb_disp == (int32_t)tb_disp);
1872    tcg_debug_assert(br_disp == (int32_t)br_disp);
1873
1874    if (!USE_REG_TB) {
1875        atomic_set((uint32_t *)jmp_addr, deposit32(CALL, 0, 30, br_disp >> 2));
1876        flush_icache_range(jmp_addr, jmp_addr + 4);
1877        return;
1878    }
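    /* Informative note on the direct-jump patch above: the CALL opcode
       carries a 30-bit word displacement, so deposit32(CALL, 0, 30,
       br_disp >> 2) forms "call addr" with addr == jmp_addr + br_disp, and a
       single aligned 32-bit store keeps the patch atomic with respect to
       concurrently executing code.  */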
1879
1880    /* This case does not exercise the full range of the branch, but we
1881       still need to be able to load the new value of TCG_REG_TB.
1882       Displacements this small do happen quite often.  */
1883    if (check_fit_ptr(tb_disp, 13)) {
1884        /* ba,pt %icc, addr */
1885        i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A)
1886              | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp));
1887        i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB)
1888              | INSN_IMM13(tb_disp));
1889    } else if (tb_disp >= 0) {
1890        i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10);
1891        i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
1892              | INSN_IMM13(tb_disp & 0x3ff));
1893    } else {
1894        i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10);
1895        i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
1896              | INSN_IMM13((tb_disp & 0x3ff) | -0x400));
1897    }
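    /* Worked example (informative): for tb_disp = -0x1234, ~tb_disp &
       0xfffffc00 = 0x1000, so SETHI loads 0x1000 into T1.  The 13-bit XOR
       immediate is (tb_disp & 0x3ff) | -0x400 = -0x234, which sign-extends
       to 0xfffffffffffffdcc, and 0x1000 ^ 0xfffffffffffffdcc = -0x1234,
       recovering the displacement in two insns.  The deposit64() below
       places i1 in the high half, which big-endian SPARC stores first, so
       both insns are replaced by one 64-bit store.  */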
1898
1899    atomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
1900    flush_icache_range(jmp_addr, jmp_addr + 8);
1901}
1902