qemu/tcg/ppc/tcg-target.inc.c
<<
>>
Prefs
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "tcg-be-ldst.h"
  26
  27#if defined _CALL_DARWIN || defined __APPLE__
  28#define TCG_TARGET_CALL_DARWIN
  29#endif
  30#ifdef _CALL_SYSV
  31# define TCG_TARGET_CALL_ALIGN_ARGS   1
  32#endif
  33
  34/* For some memory operations, we need a scratch that isn't R0.  For the AIX
  35   calling convention, we can re-use the TOC register since we'll be reloading
  36   it at every call.  Otherwise R12 will do nicely as neither a call-saved
  37   register nor a parameter register.  */
  38#ifdef _CALL_AIX
  39# define TCG_REG_TMP1   TCG_REG_R2
  40#else
  41# define TCG_REG_TMP1   TCG_REG_R12
  42#endif
  43
  44/* For the 64-bit target, we don't like the 5 insn sequence needed to build
  45   full 64-bit addresses.  Better to have a base register to which we can
  46   apply a 32-bit displacement.
  47
  48   There are generally three items of interest:
  49   (1) helper functions in the main executable,
  50   (2) TranslationBlock data structures,
  51   (3) the return address in the epilogue.
  52
  53   For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer
  54   will be inside the main executable, and thus near enough to make a
  55   pointer to the epilogue be within 2GB of all helper functions.
  56
  57   For softmmu, we'll let the kernel choose the address of code_gen_buffer,
  58   and odds are it'll be somewhere close to the main malloc arena, and so
  59   a pointer to the epilogue will be within 2GB of the TranslationBlocks.
  60
  61   For --enable-pie, everything will be kinda near everything else,
  62   somewhere in high memory.
  63
  64   Thus we choose to keep the return address in a call-saved register.  */
  65#define TCG_REG_RA     TCG_REG_R31
  66#define USE_REG_RA     (TCG_TARGET_REG_BITS == 64)
  67
  68/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
  69#define SZP  ((int)sizeof(void *))
  70
  71/* Shorthand for size of a register.  */
  72#define SZR  (TCG_TARGET_REG_BITS / 8)
  73
  74#define TCG_CT_CONST_S16  0x100
  75#define TCG_CT_CONST_U16  0x200
  76#define TCG_CT_CONST_S32  0x400
  77#define TCG_CT_CONST_U32  0x800
  78#define TCG_CT_CONST_ZERO 0x1000
  79#define TCG_CT_CONST_MONE 0x2000
  80
  81static tcg_insn_unit *tb_ret_addr;
  82
  83#include "elf.h"
  84static bool have_isa_2_06;
  85#define HAVE_ISA_2_06  have_isa_2_06
  86#define HAVE_ISEL      have_isa_2_06
  87
  88#ifndef CONFIG_SOFTMMU
  89#define TCG_GUEST_BASE_REG 30
  90#endif
  91
  92#ifdef CONFIG_DEBUG_TCG
  93static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
  94    "r0",
  95    "r1",
  96    "r2",
  97    "r3",
  98    "r4",
  99    "r5",
 100    "r6",
 101    "r7",
 102    "r8",
 103    "r9",
 104    "r10",
 105    "r11",
 106    "r12",
 107    "r13",
 108    "r14",
 109    "r15",
 110    "r16",
 111    "r17",
 112    "r18",
 113    "r19",
 114    "r20",
 115    "r21",
 116    "r22",
 117    "r23",
 118    "r24",
 119    "r25",
 120    "r26",
 121    "r27",
 122    "r28",
 123    "r29",
 124    "r30",
 125    "r31"
 126};
 127#endif
 128
 129static const int tcg_target_reg_alloc_order[] = {
 130    TCG_REG_R14,  /* call saved registers */
 131    TCG_REG_R15,
 132    TCG_REG_R16,
 133    TCG_REG_R17,
 134    TCG_REG_R18,
 135    TCG_REG_R19,
 136    TCG_REG_R20,
 137    TCG_REG_R21,
 138    TCG_REG_R22,
 139    TCG_REG_R23,
 140    TCG_REG_R24,
 141    TCG_REG_R25,
 142    TCG_REG_R26,
 143    TCG_REG_R27,
 144    TCG_REG_R28,
 145    TCG_REG_R29,
 146    TCG_REG_R30,
 147    TCG_REG_R31,
 148    TCG_REG_R12,  /* call clobbered, non-arguments */
 149    TCG_REG_R11,
 150    TCG_REG_R2,
 151    TCG_REG_R13,
 152    TCG_REG_R10,  /* call clobbered, arguments */
 153    TCG_REG_R9,
 154    TCG_REG_R8,
 155    TCG_REG_R7,
 156    TCG_REG_R6,
 157    TCG_REG_R5,
 158    TCG_REG_R4,
 159    TCG_REG_R3,
 160};
 161
 162static const int tcg_target_call_iarg_regs[] = {
 163    TCG_REG_R3,
 164    TCG_REG_R4,
 165    TCG_REG_R5,
 166    TCG_REG_R6,
 167    TCG_REG_R7,
 168    TCG_REG_R8,
 169    TCG_REG_R9,
 170    TCG_REG_R10
 171};
 172
 173static const int tcg_target_call_oarg_regs[] = {
 174    TCG_REG_R3,
 175    TCG_REG_R4
 176};
 177
 178static const int tcg_target_callee_save_regs[] = {
 179#ifdef TCG_TARGET_CALL_DARWIN
 180    TCG_REG_R11,
 181#endif
 182    TCG_REG_R14,
 183    TCG_REG_R15,
 184    TCG_REG_R16,
 185    TCG_REG_R17,
 186    TCG_REG_R18,
 187    TCG_REG_R19,
 188    TCG_REG_R20,
 189    TCG_REG_R21,
 190    TCG_REG_R22,
 191    TCG_REG_R23,
 192    TCG_REG_R24,
 193    TCG_REG_R25,
 194    TCG_REG_R26,
 195    TCG_REG_R27, /* currently used for the global env */
 196    TCG_REG_R28,
 197    TCG_REG_R29,
 198    TCG_REG_R30,
 199    TCG_REG_R31
 200};
 201
 202static inline bool in_range_b(tcg_target_long target)
 203{
 204    return target == sextract64(target, 0, 26);
 205}
 206
 207static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 208{
 209    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
 210    tcg_debug_assert(in_range_b(disp));
 211    return disp & 0x3fffffc;
 212}
 213
 214static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
 215{
 216    *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target);
 217}
 218
 219static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 220{
 221    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
 222    tcg_debug_assert(disp == (int16_t) disp);
 223    return disp & 0xfffc;
 224}
 225
 226static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
 227{
 228    *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target);
 229}
 230
 231static inline void tcg_out_b_noaddr(TCGContext *s, int insn)
 232{
 233    unsigned retrans = *s->code_ptr & 0x3fffffc;
 234    tcg_out32(s, insn | retrans);
 235}
 236
 237static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
 238{
 239    unsigned retrans = *s->code_ptr & 0xfffc;
 240    tcg_out32(s, insn | retrans);
 241}
 242
 243static void patch_reloc(tcg_insn_unit *code_ptr, int type,
 244                        intptr_t value, intptr_t addend)
 245{
 246    tcg_insn_unit *target = (tcg_insn_unit *)value;
 247
 248    tcg_debug_assert(addend == 0);
 249    switch (type) {
 250    case R_PPC_REL14:
 251        reloc_pc14(code_ptr, target);
 252        break;
 253    case R_PPC_REL24:
 254        reloc_pc24(code_ptr, target);
 255        break;
 256    default:
 257        tcg_abort();
 258    }
 259}
 260
 261/* parse target specific constraints */
 262static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 263{
 264    const char *ct_str;
 265
 266    ct_str = *pct_str;
 267    switch (ct_str[0]) {
 268    case 'A': case 'B': case 'C': case 'D':
 269        ct->ct |= TCG_CT_REG;
 270        tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
 271        break;
 272    case 'r':
 273        ct->ct |= TCG_CT_REG;
 274        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
 275        break;
 276    case 'L':                   /* qemu_ld constraint */
 277        ct->ct |= TCG_CT_REG;
 278        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
 279        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 280#ifdef CONFIG_SOFTMMU
 281        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
 282        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
 283#endif
 284        break;
 285    case 'S':                   /* qemu_st constraint */
 286        ct->ct |= TCG_CT_REG;
 287        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
 288        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 289#ifdef CONFIG_SOFTMMU
 290        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
 291        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
 292        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
 293#endif
 294        break;
 295    case 'I':
 296        ct->ct |= TCG_CT_CONST_S16;
 297        break;
 298    case 'J':
 299        ct->ct |= TCG_CT_CONST_U16;
 300        break;
 301    case 'M':
 302        ct->ct |= TCG_CT_CONST_MONE;
 303        break;
 304    case 'T':
 305        ct->ct |= TCG_CT_CONST_S32;
 306        break;
 307    case 'U':
 308        ct->ct |= TCG_CT_CONST_U32;
 309        break;
 310    case 'Z':
 311        ct->ct |= TCG_CT_CONST_ZERO;
 312        break;
 313    default:
 314        return -1;
 315    }
 316    ct_str++;
 317    *pct_str = ct_str;
 318    return 0;
 319}
 320
 321/* test if a constant matches the constraint */
 322static int tcg_target_const_match(tcg_target_long val, TCGType type,
 323                                  const TCGArgConstraint *arg_ct)
 324{
 325    int ct = arg_ct->ct;
 326    if (ct & TCG_CT_CONST) {
 327        return 1;
 328    }
 329
 330    /* The only 32-bit constraint we use aside from
 331       TCG_CT_CONST is TCG_CT_CONST_S16.  */
 332    if (type == TCG_TYPE_I32) {
 333        val = (int32_t)val;
 334    }
 335
 336    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
 337        return 1;
 338    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
 339        return 1;
 340    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
 341        return 1;
 342    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
 343        return 1;
 344    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
 345        return 1;
 346    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
 347        return 1;
 348    }
 349    return 0;
 350}
 351
 352#define OPCD(opc) ((opc)<<26)
 353#define XO19(opc) (OPCD(19)|((opc)<<1))
 354#define MD30(opc) (OPCD(30)|((opc)<<2))
 355#define MDS30(opc) (OPCD(30)|((opc)<<1))
 356#define XO31(opc) (OPCD(31)|((opc)<<1))
 357#define XO58(opc) (OPCD(58)|(opc))
 358#define XO62(opc) (OPCD(62)|(opc))
 359
 360#define B      OPCD( 18)
 361#define BC     OPCD( 16)
 362#define LBZ    OPCD( 34)
 363#define LHZ    OPCD( 40)
 364#define LHA    OPCD( 42)
 365#define LWZ    OPCD( 32)
 366#define STB    OPCD( 38)
 367#define STH    OPCD( 44)
 368#define STW    OPCD( 36)
 369
 370#define STD    XO62(  0)
 371#define STDU   XO62(  1)
 372#define STDX   XO31(149)
 373
 374#define LD     XO58(  0)
 375#define LDX    XO31( 21)
 376#define LDU    XO58(  1)
 377#define LWA    XO58(  2)
 378#define LWAX   XO31(341)
 379
 380#define ADDIC  OPCD( 12)
 381#define ADDI   OPCD( 14)
 382#define ADDIS  OPCD( 15)
 383#define ORI    OPCD( 24)
 384#define ORIS   OPCD( 25)
 385#define XORI   OPCD( 26)
 386#define XORIS  OPCD( 27)
 387#define ANDI   OPCD( 28)
 388#define ANDIS  OPCD( 29)
 389#define MULLI  OPCD(  7)
 390#define CMPLI  OPCD( 10)
 391#define CMPI   OPCD( 11)
 392#define SUBFIC OPCD( 8)
 393
 394#define LWZU   OPCD( 33)
 395#define STWU   OPCD( 37)
 396
 397#define RLWIMI OPCD( 20)
 398#define RLWINM OPCD( 21)
 399#define RLWNM  OPCD( 23)
 400
 401#define RLDICL MD30(  0)
 402#define RLDICR MD30(  1)
 403#define RLDIMI MD30(  3)
 404#define RLDCL  MDS30( 8)
 405
 406#define BCLR   XO19( 16)
 407#define BCCTR  XO19(528)
 408#define CRAND  XO19(257)
 409#define CRANDC XO19(129)
 410#define CRNAND XO19(225)
 411#define CROR   XO19(449)
 412#define CRNOR  XO19( 33)
 413
 414#define EXTSB  XO31(954)
 415#define EXTSH  XO31(922)
 416#define EXTSW  XO31(986)
 417#define ADD    XO31(266)
 418#define ADDE   XO31(138)
 419#define ADDME  XO31(234)
 420#define ADDZE  XO31(202)
 421#define ADDC   XO31( 10)
 422#define AND    XO31( 28)
 423#define SUBF   XO31( 40)
 424#define SUBFC  XO31(  8)
 425#define SUBFE  XO31(136)
 426#define SUBFME XO31(232)
 427#define SUBFZE XO31(200)
 428#define OR     XO31(444)
 429#define XOR    XO31(316)
 430#define MULLW  XO31(235)
 431#define MULHW  XO31( 75)
 432#define MULHWU XO31( 11)
 433#define DIVW   XO31(491)
 434#define DIVWU  XO31(459)
 435#define CMP    XO31(  0)
 436#define CMPL   XO31( 32)
 437#define LHBRX  XO31(790)
 438#define LWBRX  XO31(534)
 439#define LDBRX  XO31(532)
 440#define STHBRX XO31(918)
 441#define STWBRX XO31(662)
 442#define STDBRX XO31(660)
 443#define MFSPR  XO31(339)
 444#define MTSPR  XO31(467)
 445#define SRAWI  XO31(824)
 446#define NEG    XO31(104)
 447#define MFCR   XO31( 19)
 448#define MFOCRF (MFCR | (1u << 20))
 449#define NOR    XO31(124)
 450#define CNTLZW XO31( 26)
 451#define CNTLZD XO31( 58)
 452#define ANDC   XO31( 60)
 453#define ORC    XO31(412)
 454#define EQV    XO31(284)
 455#define NAND   XO31(476)
 456#define ISEL   XO31( 15)
 457
 458#define MULLD  XO31(233)
 459#define MULHD  XO31( 73)
 460#define MULHDU XO31(  9)
 461#define DIVD   XO31(489)
 462#define DIVDU  XO31(457)
 463
 464#define LBZX   XO31( 87)
 465#define LHZX   XO31(279)
 466#define LHAX   XO31(343)
 467#define LWZX   XO31( 23)
 468#define STBX   XO31(215)
 469#define STHX   XO31(407)
 470#define STWX   XO31(151)
 471
 472#define EIEIO  XO31(854)
 473#define HWSYNC XO31(598)
 474#define LWSYNC (HWSYNC | (1u << 21))
 475
 476#define SPR(a, b) ((((a)<<5)|(b))<<11)
 477#define LR     SPR(8, 0)
 478#define CTR    SPR(9, 0)
 479
 480#define SLW    XO31( 24)
 481#define SRW    XO31(536)
 482#define SRAW   XO31(792)
 483
 484#define SLD    XO31( 27)
 485#define SRD    XO31(539)
 486#define SRAD   XO31(794)
 487#define SRADI  XO31(413<<1)
 488
 489#define TW     XO31( 4)
 490#define TRAP   (TW | TO(31))
 491
 492#define NOP    ORI  /* ori 0,0,0 */
 493
 494#define RT(r) ((r)<<21)
 495#define RS(r) ((r)<<21)
 496#define RA(r) ((r)<<16)
 497#define RB(r) ((r)<<11)
 498#define TO(t) ((t)<<21)
 499#define SH(s) ((s)<<11)
 500#define MB(b) ((b)<<6)
 501#define ME(e) ((e)<<1)
 502#define BO(o) ((o)<<21)
 503#define MB64(b) ((b)<<5)
 504#define FXM(b) (1 << (19 - (b)))
 505
 506#define LK    1
 507
 508#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
 509#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
 510#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
 511#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
 512
 513#define BF(n)    ((n)<<23)
 514#define BI(n, c) (((c)+((n)*4))<<16)
 515#define BT(n, c) (((c)+((n)*4))<<21)
 516#define BA(n, c) (((c)+((n)*4))<<16)
 517#define BB(n, c) (((c)+((n)*4))<<11)
 518#define BC_(n, c) (((c)+((n)*4))<<6)
 519
 520#define BO_COND_TRUE  BO(12)
 521#define BO_COND_FALSE BO( 4)
 522#define BO_ALWAYS     BO(20)
 523
 524enum {
 525    CR_LT,
 526    CR_GT,
 527    CR_EQ,
 528    CR_SO
 529};
 530
 531static const uint32_t tcg_to_bc[] = {
 532    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
 533    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
 534    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
 535    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
 536    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
 537    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
 538    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
 539    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
 540    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
 541    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
 542};
 543
 544/* The low bit here is set if the RA and RB fields must be inverted.  */
 545static const uint32_t tcg_to_isel[] = {
 546    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
 547    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
 548    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
 549    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
 550    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
 551    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
 552    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
 553    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
 554    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
 555    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
 556};
 557
 558static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
 559                             TCGReg base, tcg_target_long offset);
 560
 561static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
 562{
 563    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 564    if (ret != arg) {
 565        tcg_out32(s, OR | SAB(arg, ret, arg));
 566    }
 567}
 568
 569static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
 570                               int sh, int mb)
 571{
 572    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
 573    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
 574    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
 575    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
 576}
 577
 578static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
 579                               int sh, int mb, int me)
 580{
 581    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
 582}
 583
 584static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
 585{
 586    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
 587}
 588
 589static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
 590{
 591    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
 592}
 593
 594static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
 595{
 596    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
 597}
 598
 599static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
 600{
 601    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
 602}
 603
 604static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
 605{
 606    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
 607}
 608
 609static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
 610{
 611    if (arg == (int16_t) arg) {
 612        tcg_out32(s, ADDI | TAI(ret, 0, arg));
 613    } else {
 614        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
 615        if (arg & 0xffff) {
 616            tcg_out32(s, ORI | SAI(ret, ret, arg));
 617        }
 618    }
 619}
 620
 621static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
 622                         tcg_target_long arg)
 623{
 624    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 625    if (type == TCG_TYPE_I32 || arg == (int32_t)arg) {
 626        tcg_out_movi32(s, ret, arg);
 627    } else if (arg == (uint32_t)arg && !(arg & 0x8000)) {
 628        tcg_out32(s, ADDI | TAI(ret, 0, arg));
 629        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
 630    } else {
 631        int32_t high;
 632
 633        if (USE_REG_RA) {
 634            intptr_t diff = arg - (intptr_t)tb_ret_addr;
 635            if (diff == (int32_t)diff) {
 636                tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff);
 637                return;
 638            }
 639        }
 640
 641        high = arg >> 31 >> 1;
 642        tcg_out_movi32(s, ret, high);
 643        if (high) {
 644            tcg_out_shli64(s, ret, ret, 32);
 645        }
 646        if (arg & 0xffff0000) {
 647            tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
 648        }
 649        if (arg & 0xffff) {
 650            tcg_out32(s, ORI | SAI(ret, ret, arg));
 651        }
 652    }
 653}
 654
 655static bool mask_operand(uint32_t c, int *mb, int *me)
 656{
 657    uint32_t lsb, test;
 658
 659    /* Accept a bit pattern like:
 660           0....01....1
 661           1....10....0
 662           0..01..10..0
 663       Keep track of the transitions.  */
 664    if (c == 0 || c == -1) {
 665        return false;
 666    }
 667    test = c;
 668    lsb = test & -test;
 669    test += lsb;
 670    if (test & (test - 1)) {
 671        return false;
 672    }
 673
 674    *me = clz32(lsb);
 675    *mb = test ? clz32(test & -test) + 1 : 0;
 676    return true;
 677}
 678
 679static bool mask64_operand(uint64_t c, int *mb, int *me)
 680{
 681    uint64_t lsb;
 682
 683    if (c == 0) {
 684        return false;
 685    }
 686
 687    lsb = c & -c;
 688    /* Accept 1..10..0.  */
 689    if (c == -lsb) {
 690        *mb = 0;
 691        *me = clz64(lsb);
 692        return true;
 693    }
 694    /* Accept 0..01..1.  */
 695    if (lsb == 1 && (c & (c + 1)) == 0) {
 696        *mb = clz64(c + 1) + 1;
 697        *me = 63;
 698        return true;
 699    }
 700    return false;
 701}
 702
 703static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
 704{
 705    int mb, me;
 706
 707    if (mask_operand(c, &mb, &me)) {
 708        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
 709    } else if ((c & 0xffff) == c) {
 710        tcg_out32(s, ANDI | SAI(src, dst, c));
 711        return;
 712    } else if ((c & 0xffff0000) == c) {
 713        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
 714        return;
 715    } else {
 716        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
 717        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
 718    }
 719}
 720
 721static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
 722{
 723    int mb, me;
 724
 725    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
 726    if (mask64_operand(c, &mb, &me)) {
 727        if (mb == 0) {
 728            tcg_out_rld(s, RLDICR, dst, src, 0, me);
 729        } else {
 730            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
 731        }
 732    } else if ((c & 0xffff) == c) {
 733        tcg_out32(s, ANDI | SAI(src, dst, c));
 734        return;
 735    } else if ((c & 0xffff0000) == c) {
 736        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
 737        return;
 738    } else {
 739        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
 740        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
 741    }
 742}
 743
 744static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
 745                           int op_lo, int op_hi)
 746{
 747    if (c >> 16) {
 748        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
 749        src = dst;
 750    }
 751    if (c & 0xffff) {
 752        tcg_out32(s, op_lo | SAI(src, dst, c));
 753        src = dst;
 754    }
 755}
 756
 757static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
 758{
 759    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
 760}
 761
 762static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
 763{
 764    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
 765}
 766
 767static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
 768{
 769    ptrdiff_t disp = tcg_pcrel_diff(s, target);
 770    if (in_range_b(disp)) {
 771        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
 772    } else {
 773        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
 774        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
 775        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
 776    }
 777}
 778
 779static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
 780                             TCGReg base, tcg_target_long offset)
 781{
 782    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
 783    bool is_store = false;
 784    TCGReg rs = TCG_REG_TMP1;
 785
 786    switch (opi) {
 787    case LD: case LWA:
 788        align = 3;
 789        /* FALLTHRU */
 790    default:
 791        if (rt != TCG_REG_R0) {
 792            rs = rt;
 793            break;
 794        }
 795        break;
 796    case STD:
 797        align = 3;
 798        /* FALLTHRU */
 799    case STB: case STH: case STW:
 800        is_store = true;
 801        break;
 802    }
 803
 804    /* For unaligned, or very large offsets, use the indexed form.  */
 805    if (offset & align || offset != (int32_t)offset) {
 806        if (rs == base) {
 807            rs = TCG_REG_R0;
 808        }
 809        tcg_debug_assert(!is_store || rs != rt);
 810        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
 811        tcg_out32(s, opx | TAB(rt, base, rs));
 812        return;
 813    }
 814
 815    l0 = (int16_t)offset;
 816    offset = (offset - l0) >> 16;
 817    l1 = (int16_t)offset;
 818
 819    if (l1 < 0 && orig >= 0) {
 820        extra = 0x4000;
 821        l1 = (int16_t)(offset - 0x4000);
 822    }
 823    if (l1) {
 824        tcg_out32(s, ADDIS | TAI(rs, base, l1));
 825        base = rs;
 826    }
 827    if (extra) {
 828        tcg_out32(s, ADDIS | TAI(rs, base, extra));
 829        base = rs;
 830    }
 831    if (opi != ADDI || base != rt || l0 != 0) {
 832        tcg_out32(s, opi | TAI(rt, base, l0));
 833    }
 834}
 835
 836static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
 837                              TCGReg arg1, intptr_t arg2)
 838{
 839    int opi, opx;
 840
 841    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 842    if (type == TCG_TYPE_I32) {
 843        opi = LWZ, opx = LWZX;
 844    } else {
 845        opi = LD, opx = LDX;
 846    }
 847    tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
 848}
 849
 850static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
 851                              TCGReg arg1, intptr_t arg2)
 852{
 853    int opi, opx;
 854
 855    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 856    if (type == TCG_TYPE_I32) {
 857        opi = STW, opx = STWX;
 858    } else {
 859        opi = STD, opx = STDX;
 860    }
 861    tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
 862}
 863
 864static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 865                               TCGReg base, intptr_t ofs)
 866{
 867    return false;
 868}
 869
 870static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
 871                        int const_arg2, int cr, TCGType type)
 872{
 873    int imm;
 874    uint32_t op;
 875
 876    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 877
 878    /* Simplify the comparisons below wrt CMPI.  */
 879    if (type == TCG_TYPE_I32) {
 880        arg2 = (int32_t)arg2;
 881    }
 882
 883    switch (cond) {
 884    case TCG_COND_EQ:
 885    case TCG_COND_NE:
 886        if (const_arg2) {
 887            if ((int16_t) arg2 == arg2) {
 888                op = CMPI;
 889                imm = 1;
 890                break;
 891            } else if ((uint16_t) arg2 == arg2) {
 892                op = CMPLI;
 893                imm = 1;
 894                break;
 895            }
 896        }
 897        op = CMPL;
 898        imm = 0;
 899        break;
 900
 901    case TCG_COND_LT:
 902    case TCG_COND_GE:
 903    case TCG_COND_LE:
 904    case TCG_COND_GT:
 905        if (const_arg2) {
 906            if ((int16_t) arg2 == arg2) {
 907                op = CMPI;
 908                imm = 1;
 909                break;
 910            }
 911        }
 912        op = CMP;
 913        imm = 0;
 914        break;
 915
 916    case TCG_COND_LTU:
 917    case TCG_COND_GEU:
 918    case TCG_COND_LEU:
 919    case TCG_COND_GTU:
 920        if (const_arg2) {
 921            if ((uint16_t) arg2 == arg2) {
 922                op = CMPLI;
 923                imm = 1;
 924                break;
 925            }
 926        }
 927        op = CMPL;
 928        imm = 0;
 929        break;
 930
 931    default:
 932        tcg_abort();
 933    }
 934    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
 935
 936    if (imm) {
 937        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
 938    } else {
 939        if (const_arg2) {
 940            tcg_out_movi(s, type, TCG_REG_R0, arg2);
 941            arg2 = TCG_REG_R0;
 942        }
 943        tcg_out32(s, op | RA(arg1) | RB(arg2));
 944    }
 945}
 946
 947static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
 948                                TCGReg dst, TCGReg src)
 949{
 950    if (type == TCG_TYPE_I32) {
 951        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
 952        tcg_out_shri32(s, dst, dst, 5);
 953    } else {
 954        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
 955        tcg_out_shri64(s, dst, dst, 6);
 956    }
 957}
 958
 959static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
 960{
 961    /* X != 0 implies X + -1 generates a carry.  Extra addition
 962       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
 963    if (dst != src) {
 964        tcg_out32(s, ADDIC | TAI(dst, src, -1));
 965        tcg_out32(s, SUBFE | TAB(dst, dst, src));
 966    } else {
 967        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
 968        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
 969    }
 970}
 971
 972static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
 973                                  bool const_arg2)
 974{
 975    if (const_arg2) {
 976        if ((uint32_t)arg2 == arg2) {
 977            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
 978        } else {
 979            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
 980            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
 981        }
 982    } else {
 983        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
 984    }
 985    return TCG_REG_R0;
 986}
 987
 988static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
 989                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
 990                            int const_arg2)
 991{
 992    int crop, sh;
 993
 994    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 995
 996    /* Ignore high bits of a potential constant arg2.  */
 997    if (type == TCG_TYPE_I32) {
 998        arg2 = (uint32_t)arg2;
 999    }
1000
1001    /* Handle common and trivial cases before handling anything else.  */
1002    if (arg2 == 0) {
1003        switch (cond) {
1004        case TCG_COND_EQ:
1005            tcg_out_setcond_eq0(s, type, arg0, arg1);
1006            return;
1007        case TCG_COND_NE:
1008            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1009                tcg_out_ext32u(s, TCG_REG_R0, arg1);
1010                arg1 = TCG_REG_R0;
1011            }
1012            tcg_out_setcond_ne0(s, arg0, arg1);
1013            return;
1014        case TCG_COND_GE:
1015            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1016            arg1 = arg0;
1017            /* FALLTHRU */
1018        case TCG_COND_LT:
1019            /* Extract the sign bit.  */
1020            if (type == TCG_TYPE_I32) {
1021                tcg_out_shri32(s, arg0, arg1, 31);
1022            } else {
1023                tcg_out_shri64(s, arg0, arg1, 63);
1024            }
1025            return;
1026        default:
1027            break;
1028        }
1029    }
1030
1031    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1032       All other cases below are also at least 3 insns, so speed up the
1033       code generator by not considering them and always using ISEL.  */
1034    if (HAVE_ISEL) {
1035        int isel, tab;
1036
1037        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1038
1039        isel = tcg_to_isel[cond];
1040
1041        tcg_out_movi(s, type, arg0, 1);
1042        if (isel & 1) {
1043            /* arg0 = (bc ? 0 : 1) */
1044            tab = TAB(arg0, 0, arg0);
1045            isel &= ~1;
1046        } else {
1047            /* arg0 = (bc ? 1 : 0) */
1048            tcg_out_movi(s, type, TCG_REG_R0, 0);
1049            tab = TAB(arg0, arg0, TCG_REG_R0);
1050        }
1051        tcg_out32(s, isel | tab);
1052        return;
1053    }
1054
1055    switch (cond) {
1056    case TCG_COND_EQ:
1057        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1058        tcg_out_setcond_eq0(s, type, arg0, arg1);
1059        return;
1060
1061    case TCG_COND_NE:
1062        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1063        /* Discard the high bits only once, rather than both inputs.  */
1064        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1065            tcg_out_ext32u(s, TCG_REG_R0, arg1);
1066            arg1 = TCG_REG_R0;
1067        }
1068        tcg_out_setcond_ne0(s, arg0, arg1);
1069        return;
1070
1071    case TCG_COND_GT:
1072    case TCG_COND_GTU:
1073        sh = 30;
1074        crop = 0;
1075        goto crtest;
1076
1077    case TCG_COND_LT:
1078    case TCG_COND_LTU:
1079        sh = 29;
1080        crop = 0;
1081        goto crtest;
1082
1083    case TCG_COND_GE:
1084    case TCG_COND_GEU:
1085        sh = 31;
1086        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
1087        goto crtest;
1088
1089    case TCG_COND_LE:
1090    case TCG_COND_LEU:
1091        sh = 31;
1092        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
1093    crtest:
1094        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1095        if (crop) {
1096            tcg_out32(s, crop);
1097        }
1098        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1099        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1100        break;
1101
1102    default:
1103        tcg_abort();
1104    }
1105}
1106
1107static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1108{
1109    if (l->has_value) {
1110        tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr));
1111    } else {
1112        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1113        tcg_out_bc_noaddr(s, bc);
1114    }
1115}
1116
1117static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1118                           TCGArg arg1, TCGArg arg2, int const_arg2,
1119                           TCGLabel *l, TCGType type)
1120{
1121    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1122    tcg_out_bc(s, tcg_to_bc[cond], l);
1123}
1124
1125static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1126                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1127                            TCGArg v2, bool const_c2)
1128{
1129    /* If for some reason both inputs are zero, don't produce bad code.  */
1130    if (v1 == 0 && v2 == 0) {
1131        tcg_out_movi(s, type, dest, 0);
1132        return;
1133    }
1134
1135    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1136
1137    if (HAVE_ISEL) {
1138        int isel = tcg_to_isel[cond];
1139
1140        /* Swap the V operands if the operation indicates inversion.  */
1141        if (isel & 1) {
1142            int t = v1;
1143            v1 = v2;
1144            v2 = t;
1145            isel &= ~1;
1146        }
1147        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1148        if (v2 == 0) {
1149            tcg_out_movi(s, type, TCG_REG_R0, 0);
1150        }
1151        tcg_out32(s, isel | TAB(dest, v1, v2));
1152    } else {
1153        if (dest == v2) {
1154            cond = tcg_invert_cond(cond);
1155            v2 = v1;
1156        } else if (dest != v1) {
1157            if (v1 == 0) {
1158                tcg_out_movi(s, type, dest, 0);
1159            } else {
1160                tcg_out_mov(s, type, dest, v1);
1161            }
1162        }
1163        /* Branch forward over one insn */
1164        tcg_out32(s, tcg_to_bc[cond] | 8);
1165        if (v2 == 0) {
1166            tcg_out_movi(s, type, dest, 0);
1167        } else {
1168            tcg_out_mov(s, type, dest, v2);
1169        }
1170    }
1171}
1172
1173static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1174                         const int *const_args)
1175{
1176    static const struct { uint8_t bit1, bit2; } bits[] = {
1177        [TCG_COND_LT ] = { CR_LT, CR_LT },
1178        [TCG_COND_LE ] = { CR_LT, CR_GT },
1179        [TCG_COND_GT ] = { CR_GT, CR_GT },
1180        [TCG_COND_GE ] = { CR_GT, CR_LT },
1181        [TCG_COND_LTU] = { CR_LT, CR_LT },
1182        [TCG_COND_LEU] = { CR_LT, CR_GT },
1183        [TCG_COND_GTU] = { CR_GT, CR_GT },
1184        [TCG_COND_GEU] = { CR_GT, CR_LT },
1185    };
1186
1187    TCGCond cond = args[4], cond2;
1188    TCGArg al, ah, bl, bh;
1189    int blconst, bhconst;
1190    int op, bit1, bit2;
1191
1192    al = args[0];
1193    ah = args[1];
1194    bl = args[2];
1195    bh = args[3];
1196    blconst = const_args[2];
1197    bhconst = const_args[3];
1198
1199    switch (cond) {
1200    case TCG_COND_EQ:
1201        op = CRAND;
1202        goto do_equality;
1203    case TCG_COND_NE:
1204        op = CRNAND;
1205    do_equality:
1206        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1207        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1208        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1209        break;
1210
1211    case TCG_COND_LT:
1212    case TCG_COND_LE:
1213    case TCG_COND_GT:
1214    case TCG_COND_GE:
1215    case TCG_COND_LTU:
1216    case TCG_COND_LEU:
1217    case TCG_COND_GTU:
1218    case TCG_COND_GEU:
1219        bit1 = bits[cond].bit1;
1220        bit2 = bits[cond].bit2;
1221        op = (bit1 != bit2 ? CRANDC : CRAND);
1222        cond2 = tcg_unsigned_cond(cond);
1223
1224        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1225        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1226        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1227        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1228        break;
1229
1230    default:
1231        tcg_abort();
1232    }
1233}
1234
1235static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1236                             const int *const_args)
1237{
1238    tcg_out_cmp2(s, args + 1, const_args + 1);
1239    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1240    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
1241}
1242
1243static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
1244                             const int *const_args)
1245{
1246    tcg_out_cmp2(s, args, const_args);
1247    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1248}
1249
1250static void tcg_out_mb(TCGContext *s, TCGArg a0)
1251{
1252    uint32_t insn = HWSYNC;
1253    a0 &= TCG_MO_ALL;
1254    if (a0 == TCG_MO_LD_LD) {
1255        insn = LWSYNC;
1256    } else if (a0 == TCG_MO_ST_ST) {
1257        insn = EIEIO;
1258    }
1259    tcg_out32(s, insn);
1260}
1261
1262#ifdef __powerpc64__
1263void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
1264{
1265    tcg_insn_unit i1, i2;
1266    uint64_t pair;
1267    intptr_t diff = addr - jmp_addr;
1268
1269    if (in_range_b(diff)) {
1270        i1 = B | (diff & 0x3fffffc);
1271        i2 = NOP;
1272    } else if (USE_REG_RA) {
1273        intptr_t lo, hi;
1274        diff = addr - (uintptr_t)tb_ret_addr;
1275        lo = (int16_t)diff;
1276        hi = (int32_t)(diff - lo);
1277        tcg_debug_assert(diff == hi + lo);
1278        i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
1279        i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
1280    } else {
1281        tcg_debug_assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
1282        i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
1283        i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
1284    }
1285#ifdef HOST_WORDS_BIGENDIAN
1286    pair = (uint64_t)i1 << 32 | i2;
1287#else
1288    pair = (uint64_t)i2 << 32 | i1;
1289#endif
1290
1291    atomic_set((uint64_t *)jmp_addr, pair);
1292    flush_icache_range(jmp_addr, jmp_addr + 8);
1293}
1294#else
1295void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
1296{
1297    intptr_t diff = addr - jmp_addr;
1298    tcg_debug_assert(in_range_b(diff));
1299    atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
1300    flush_icache_range(jmp_addr, jmp_addr + 4);
1301}
1302#endif
1303
1304static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1305{
1306#ifdef _CALL_AIX
1307    /* Look through the descriptor.  If the branch is in range, and we
1308       don't have to spend too much effort on building the toc.  */
1309    void *tgt = ((void **)target)[0];
1310    uintptr_t toc = ((uintptr_t *)target)[1];
1311    intptr_t diff = tcg_pcrel_diff(s, tgt);
1312
1313    if (in_range_b(diff) && toc == (uint32_t)toc) {
1314        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1315        tcg_out_b(s, LK, tgt);
1316    } else {
1317        /* Fold the low bits of the constant into the addresses below.  */
1318        intptr_t arg = (intptr_t)target;
1319        int ofs = (int16_t)arg;
1320
1321        if (ofs + 8 < 0x8000) {
1322            arg -= ofs;
1323        } else {
1324            ofs = 0;
1325        }
1326        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1327        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1328        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1329        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1330        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1331    }
1332#elif defined(_CALL_ELF) && _CALL_ELF == 2
1333    intptr_t diff;
1334
1335    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1336       address, which the callee uses to compute its TOC address.  */
1337    /* FIXME: when the branch is in range, we could avoid r12 load if we
1338       knew that the destination uses the same TOC, and what its local
1339       entry point offset is.  */
1340    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1341
1342    diff = tcg_pcrel_diff(s, target);
1343    if (in_range_b(diff)) {
1344        tcg_out_b(s, LK, target);
1345    } else {
1346        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1347        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1348    }
1349#else
1350    tcg_out_b(s, LK, target);
1351#endif
1352}
1353
1354static const uint32_t qemu_ldx_opc[16] = {
1355    [MO_UB] = LBZX,
1356    [MO_UW] = LHZX,
1357    [MO_UL] = LWZX,
1358    [MO_Q]  = LDX,
1359    [MO_SW] = LHAX,
1360    [MO_SL] = LWAX,
1361    [MO_BSWAP | MO_UB] = LBZX,
1362    [MO_BSWAP | MO_UW] = LHBRX,
1363    [MO_BSWAP | MO_UL] = LWBRX,
1364    [MO_BSWAP | MO_Q]  = LDBRX,
1365};
1366
1367static const uint32_t qemu_stx_opc[16] = {
1368    [MO_UB] = STBX,
1369    [MO_UW] = STHX,
1370    [MO_UL] = STWX,
1371    [MO_Q]  = STDX,
1372    [MO_BSWAP | MO_UB] = STBX,
1373    [MO_BSWAP | MO_UW] = STHBRX,
1374    [MO_BSWAP | MO_UL] = STWBRX,
1375    [MO_BSWAP | MO_Q]  = STDBRX,
1376};
1377
1378static const uint32_t qemu_exts_opc[4] = {
1379    EXTSB, EXTSH, EXTSW, 0
1380};
1381
1382#if defined (CONFIG_SOFTMMU)
1383/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
1384 *                                 int mmu_idx, uintptr_t ra)
1385 */
1386static void * const qemu_ld_helpers[16] = {
1387    [MO_UB]   = helper_ret_ldub_mmu,
1388    [MO_LEUW] = helper_le_lduw_mmu,
1389    [MO_LEUL] = helper_le_ldul_mmu,
1390    [MO_LEQ]  = helper_le_ldq_mmu,
1391    [MO_BEUW] = helper_be_lduw_mmu,
1392    [MO_BEUL] = helper_be_ldul_mmu,
1393    [MO_BEQ]  = helper_be_ldq_mmu,
1394};
1395
1396/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
1397 *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
1398 */
1399static void * const qemu_st_helpers[16] = {
1400    [MO_UB]   = helper_ret_stb_mmu,
1401    [MO_LEUW] = helper_le_stw_mmu,
1402    [MO_LEUL] = helper_le_stl_mmu,
1403    [MO_LEQ]  = helper_le_stq_mmu,
1404    [MO_BEUW] = helper_be_stw_mmu,
1405    [MO_BEUL] = helper_be_stl_mmu,
1406    [MO_BEQ]  = helper_be_stq_mmu,
1407};
1408
1409/* Perform the TLB load and compare.  Places the result of the comparison
1410   in CR7, loads the addend of the TLB into R3, and returns the register
1411   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
1412
1413static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
1414                               TCGReg addrlo, TCGReg addrhi,
1415                               int mem_index, bool is_read)
1416{
1417    int cmp_off
1418        = (is_read
1419           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1420           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1421    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1422    TCGReg base = TCG_AREG0;
1423    unsigned s_bits = opc & MO_SIZE;
1424    unsigned a_bits = get_alignment_bits(opc);
1425
1426    /* Extract the page index, shifted into place for tlb index.  */
1427    if (TCG_TARGET_REG_BITS == 64) {
1428        if (TARGET_LONG_BITS == 32) {
1429            /* Zero-extend the address into a place helpful for further use. */
1430            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
1431            addrlo = TCG_REG_R4;
1432        } else {
1433            tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo,
1434                        64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS);
1435        }
1436    }
1437
1438    /* Compensate for very large offsets.  */
1439    if (add_off >= 0x8000) {
1440        /* Most target env are smaller than 32k; none are larger than 64k.
1441           Simplify the logic here merely to offset by 0x7ff0, giving us a
1442           range just shy of 64k.  Check this assumption.  */
1443        QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
1444                                   tlb_table[NB_MMU_MODES - 1][1])
1445                          > 0x7ff0 + 0x7fff);
1446        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
1447        base = TCG_REG_TMP1;
1448        cmp_off -= 0x7ff0;
1449        add_off -= 0x7ff0;
1450    }
1451
1452    /* Extraction and shifting, part 2.  */
1453    if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
1454        tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo,
1455                    32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
1456                    32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
1457                    31 - CPU_TLB_ENTRY_BITS);
1458    } else {
1459        tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS);
1460    }
1461
1462    tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base));
1463
1464    /* Load the tlb comparator.  */
1465    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1466        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
1467        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
1468    } else {
1469        tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
1470    }
1471
1472    /* Load the TLB addend for use on the fast path.  Do this asap
1473       to minimize any load use delay.  */
1474    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off);
1475
1476    /* Clear the non-page, non-alignment bits from the address */
1477    if (TCG_TARGET_REG_BITS == 32) {
1478        /* We don't support unaligned accesses on 32-bits.
1479         * Preserve the bottom bits and thus trigger a comparison
1480         * failure on unaligned accesses.
1481         */
1482        if (a_bits < s_bits) {
1483            a_bits = s_bits;
1484        }
1485        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
1486                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1487    } else {
1488        TCGReg t = addrlo;
1489
1490        /* If the access is unaligned, we need to make sure we fail if we
1491         * cross a page boundary.  The trick is to add the access size-1
1492         * to the address before masking the low bits.  That will make the
1493         * address overflow to the next page if we cross a page boundary,
1494         * which will then force a mismatch of the TLB compare.
1495         */
1496        if (a_bits < s_bits) {
1497            unsigned a_mask = (1 << a_bits) - 1;
1498            unsigned s_mask = (1 << s_bits) - 1;
1499            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
1500            t = TCG_REG_R0;
1501        }
1502
1503        /* Mask the address for the requested alignment.  */
1504        if (TARGET_LONG_BITS == 32) {
1505            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
1506                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1507        } else if (a_bits == 0) {
1508            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
1509        } else {
1510            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
1511                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
1512            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
1513        }
1514    }
1515
1516    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1517        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1518                    0, 7, TCG_TYPE_I32);
1519        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
1520        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1521    } else {
1522        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1523                    0, 7, TCG_TYPE_TL);
1524    }
1525
1526    return addrlo;
1527}
1528
1529/* Record the context of a call to the out of line helper code for the slow
1530   path for a load or store, so that we can later generate the correct
1531   helper code.  */
1532static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1533                                TCGReg datalo_reg, TCGReg datahi_reg,
1534                                TCGReg addrlo_reg, TCGReg addrhi_reg,
1535                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
1536{
1537    TCGLabelQemuLdst *label = new_ldst_label(s);
1538
1539    label->is_ld = is_ld;
1540    label->oi = oi;
1541    label->datalo_reg = datalo_reg;
1542    label->datahi_reg = datahi_reg;
1543    label->addrlo_reg = addrlo_reg;
1544    label->addrhi_reg = addrhi_reg;
1545    label->raddr = raddr;
1546    label->label_ptr[0] = lptr;
1547}
1548
1549static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1550{
1551    TCGMemOpIdx oi = lb->oi;
1552    TCGMemOp opc = get_memop(oi);
1553    TCGReg hi, lo, arg = TCG_REG_R3;
1554
1555    reloc_pc14(lb->label_ptr[0], s->code_ptr);
1556
1557    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
1558
1559    lo = lb->addrlo_reg;
1560    hi = lb->addrhi_reg;
1561    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1562#ifdef TCG_TARGET_CALL_ALIGN_ARGS
1563        arg |= 1;
1564#endif
1565        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
1566        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
1567    } else {
1568        /* If the address needed to be zero-extended, we'll have already
1569           placed it in R4.  The only remaining case is 64-bit guest.  */
1570        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
1571    }
1572
1573    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
1574    tcg_out32(s, MFSPR | RT(arg) | LR);
1575
1576    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1577
1578    lo = lb->datalo_reg;
1579    hi = lb->datahi_reg;
1580    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
1581        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
1582        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
1583    } else if (opc & MO_SIGN) {
1584        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
1585        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
1586    } else {
1587        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
1588    }
1589
1590    tcg_out_b(s, 0, lb->raddr);
1591}
1592
1593static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1594{
1595    TCGMemOpIdx oi = lb->oi;
1596    TCGMemOp opc = get_memop(oi);
1597    TCGMemOp s_bits = opc & MO_SIZE;
1598    TCGReg hi, lo, arg = TCG_REG_R3;
1599
1600    reloc_pc14(lb->label_ptr[0], s->code_ptr);
1601
1602    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
1603
1604    lo = lb->addrlo_reg;
1605    hi = lb->addrhi_reg;
1606    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1607#ifdef TCG_TARGET_CALL_ALIGN_ARGS
1608        arg |= 1;
1609#endif
1610        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
1611        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
1612    } else {
1613        /* If the address needed to be zero-extended, we'll have already
1614           placed it in R4.  The only remaining case is 64-bit guest.  */
1615        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
1616    }
1617
1618    lo = lb->datalo_reg;
1619    hi = lb->datahi_reg;
1620    if (TCG_TARGET_REG_BITS == 32) {
1621        switch (s_bits) {
1622        case MO_64:
1623#ifdef TCG_TARGET_CALL_ALIGN_ARGS
1624            arg |= 1;
1625#endif
1626            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
1627            /* FALLTHRU */
1628        case MO_32:
1629            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
1630            break;
1631        default:
1632            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
1633            break;
1634        }
1635    } else {
1636        if (s_bits == MO_64) {
1637            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
1638        } else {
1639            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
1640        }
1641    }
1642
1643    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
1644    tcg_out32(s, MFSPR | RT(arg) | LR);
1645
1646    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1647
1648    tcg_out_b(s, 0, lb->raddr);
1649}
1650#endif /* SOFTMMU */
1651
1652static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
1653{
1654    TCGReg datalo, datahi, addrlo, rbase;
1655    TCGReg addrhi __attribute__((unused));
1656    TCGMemOpIdx oi;
1657    TCGMemOp opc, s_bits;
1658#ifdef CONFIG_SOFTMMU
1659    int mem_index;
1660    tcg_insn_unit *label_ptr;
1661#endif
1662
1663    datalo = *args++;
1664    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
1665    addrlo = *args++;
1666    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
1667    oi = *args++;
1668    opc = get_memop(oi);
1669    s_bits = opc & MO_SIZE;
1670
1671#ifdef CONFIG_SOFTMMU
1672    mem_index = get_mmuidx(oi);
1673    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
1674
1675    /* Load a pointer into the current opcode w/conditional branch-link. */
1676    label_ptr = s->code_ptr;
1677    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
1678
1679    rbase = TCG_REG_R3;
1680#else  /* !CONFIG_SOFTMMU */
1681    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
1682    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1683        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
1684        addrlo = TCG_REG_TMP1;
1685    }
1686#endif
1687
1688    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
1689        if (opc & MO_BSWAP) {
1690            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
1691            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
1692            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
1693        } else if (rbase != 0) {
1694            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
1695            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
1696            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
1697        } else if (addrlo == datahi) {
1698            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
1699            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
1700        } else {
1701            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
1702            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
1703        }
1704    } else {
1705        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
1706        if (!HAVE_ISA_2_06 && insn == LDBRX) {
1707            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
1708            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
1709            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
1710            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
1711        } else if (insn) {
1712            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
1713        } else {
1714            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
1715            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
1716            insn = qemu_exts_opc[s_bits];
1717            tcg_out32(s, insn | RA(datalo) | RS(datalo));
1718        }
1719    }
1720
1721#ifdef CONFIG_SOFTMMU
1722    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1723                        s->code_ptr, label_ptr);
1724#endif
1725}
1726
1727static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
1728{
1729    TCGReg datalo, datahi, addrlo, rbase;
1730    TCGReg addrhi __attribute__((unused));
1731    TCGMemOpIdx oi;
1732    TCGMemOp opc, s_bits;
1733#ifdef CONFIG_SOFTMMU
1734    int mem_index;
1735    tcg_insn_unit *label_ptr;
1736#endif
1737
1738    datalo = *args++;
1739    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
1740    addrlo = *args++;
1741    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
1742    oi = *args++;
1743    opc = get_memop(oi);
1744    s_bits = opc & MO_SIZE;
1745
1746#ifdef CONFIG_SOFTMMU
1747    mem_index = get_mmuidx(oi);
1748    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
1749
1750    /* Load a pointer into the current opcode w/conditional branch-link. */
1751    label_ptr = s->code_ptr;
1752    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
1753
1754    rbase = TCG_REG_R3;
1755#else  /* !CONFIG_SOFTMMU */
1756    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
1757    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1758        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
1759        addrlo = TCG_REG_TMP1;
1760    }
1761#endif
1762
1763    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
1764        if (opc & MO_BSWAP) {
1765            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
1766            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
1767            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
1768        } else if (rbase != 0) {
1769            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
1770            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
1771            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
1772        } else {
1773            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
1774            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
1775        }
1776    } else {
1777        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
1778        if (!HAVE_ISA_2_06 && insn == STDBRX) {
1779            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
1780            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
1781            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
1782            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
1783        } else {
1784            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
1785        }
1786    }
1787
1788#ifdef CONFIG_SOFTMMU
1789    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1790                        s->code_ptr, label_ptr);
1791#endif
1792}
1793
1794/* Parameters for function call generation, used in tcg.c.  */
1795#define TCG_TARGET_STACK_ALIGN       16
1796#define TCG_TARGET_EXTEND_ARGS       1
1797
1798#ifdef _CALL_AIX
1799# define LINK_AREA_SIZE                (6 * SZR)
1800# define LR_OFFSET                     (1 * SZR)
1801# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
1802#elif defined(TCG_TARGET_CALL_DARWIN)
1803# define LINK_AREA_SIZE                (6 * SZR)
1804# define LR_OFFSET                     (2 * SZR)
1805#elif TCG_TARGET_REG_BITS == 64
1806# if defined(_CALL_ELF) && _CALL_ELF == 2
1807#  define LINK_AREA_SIZE               (4 * SZR)
1808#  define LR_OFFSET                    (1 * SZR)
1809# endif
1810#else /* TCG_TARGET_REG_BITS == 32 */
1811# if defined(_CALL_SYSV)
1812#  define LINK_AREA_SIZE               (2 * SZR)
1813#  define LR_OFFSET                    (1 * SZR)
1814# endif
1815#endif
1816#ifndef LR_OFFSET
1817# error "Unhandled abi"
1818#endif
1819#ifndef TCG_TARGET_CALL_STACK_OFFSET
1820# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
1821#endif
1822
1823#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
1824#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
1825
1826#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
1827                     + TCG_STATIC_CALL_ARGS_SIZE    \
1828                     + CPU_TEMP_BUF_SIZE            \
1829                     + REG_SAVE_SIZE                \
1830                     + TCG_TARGET_STACK_ALIGN - 1)  \
1831                    & -TCG_TARGET_STACK_ALIGN)
1832
1833#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
1834
1835static void tcg_target_qemu_prologue(TCGContext *s)
1836{
1837    int i;
1838
1839#ifdef _CALL_AIX
1840    void **desc = (void **)s->code_ptr;
1841    desc[0] = desc + 2;                   /* entry point */
1842    desc[1] = 0;                          /* environment pointer */
1843    s->code_ptr = (void *)(desc + 2);     /* skip over descriptor */
1844#endif
1845
1846    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
1847                  CPU_TEMP_BUF_SIZE);
1848
1849    /* Prologue */
1850    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
1851    tcg_out32(s, (SZR == 8 ? STDU : STWU)
1852              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
1853
1854    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
1855        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
1856                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
1857    }
1858    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
1859
1860#ifndef CONFIG_SOFTMMU
1861    if (guest_base) {
1862        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
1863        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
1864    }
1865#endif
1866
1867    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1868    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
1869
1870    if (USE_REG_RA) {
1871#ifdef _CALL_AIX
1872        /* Make the caller load the value as the TOC into R2.  */
1873        tb_ret_addr = s->code_ptr + 2;
1874        desc[1] = tb_ret_addr;
1875        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2);
1876        tcg_out32(s, BCCTR | BO_ALWAYS);
1877#elif defined(_CALL_ELF) && _CALL_ELF == 2
1878        /* Compute from the incoming R12 value.  */
1879        tb_ret_addr = s->code_ptr + 2;
1880        tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12,
1881                                tcg_ptr_byte_diff(tb_ret_addr, s->code_buf)));
1882        tcg_out32(s, BCCTR | BO_ALWAYS);
1883#else
1884        /* Reserve max 5 insns for the constant load.  */
1885        tb_ret_addr = s->code_ptr + 6;
1886        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr);
1887        tcg_out32(s, BCCTR | BO_ALWAYS);
1888        while (s->code_ptr < tb_ret_addr) {
1889            tcg_out32(s, NOP);
1890        }
1891#endif
1892    } else {
1893        tcg_out32(s, BCCTR | BO_ALWAYS);
1894        tb_ret_addr = s->code_ptr;
1895    }
1896
1897    /* Epilogue */
1898    tcg_debug_assert(tb_ret_addr == s->code_ptr);
1899
1900    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
1901    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
1902        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
1903                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
1904    }
1905    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
1906    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
1907    tcg_out32(s, BCLR | BO_ALWAYS);
1908}
1909
1910static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
1911                       const int *const_args)
1912{
1913    TCGArg a0, a1, a2;
1914    int c;
1915
1916    switch (opc) {
1917    case INDEX_op_exit_tb:
1918        if (USE_REG_RA) {
1919            ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
1920
1921            /* Use a direct branch if we can, otherwise use the value in RA.
1922               Note that the direct branch is always backward, thus we need
1923               to account for the possibility of 5 insns from the movi.  */
1924            if (!in_range_b(disp - 20)) {
1925                tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
1926                tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
1927                tcg_out32(s, BCCTR | BO_ALWAYS);
1928                break;
1929            }
1930        }
1931        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
1932        tcg_out_b(s, 0, tb_ret_addr);
1933        break;
1934    case INDEX_op_goto_tb:
1935        tcg_debug_assert(s->tb_jmp_insn_offset);
1936        /* Direct jump. */
1937#ifdef __powerpc64__
1938        /* Ensure the next insns are 8-byte aligned. */
1939        if ((uintptr_t)s->code_ptr & 7) {
1940            tcg_out32(s, NOP);
1941        }
1942        s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1943        /* To be replaced by either a branch+nop or a load into TMP1.  */
1944        s->code_ptr += 2;
1945        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
1946        tcg_out32(s, BCCTR | BO_ALWAYS);
1947#else
1948        /* To be replaced by a branch.  */
1949        s->code_ptr++;
1950#endif
1951        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1952        break;
1953    case INDEX_op_br:
1954        {
1955            TCGLabel *l = arg_label(args[0]);
1956
1957            if (l->has_value) {
1958                tcg_out_b(s, 0, l->u.value_ptr);
1959            } else {
1960                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
1961                tcg_out_b_noaddr(s, B);
1962            }
1963        }
1964        break;
1965    case INDEX_op_ld8u_i32:
1966    case INDEX_op_ld8u_i64:
1967        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
1968        break;
1969    case INDEX_op_ld8s_i32:
1970    case INDEX_op_ld8s_i64:
1971        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
1972        tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
1973        break;
1974    case INDEX_op_ld16u_i32:
1975    case INDEX_op_ld16u_i64:
1976        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
1977        break;
1978    case INDEX_op_ld16s_i32:
1979    case INDEX_op_ld16s_i64:
1980        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
1981        break;
1982    case INDEX_op_ld_i32:
1983    case INDEX_op_ld32u_i64:
1984        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
1985        break;
1986    case INDEX_op_ld32s_i64:
1987        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
1988        break;
1989    case INDEX_op_ld_i64:
1990        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
1991        break;
1992    case INDEX_op_st8_i32:
1993    case INDEX_op_st8_i64:
1994        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
1995        break;
1996    case INDEX_op_st16_i32:
1997    case INDEX_op_st16_i64:
1998        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
1999        break;
2000    case INDEX_op_st_i32:
2001    case INDEX_op_st32_i64:
2002        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2003        break;
2004    case INDEX_op_st_i64:
2005        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2006        break;
2007
2008    case INDEX_op_add_i32:
2009        a0 = args[0], a1 = args[1], a2 = args[2];
2010        if (const_args[2]) {
2011        do_addi_32:
2012            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2013        } else {
2014            tcg_out32(s, ADD | TAB(a0, a1, a2));
2015        }
2016        break;
2017    case INDEX_op_sub_i32:
2018        a0 = args[0], a1 = args[1], a2 = args[2];
2019        if (const_args[1]) {
2020            if (const_args[2]) {
2021                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2022            } else {
2023                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2024            }
2025        } else if (const_args[2]) {
2026            a2 = -a2;
2027            goto do_addi_32;
2028        } else {
2029            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2030        }
2031        break;
2032
2033    case INDEX_op_and_i32:
2034        a0 = args[0], a1 = args[1], a2 = args[2];
2035        if (const_args[2]) {
2036            tcg_out_andi32(s, a0, a1, a2);
2037        } else {
2038            tcg_out32(s, AND | SAB(a1, a0, a2));
2039        }
2040        break;
2041    case INDEX_op_and_i64:
2042        a0 = args[0], a1 = args[1], a2 = args[2];
2043        if (const_args[2]) {
2044            tcg_out_andi64(s, a0, a1, a2);
2045        } else {
2046            tcg_out32(s, AND | SAB(a1, a0, a2));
2047        }
2048        break;
2049    case INDEX_op_or_i64:
2050    case INDEX_op_or_i32:
2051        a0 = args[0], a1 = args[1], a2 = args[2];
2052        if (const_args[2]) {
2053            tcg_out_ori32(s, a0, a1, a2);
2054        } else {
2055            tcg_out32(s, OR | SAB(a1, a0, a2));
2056        }
2057        break;
2058    case INDEX_op_xor_i64:
2059    case INDEX_op_xor_i32:
2060        a0 = args[0], a1 = args[1], a2 = args[2];
2061        if (const_args[2]) {
2062            tcg_out_xori32(s, a0, a1, a2);
2063        } else {
2064            tcg_out32(s, XOR | SAB(a1, a0, a2));
2065        }
2066        break;
2067    case INDEX_op_andc_i32:
2068        a0 = args[0], a1 = args[1], a2 = args[2];
2069        if (const_args[2]) {
2070            tcg_out_andi32(s, a0, a1, ~a2);
2071        } else {
2072            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2073        }
2074        break;
2075    case INDEX_op_andc_i64:
2076        a0 = args[0], a1 = args[1], a2 = args[2];
2077        if (const_args[2]) {
2078            tcg_out_andi64(s, a0, a1, ~a2);
2079        } else {
2080            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2081        }
2082        break;
2083    case INDEX_op_orc_i32:
2084        if (const_args[2]) {
2085            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2086            break;
2087        }
2088        /* FALLTHRU */
2089    case INDEX_op_orc_i64:
2090        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2091        break;
2092    case INDEX_op_eqv_i32:
2093        if (const_args[2]) {
2094            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2095            break;
2096        }
2097        /* FALLTHRU */
2098    case INDEX_op_eqv_i64:
2099        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2100        break;
2101    case INDEX_op_nand_i32:
2102    case INDEX_op_nand_i64:
2103        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2104        break;
2105    case INDEX_op_nor_i32:
2106    case INDEX_op_nor_i64:
2107        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2108        break;
2109
2110    case INDEX_op_mul_i32:
2111        a0 = args[0], a1 = args[1], a2 = args[2];
2112        if (const_args[2]) {
2113            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2114        } else {
2115            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2116        }
2117        break;
2118
2119    case INDEX_op_div_i32:
2120        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2121        break;
2122
2123    case INDEX_op_divu_i32:
2124        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2125        break;
2126
2127    case INDEX_op_shl_i32:
2128        if (const_args[2]) {
2129            tcg_out_shli32(s, args[0], args[1], args[2]);
2130        } else {
2131            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2132        }
2133        break;
2134    case INDEX_op_shr_i32:
2135        if (const_args[2]) {
2136            tcg_out_shri32(s, args[0], args[1], args[2]);
2137        } else {
2138            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2139        }
2140        break;
2141    case INDEX_op_sar_i32:
2142        if (const_args[2]) {
2143            tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2]));
2144        } else {
2145            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2146        }
2147        break;
2148    case INDEX_op_rotl_i32:
2149        if (const_args[2]) {
2150            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2151        } else {
2152            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2153                         | MB(0) | ME(31));
2154        }
2155        break;
2156    case INDEX_op_rotr_i32:
2157        if (const_args[2]) {
2158            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2159        } else {
2160            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2161            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2162                         | MB(0) | ME(31));
2163        }
2164        break;
2165
2166    case INDEX_op_brcond_i32:
2167        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2168                       arg_label(args[3]), TCG_TYPE_I32);
2169        break;
2170    case INDEX_op_brcond_i64:
2171        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2172                       arg_label(args[3]), TCG_TYPE_I64);
2173        break;
2174    case INDEX_op_brcond2_i32:
2175        tcg_out_brcond2(s, args, const_args);
2176        break;
2177
2178    case INDEX_op_neg_i32:
2179    case INDEX_op_neg_i64:
2180        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2181        break;
2182
2183    case INDEX_op_not_i32:
2184    case INDEX_op_not_i64:
2185        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2186        break;
2187
2188    case INDEX_op_add_i64:
2189        a0 = args[0], a1 = args[1], a2 = args[2];
2190        if (const_args[2]) {
2191        do_addi_64:
2192            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2193        } else {
2194            tcg_out32(s, ADD | TAB(a0, a1, a2));
2195        }
2196        break;
2197    case INDEX_op_sub_i64:
2198        a0 = args[0], a1 = args[1], a2 = args[2];
2199        if (const_args[1]) {
2200            if (const_args[2]) {
2201                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2202            } else {
2203                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2204            }
2205        } else if (const_args[2]) {
2206            a2 = -a2;
2207            goto do_addi_64;
2208        } else {
2209            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2210        }
2211        break;
2212
2213    case INDEX_op_shl_i64:
2214        if (const_args[2]) {
2215            tcg_out_shli64(s, args[0], args[1], args[2]);
2216        } else {
2217            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2218        }
2219        break;
2220    case INDEX_op_shr_i64:
2221        if (const_args[2]) {
2222            tcg_out_shri64(s, args[0], args[1], args[2]);
2223        } else {
2224            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2225        }
2226        break;
2227    case INDEX_op_sar_i64:
2228        if (const_args[2]) {
2229            int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
2230            tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
2231        } else {
2232            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2233        }
2234        break;
2235    case INDEX_op_rotl_i64:
2236        if (const_args[2]) {
2237            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2238        } else {
2239            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2240        }
2241        break;
2242    case INDEX_op_rotr_i64:
2243        if (const_args[2]) {
2244            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2245        } else {
2246            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2247            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2248        }
2249        break;
2250
2251    case INDEX_op_mul_i64:
2252        a0 = args[0], a1 = args[1], a2 = args[2];
2253        if (const_args[2]) {
2254            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2255        } else {
2256            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2257        }
2258        break;
2259    case INDEX_op_div_i64:
2260        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2261        break;
2262    case INDEX_op_divu_i64:
2263        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2264        break;
2265
2266    case INDEX_op_qemu_ld_i32:
2267        tcg_out_qemu_ld(s, args, false);
2268        break;
2269    case INDEX_op_qemu_ld_i64:
2270        tcg_out_qemu_ld(s, args, true);
2271        break;
2272    case INDEX_op_qemu_st_i32:
2273        tcg_out_qemu_st(s, args, false);
2274        break;
2275    case INDEX_op_qemu_st_i64:
2276        tcg_out_qemu_st(s, args, true);
2277        break;
2278
2279    case INDEX_op_ext8s_i32:
2280    case INDEX_op_ext8s_i64:
2281        c = EXTSB;
2282        goto gen_ext;
2283    case INDEX_op_ext16s_i32:
2284    case INDEX_op_ext16s_i64:
2285        c = EXTSH;
2286        goto gen_ext;
2287    case INDEX_op_ext_i32_i64:
2288    case INDEX_op_ext32s_i64:
2289        c = EXTSW;
2290        goto gen_ext;
2291    gen_ext:
2292        tcg_out32(s, c | RS(args[1]) | RA(args[0]));
2293        break;
2294    case INDEX_op_extu_i32_i64:
2295        tcg_out_ext32u(s, args[0], args[1]);
2296        break;
2297
2298    case INDEX_op_setcond_i32:
2299        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2300                        const_args[2]);
2301        break;
2302    case INDEX_op_setcond_i64:
2303        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2304                        const_args[2]);
2305        break;
2306    case INDEX_op_setcond2_i32:
2307        tcg_out_setcond2(s, args, const_args);
2308        break;
2309
2310    case INDEX_op_bswap16_i32:
2311    case INDEX_op_bswap16_i64:
2312        a0 = args[0], a1 = args[1];
2313        /* a1 = abcd */
2314        if (a0 != a1) {
2315            /* a0 = (a1 r<< 24) & 0xff # 000c */
2316            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2317            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
2318            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
2319        } else {
2320            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
2321            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
2322            /* a0 = (a1 r<< 24) & 0xff # 000c */
2323            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2324            /* a0 = a0 | r0 # 00dc */
2325            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
2326        }
2327        break;
2328
2329    case INDEX_op_bswap32_i32:
2330    case INDEX_op_bswap32_i64:
2331        /* Stolen from gcc's builtin_bswap32 */
2332        a1 = args[1];
2333        a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
2334
2335        /* a1 = args[1] # abcd */
2336        /* a0 = rotate_left (a1, 8) # bcda */
2337        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2338        /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
2339        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2340        /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
2341        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2342
2343        if (a0 == TCG_REG_R0) {
2344            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2345        }
2346        break;
2347
2348    case INDEX_op_bswap64_i64:
2349        a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
2350        if (a0 == a1) {
2351            a0 = TCG_REG_R0;
2352            a2 = a1;
2353        }
2354
2355        /* a1 = # abcd efgh */
2356        /* a0 = rl32(a1, 8) # 0000 fghe */
2357        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2358        /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
2359        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2360        /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
2361        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2362
2363        /* a0 = rl64(a0, 32) # hgfe 0000 */
2364        /* a2 = rl64(a1, 32) # efgh abcd */
2365        tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
2366        tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
2367
2368        /* a0 = dep(a0, rl32(a2, 8), 0xffffffff)  # hgfe bcda */
2369        tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
2370        /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
2371        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
2372        /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
2373        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
2374
2375        if (a0 == 0) {
2376            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2377        }
2378        break;
2379
2380    case INDEX_op_deposit_i32:
2381        if (const_args[2]) {
2382            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
2383            tcg_out_andi32(s, args[0], args[0], ~mask);
2384        } else {
2385            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
2386                        32 - args[3] - args[4], 31 - args[3]);
2387        }
2388        break;
2389    case INDEX_op_deposit_i64:
2390        if (const_args[2]) {
2391            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
2392            tcg_out_andi64(s, args[0], args[0], ~mask);
2393        } else {
2394            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
2395                        64 - args[3] - args[4]);
2396        }
2397        break;
2398
2399    case INDEX_op_movcond_i32:
2400        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
2401                        args[3], args[4], const_args[2]);
2402        break;
2403    case INDEX_op_movcond_i64:
2404        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
2405                        args[3], args[4], const_args[2]);
2406        break;
2407
2408#if TCG_TARGET_REG_BITS == 64
2409    case INDEX_op_add2_i64:
2410#else
2411    case INDEX_op_add2_i32:
2412#endif
2413        /* Note that the CA bit is defined based on the word size of the
2414           environment.  So in 64-bit mode it's always carry-out of bit 63.
2415           The fallback code using deposit works just as well for 32-bit.  */
2416        a0 = args[0], a1 = args[1];
2417        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
2418            a0 = TCG_REG_R0;
2419        }
2420        if (const_args[4]) {
2421            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
2422        } else {
2423            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
2424        }
2425        if (const_args[5]) {
2426            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
2427        } else {
2428            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
2429        }
2430        if (a0 != args[0]) {
2431            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2432        }
2433        break;
2434
2435#if TCG_TARGET_REG_BITS == 64
2436    case INDEX_op_sub2_i64:
2437#else
2438    case INDEX_op_sub2_i32:
2439#endif
2440        a0 = args[0], a1 = args[1];
2441        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
2442            a0 = TCG_REG_R0;
2443        }
2444        if (const_args[2]) {
2445            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
2446        } else {
2447            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
2448        }
2449        if (const_args[3]) {
2450            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
2451        } else {
2452            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
2453        }
2454        if (a0 != args[0]) {
2455            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2456        }
2457        break;
2458
2459    case INDEX_op_muluh_i32:
2460        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
2461        break;
2462    case INDEX_op_mulsh_i32:
2463        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
2464        break;
2465    case INDEX_op_muluh_i64:
2466        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
2467        break;
2468    case INDEX_op_mulsh_i64:
2469        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
2470        break;
2471
2472    case INDEX_op_mb:
2473        tcg_out_mb(s, args[0]);
2474        break;
2475
2476    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
2477    case INDEX_op_mov_i64:
2478    case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
2479    case INDEX_op_movi_i64:
2480    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
2481    default:
2482        tcg_abort();
2483    }
2484}
2485
2486static const TCGTargetOpDef ppc_op_defs[] = {
2487    { INDEX_op_exit_tb, { } },
2488    { INDEX_op_goto_tb, { } },
2489    { INDEX_op_br, { } },
2490
2491    { INDEX_op_ld8u_i32, { "r", "r" } },
2492    { INDEX_op_ld8s_i32, { "r", "r" } },
2493    { INDEX_op_ld16u_i32, { "r", "r" } },
2494    { INDEX_op_ld16s_i32, { "r", "r" } },
2495    { INDEX_op_ld_i32, { "r", "r" } },
2496
2497    { INDEX_op_st8_i32, { "r", "r" } },
2498    { INDEX_op_st16_i32, { "r", "r" } },
2499    { INDEX_op_st_i32, { "r", "r" } },
2500
2501    { INDEX_op_add_i32, { "r", "r", "ri" } },
2502    { INDEX_op_mul_i32, { "r", "r", "rI" } },
2503    { INDEX_op_div_i32, { "r", "r", "r" } },
2504    { INDEX_op_divu_i32, { "r", "r", "r" } },
2505    { INDEX_op_sub_i32, { "r", "rI", "ri" } },
2506    { INDEX_op_and_i32, { "r", "r", "ri" } },
2507    { INDEX_op_or_i32, { "r", "r", "ri" } },
2508    { INDEX_op_xor_i32, { "r", "r", "ri" } },
2509    { INDEX_op_andc_i32, { "r", "r", "ri" } },
2510    { INDEX_op_orc_i32, { "r", "r", "ri" } },
2511    { INDEX_op_eqv_i32, { "r", "r", "ri" } },
2512    { INDEX_op_nand_i32, { "r", "r", "r" } },
2513    { INDEX_op_nor_i32, { "r", "r", "r" } },
2514
2515    { INDEX_op_shl_i32, { "r", "r", "ri" } },
2516    { INDEX_op_shr_i32, { "r", "r", "ri" } },
2517    { INDEX_op_sar_i32, { "r", "r", "ri" } },
2518    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
2519    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
2520
2521    { INDEX_op_neg_i32, { "r", "r" } },
2522    { INDEX_op_not_i32, { "r", "r" } },
2523    { INDEX_op_ext8s_i32, { "r", "r" } },
2524    { INDEX_op_ext16s_i32, { "r", "r" } },
2525    { INDEX_op_bswap16_i32, { "r", "r" } },
2526    { INDEX_op_bswap32_i32, { "r", "r" } },
2527
2528    { INDEX_op_brcond_i32, { "r", "ri" } },
2529    { INDEX_op_setcond_i32, { "r", "r", "ri" } },
2530    { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } },
2531
2532    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
2533
2534    { INDEX_op_muluh_i32, { "r", "r", "r" } },
2535    { INDEX_op_mulsh_i32, { "r", "r", "r" } },
2536
2537#if TCG_TARGET_REG_BITS == 64
2538    { INDEX_op_ld8u_i64, { "r", "r" } },
2539    { INDEX_op_ld8s_i64, { "r", "r" } },
2540    { INDEX_op_ld16u_i64, { "r", "r" } },
2541    { INDEX_op_ld16s_i64, { "r", "r" } },
2542    { INDEX_op_ld32u_i64, { "r", "r" } },
2543    { INDEX_op_ld32s_i64, { "r", "r" } },
2544    { INDEX_op_ld_i64, { "r", "r" } },
2545
2546    { INDEX_op_st8_i64, { "r", "r" } },
2547    { INDEX_op_st16_i64, { "r", "r" } },
2548    { INDEX_op_st32_i64, { "r", "r" } },
2549    { INDEX_op_st_i64, { "r", "r" } },
2550
2551    { INDEX_op_add_i64, { "r", "r", "rT" } },
2552    { INDEX_op_sub_i64, { "r", "rI", "rT" } },
2553    { INDEX_op_and_i64, { "r", "r", "ri" } },
2554    { INDEX_op_or_i64, { "r", "r", "rU" } },
2555    { INDEX_op_xor_i64, { "r", "r", "rU" } },
2556    { INDEX_op_andc_i64, { "r", "r", "ri" } },
2557    { INDEX_op_orc_i64, { "r", "r", "r" } },
2558    { INDEX_op_eqv_i64, { "r", "r", "r" } },
2559    { INDEX_op_nand_i64, { "r", "r", "r" } },
2560    { INDEX_op_nor_i64, { "r", "r", "r" } },
2561
2562    { INDEX_op_shl_i64, { "r", "r", "ri" } },
2563    { INDEX_op_shr_i64, { "r", "r", "ri" } },
2564    { INDEX_op_sar_i64, { "r", "r", "ri" } },
2565    { INDEX_op_rotl_i64, { "r", "r", "ri" } },
2566    { INDEX_op_rotr_i64, { "r", "r", "ri" } },
2567
2568    { INDEX_op_mul_i64, { "r", "r", "rI" } },
2569    { INDEX_op_div_i64, { "r", "r", "r" } },
2570    { INDEX_op_divu_i64, { "r", "r", "r" } },
2571
2572    { INDEX_op_neg_i64, { "r", "r" } },
2573    { INDEX_op_not_i64, { "r", "r" } },
2574    { INDEX_op_ext8s_i64, { "r", "r" } },
2575    { INDEX_op_ext16s_i64, { "r", "r" } },
2576    { INDEX_op_ext32s_i64, { "r", "r" } },
2577    { INDEX_op_ext_i32_i64, { "r", "r" } },
2578    { INDEX_op_extu_i32_i64, { "r", "r" } },
2579    { INDEX_op_bswap16_i64, { "r", "r" } },
2580    { INDEX_op_bswap32_i64, { "r", "r" } },
2581    { INDEX_op_bswap64_i64, { "r", "r" } },
2582
2583    { INDEX_op_brcond_i64, { "r", "ri" } },
2584    { INDEX_op_setcond_i64, { "r", "r", "ri" } },
2585    { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } },
2586
2587    { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
2588
2589    { INDEX_op_mulsh_i64, { "r", "r", "r" } },
2590    { INDEX_op_muluh_i64, { "r", "r", "r" } },
2591#endif
2592
2593#if TCG_TARGET_REG_BITS == 32
2594    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2595    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2596#endif
2597
2598#if TCG_TARGET_REG_BITS == 64
2599    { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } },
2600    { INDEX_op_sub2_i64, { "r", "r", "rI", "rZM", "r", "r" } },
2601#else
2602    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rI", "rZM" } },
2603    { INDEX_op_sub2_i32, { "r", "r", "rI", "rZM", "r", "r" } },
2604#endif
2605
2606#if TCG_TARGET_REG_BITS == 64
2607    { INDEX_op_qemu_ld_i32, { "r", "L" } },
2608    { INDEX_op_qemu_st_i32, { "S", "S" } },
2609    { INDEX_op_qemu_ld_i64, { "r", "L" } },
2610    { INDEX_op_qemu_st_i64, { "S", "S" } },
2611#elif TARGET_LONG_BITS == 32
2612    { INDEX_op_qemu_ld_i32, { "r", "L" } },
2613    { INDEX_op_qemu_st_i32, { "S", "S" } },
2614    { INDEX_op_qemu_ld_i64, { "L", "L", "L" } },
2615    { INDEX_op_qemu_st_i64, { "S", "S", "S" } },
2616#else
2617    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2618    { INDEX_op_qemu_st_i32, { "S", "S", "S" } },
2619    { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } },
2620    { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } },
2621#endif
2622
2623    { INDEX_op_mb, { } },
2624    { -1 },
2625};
2626
2627static void tcg_target_init(TCGContext *s)
2628{
2629    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2630    if (hwcap & PPC_FEATURE_ARCH_2_06) {
2631        have_isa_2_06 = true;
2632    }
2633
2634    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
2635    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
2636    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
2637                     (1 << TCG_REG_R0) |
2638                     (1 << TCG_REG_R2) |
2639                     (1 << TCG_REG_R3) |
2640                     (1 << TCG_REG_R4) |
2641                     (1 << TCG_REG_R5) |
2642                     (1 << TCG_REG_R6) |
2643                     (1 << TCG_REG_R7) |
2644                     (1 << TCG_REG_R8) |
2645                     (1 << TCG_REG_R9) |
2646                     (1 << TCG_REG_R10) |
2647                     (1 << TCG_REG_R11) |
2648                     (1 << TCG_REG_R12));
2649
2650    tcg_regset_clear(s->reserved_regs);
2651    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
2652    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
2653#if defined(_CALL_SYSV)
2654    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
2655#endif
2656#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
2657    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
2658#endif
2659    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
2660    if (USE_REG_RA) {
2661        tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA);  /* return addr */
2662    }
2663
2664    tcg_add_target_add_op_defs(ppc_op_defs);
2665}
2666
2667#ifdef __ELF__
2668typedef struct {
2669    DebugFrameCIE cie;
2670    DebugFrameFDEHeader fde;
2671    uint8_t fde_def_cfa[4];
2672    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
2673} DebugFrame;
2674
2675/* We're expecting a 2 byte uleb128 encoded value.  */
2676QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2677
2678#if TCG_TARGET_REG_BITS == 64
2679# define ELF_HOST_MACHINE EM_PPC64
2680#else
2681# define ELF_HOST_MACHINE EM_PPC
2682#endif
2683
2684static DebugFrame debug_frame = {
2685    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2686    .cie.id = -1,
2687    .cie.version = 1,
2688    .cie.code_align = 1,
2689    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
2690    .cie.return_column = 65,
2691
2692    /* Total FDE size does not include the "len" member.  */
2693    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2694
2695    .fde_def_cfa = {
2696        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
2697        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2698        (FRAME_SIZE >> 7)
2699    },
2700    .fde_reg_ofs = {
2701        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
2702        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
2703    }
2704};
2705
2706void tcg_register_jit(void *buf, size_t buf_size)
2707{
2708    uint8_t *p = &debug_frame.fde_reg_ofs[3];
2709    int i;
2710
2711    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
2712        p[0] = 0x80 + tcg_target_callee_save_regs[i];
2713        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
2714    }
2715
2716    debug_frame.fde.func_start = (uintptr_t)buf;
2717    debug_frame.fde.func_len = buf_size;
2718
2719    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2720}
2721#endif /* __ELF__ */
2722
2723static size_t dcache_bsize = 16;
2724static size_t icache_bsize = 16;
2725
2726void flush_icache_range(uintptr_t start, uintptr_t stop)
2727{
2728    uintptr_t p, start1, stop1;
2729    size_t dsize = dcache_bsize;
2730    size_t isize = icache_bsize;
2731
2732    start1 = start & ~(dsize - 1);
2733    stop1 = (stop + dsize - 1) & ~(dsize - 1);
2734    for (p = start1; p < stop1; p += dsize) {
2735        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
2736    }
2737    asm volatile ("sync" : : : "memory");
2738
2739    start &= start & ~(isize - 1);
2740    stop1 = (stop + isize - 1) & ~(isize - 1);
2741    for (p = start1; p < stop1; p += isize) {
2742        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
2743    }
2744    asm volatile ("sync" : : : "memory");
2745    asm volatile ("isync" : : : "memory");
2746}
2747
2748#if defined _AIX
2749#include <sys/systemcfg.h>
2750
2751static void __attribute__((constructor)) tcg_cache_init(void)
2752{
2753    icache_bsize = _system_configuration.icache_line;
2754    dcache_bsize = _system_configuration.dcache_line;
2755}
2756
2757#elif defined __linux__
2758static void __attribute__((constructor)) tcg_cache_init(void)
2759{
2760    unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE);
2761    unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE);
2762
2763    if (dsize == 0 || isize == 0) {
2764        if (dsize == 0) {
2765            fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n");
2766        }
2767        if (isize == 0) {
2768            fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n");
2769        }
2770        exit(1);
2771    }
2772    dcache_bsize = dsize;
2773    icache_bsize = isize;
2774}
2775
2776#elif defined __APPLE__
2777#include <sys/sysctl.h>
2778
2779static void __attribute__((constructor)) tcg_cache_init(void)
2780{
2781    size_t len;
2782    unsigned cacheline;
2783    int name[2] = { CTL_HW, HW_CACHELINE };
2784
2785    len = sizeof(cacheline);
2786    if (sysctl(name, 2, &cacheline, &len, NULL, 0)) {
2787        perror("sysctl CTL_HW HW_CACHELINE failed");
2788        exit(1);
2789    }
2790    dcache_bsize = cacheline;
2791    icache_bsize = cacheline;
2792}
2793
2794#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
2795#include <sys/sysctl.h>
2796
2797static void __attribute__((constructor)) tcg_cache_init(void)
2798{
2799    size_t len = 4;
2800    unsigned cacheline;
2801
2802    if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) {
2803        fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n",
2804                strerror(errno));
2805        exit(1);
2806    }
2807    dcache_bsize = cacheline;
2808    icache_bsize = cacheline;
2809}
2810#endif
2811