/* qemu/tcg/ppc/tcg-target.inc.c */
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "elf.h"
  26#include "tcg-pool.inc.c"
  27
  28#if defined _CALL_DARWIN || defined __APPLE__
  29#define TCG_TARGET_CALL_DARWIN
  30#endif
  31#ifdef _CALL_SYSV
  32# define TCG_TARGET_CALL_ALIGN_ARGS   1
  33#endif
  34
  35/* For some memory operations, we need a scratch that isn't R0.  For the AIX
  36   calling convention, we can re-use the TOC register since we'll be reloading
  37   it at every call.  Otherwise R12 will do nicely as neither a call-saved
  38   register nor a parameter register.  */
  39#ifdef _CALL_AIX
  40# define TCG_REG_TMP1   TCG_REG_R2
  41#else
  42# define TCG_REG_TMP1   TCG_REG_R12
  43#endif
  44
  45#define TCG_VEC_TMP1    TCG_REG_V0
  46#define TCG_VEC_TMP2    TCG_REG_V1
  47
  48#define TCG_REG_TB     TCG_REG_R31
  49#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)
  50
  51/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
  52#define SZP  ((int)sizeof(void *))
  53
  54/* Shorthand for size of a register.  */
  55#define SZR  (TCG_TARGET_REG_BITS / 8)
  56
  57#define TCG_CT_CONST_S16  0x100
  58#define TCG_CT_CONST_U16  0x200
  59#define TCG_CT_CONST_S32  0x400
  60#define TCG_CT_CONST_U32  0x800
  61#define TCG_CT_CONST_ZERO 0x1000
  62#define TCG_CT_CONST_MONE 0x2000
  63#define TCG_CT_CONST_WSZ  0x4000
  64
  65static tcg_insn_unit *tb_ret_addr;
  66
  67TCGPowerISA have_isa;
  68static bool have_isel;
  69bool have_altivec;
  70bool have_vsx;
  71
  72#ifndef CONFIG_SOFTMMU
  73#define TCG_GUEST_BASE_REG 30
  74#endif
  75
  76#ifdef CONFIG_DEBUG_TCG
  77static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
  78    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
  79    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
  80    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
  81    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
  82    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
  83    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
  84    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
  85    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
  86};
  87#endif
  88
  89static const int tcg_target_reg_alloc_order[] = {
  90    TCG_REG_R14,  /* call saved registers */
  91    TCG_REG_R15,
  92    TCG_REG_R16,
  93    TCG_REG_R17,
  94    TCG_REG_R18,
  95    TCG_REG_R19,
  96    TCG_REG_R20,
  97    TCG_REG_R21,
  98    TCG_REG_R22,
  99    TCG_REG_R23,
 100    TCG_REG_R24,
 101    TCG_REG_R25,
 102    TCG_REG_R26,
 103    TCG_REG_R27,
 104    TCG_REG_R28,
 105    TCG_REG_R29,
 106    TCG_REG_R30,
 107    TCG_REG_R31,
 108    TCG_REG_R12,  /* call clobbered, non-arguments */
 109    TCG_REG_R11,
 110    TCG_REG_R2,
 111    TCG_REG_R13,
 112    TCG_REG_R10,  /* call clobbered, arguments */
 113    TCG_REG_R9,
 114    TCG_REG_R8,
 115    TCG_REG_R7,
 116    TCG_REG_R6,
 117    TCG_REG_R5,
 118    TCG_REG_R4,
 119    TCG_REG_R3,
 120
 121    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
 122    TCG_REG_V2,   /* call clobbered, vectors */
 123    TCG_REG_V3,
 124    TCG_REG_V4,
 125    TCG_REG_V5,
 126    TCG_REG_V6,
 127    TCG_REG_V7,
 128    TCG_REG_V8,
 129    TCG_REG_V9,
 130    TCG_REG_V10,
 131    TCG_REG_V11,
 132    TCG_REG_V12,
 133    TCG_REG_V13,
 134    TCG_REG_V14,
 135    TCG_REG_V15,
 136    TCG_REG_V16,
 137    TCG_REG_V17,
 138    TCG_REG_V18,
 139    TCG_REG_V19,
 140};
 141
 142static const int tcg_target_call_iarg_regs[] = {
 143    TCG_REG_R3,
 144    TCG_REG_R4,
 145    TCG_REG_R5,
 146    TCG_REG_R6,
 147    TCG_REG_R7,
 148    TCG_REG_R8,
 149    TCG_REG_R9,
 150    TCG_REG_R10
 151};
 152
 153static const int tcg_target_call_oarg_regs[] = {
 154    TCG_REG_R3,
 155    TCG_REG_R4
 156};
 157
 158static const int tcg_target_callee_save_regs[] = {
 159#ifdef TCG_TARGET_CALL_DARWIN
 160    TCG_REG_R11,
 161#endif
 162    TCG_REG_R14,
 163    TCG_REG_R15,
 164    TCG_REG_R16,
 165    TCG_REG_R17,
 166    TCG_REG_R18,
 167    TCG_REG_R19,
 168    TCG_REG_R20,
 169    TCG_REG_R21,
 170    TCG_REG_R22,
 171    TCG_REG_R23,
 172    TCG_REG_R24,
 173    TCG_REG_R25,
 174    TCG_REG_R26,
 175    TCG_REG_R27, /* currently used for the global env */
 176    TCG_REG_R28,
 177    TCG_REG_R29,
 178    TCG_REG_R30,
 179    TCG_REG_R31
 180};
 181
 182static inline bool in_range_b(tcg_target_long target)
 183{
 184    return target == sextract64(target, 0, 26);
 185}
 186
 187static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 188{
 189    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
 190    tcg_debug_assert(in_range_b(disp));
 191    return disp & 0x3fffffc;
 192}
 193
 194static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
 195{
 196    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
 197    if (in_range_b(disp)) {
 198        *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc);
 199        return true;
 200    }
 201    return false;
 202}
 203
 204static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 205{
 206    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
 207    tcg_debug_assert(disp == (int16_t) disp);
 208    return disp & 0xfffc;
 209}
 210
 211static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
 212{
 213    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
 214    if (disp == (int16_t) disp) {
 215        *pc = (*pc & ~0xfffc) | (disp & 0xfffc);
 216        return true;
 217    }
 218    return false;
 219}
 220
 221/* parse target specific constraints */
 222static const char *target_parse_constraint(TCGArgConstraint *ct,
 223                                           const char *ct_str, TCGType type)
 224{
 225    switch (*ct_str++) {
 226    case 'A': case 'B': case 'C': case 'D':
 227        ct->ct |= TCG_CT_REG;
 228        tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
 229        break;
 230    case 'r':
 231        ct->ct |= TCG_CT_REG;
 232        ct->u.regs = 0xffffffff;
 233        break;
 234    case 'v':
 235        ct->ct |= TCG_CT_REG;
 236        ct->u.regs = 0xffffffff00000000ull;
 237        break;
 238    case 'L':                   /* qemu_ld constraint */
 239        ct->ct |= TCG_CT_REG;
 240        ct->u.regs = 0xffffffff;
 241        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 242#ifdef CONFIG_SOFTMMU
 243        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
 244        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
 245#endif
 246        break;
 247    case 'S':                   /* qemu_st constraint */
 248        ct->ct |= TCG_CT_REG;
 249        ct->u.regs = 0xffffffff;
 250        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 251#ifdef CONFIG_SOFTMMU
 252        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
 253        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
 254        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
 255#endif
 256        break;
 257    case 'I':
 258        ct->ct |= TCG_CT_CONST_S16;
 259        break;
 260    case 'J':
 261        ct->ct |= TCG_CT_CONST_U16;
 262        break;
 263    case 'M':
 264        ct->ct |= TCG_CT_CONST_MONE;
 265        break;
 266    case 'T':
 267        ct->ct |= TCG_CT_CONST_S32;
 268        break;
 269    case 'U':
 270        ct->ct |= TCG_CT_CONST_U32;
 271        break;
 272    case 'W':
 273        ct->ct |= TCG_CT_CONST_WSZ;
 274        break;
 275    case 'Z':
 276        ct->ct |= TCG_CT_CONST_ZERO;
 277        break;
 278    default:
 279        return NULL;
 280    }
 281    return ct_str;
 282}
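
/*
 * For illustration: an operand constraint string such as "rI" is fed to
 * this function one letter at a time by the common tcg.c code, so it is
 * called once with 'r' (any of r0-r31) and once with 'I' (a 16-bit
 * signed immediate, checked by tcg_target_const_match below).
 */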
 283
 284/* test if a constant matches the constraint */
 285static int tcg_target_const_match(tcg_target_long val, TCGType type,
 286                                  const TCGArgConstraint *arg_ct)
 287{
 288    int ct = arg_ct->ct;
 289    if (ct & TCG_CT_CONST) {
 290        return 1;
 291    }
 292
 293    /* The only 32-bit constraint we use aside from
 294       TCG_CT_CONST is TCG_CT_CONST_S16.  */
 295    if (type == TCG_TYPE_I32) {
 296        val = (int32_t)val;
 297    }
 298
 299    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
 300        return 1;
 301    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
 302        return 1;
 303    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
 304        return 1;
 305    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
 306        return 1;
 307    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
 308        return 1;
 309    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
 310        return 1;
 311    } else if ((ct & TCG_CT_CONST_WSZ)
 312               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
 313        return 1;
 314    }
 315    return 0;
 316}
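
/*
 * Example: against a constraint of "rI", val = -5 is accepted via the
 * TCG_CT_CONST_S16 test above, while val = 0x12345 matches no constant
 * constraint and is forced into a register by the caller.
 */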
 317
 318#define OPCD(opc) ((opc)<<26)
 319#define XO19(opc) (OPCD(19)|((opc)<<1))
 320#define MD30(opc) (OPCD(30)|((opc)<<2))
 321#define MDS30(opc) (OPCD(30)|((opc)<<1))
 322#define XO31(opc) (OPCD(31)|((opc)<<1))
 323#define XO58(opc) (OPCD(58)|(opc))
 324#define XO62(opc) (OPCD(62)|(opc))
 325#define VX4(opc)  (OPCD(4)|(opc))
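
/*
 * A worked example of these format helpers: an X-form arithmetic
 * instruction lives in major opcode 31 with its extended opcode in
 * bits 21..30, so
 *     ADD = XO31(266) = (31 << 26) | (266 << 1) = 0x7c000214,
 * the base encoding of "add rt,ra,rb" before the register fields are
 * OR'ed in via TAB() below.
 */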
 326
 327#define B      OPCD( 18)
 328#define BC     OPCD( 16)
 329#define LBZ    OPCD( 34)
 330#define LHZ    OPCD( 40)
 331#define LHA    OPCD( 42)
 332#define LWZ    OPCD( 32)
 333#define LWZUX  XO31( 55)
 334#define STB    OPCD( 38)
 335#define STH    OPCD( 44)
 336#define STW    OPCD( 36)
 337
 338#define STD    XO62(  0)
 339#define STDU   XO62(  1)
 340#define STDX   XO31(149)
 341
 342#define LD     XO58(  0)
 343#define LDX    XO31( 21)
 344#define LDU    XO58(  1)
 345#define LDUX   XO31( 53)
 346#define LWA    XO58(  2)
 347#define LWAX   XO31(341)
 348
 349#define ADDIC  OPCD( 12)
 350#define ADDI   OPCD( 14)
 351#define ADDIS  OPCD( 15)
 352#define ORI    OPCD( 24)
 353#define ORIS   OPCD( 25)
 354#define XORI   OPCD( 26)
 355#define XORIS  OPCD( 27)
 356#define ANDI   OPCD( 28)
 357#define ANDIS  OPCD( 29)
 358#define MULLI  OPCD(  7)
 359#define CMPLI  OPCD( 10)
 360#define CMPI   OPCD( 11)
 361#define SUBFIC OPCD( 8)
 362
 363#define LWZU   OPCD( 33)
 364#define STWU   OPCD( 37)
 365
 366#define RLWIMI OPCD( 20)
 367#define RLWINM OPCD( 21)
 368#define RLWNM  OPCD( 23)
 369
 370#define RLDICL MD30(  0)
 371#define RLDICR MD30(  1)
 372#define RLDIMI MD30(  3)
 373#define RLDCL  MDS30( 8)
 374
 375#define BCLR   XO19( 16)
 376#define BCCTR  XO19(528)
 377#define CRAND  XO19(257)
 378#define CRANDC XO19(129)
 379#define CRNAND XO19(225)
 380#define CROR   XO19(449)
 381#define CRNOR  XO19( 33)
 382
 383#define EXTSB  XO31(954)
 384#define EXTSH  XO31(922)
 385#define EXTSW  XO31(986)
 386#define ADD    XO31(266)
 387#define ADDE   XO31(138)
 388#define ADDME  XO31(234)
 389#define ADDZE  XO31(202)
 390#define ADDC   XO31( 10)
 391#define AND    XO31( 28)
 392#define SUBF   XO31( 40)
 393#define SUBFC  XO31(  8)
 394#define SUBFE  XO31(136)
 395#define SUBFME XO31(232)
 396#define SUBFZE XO31(200)
 397#define OR     XO31(444)
 398#define XOR    XO31(316)
 399#define MULLW  XO31(235)
 400#define MULHW  XO31( 75)
 401#define MULHWU XO31( 11)
 402#define DIVW   XO31(491)
 403#define DIVWU  XO31(459)
 404#define CMP    XO31(  0)
 405#define CMPL   XO31( 32)
 406#define LHBRX  XO31(790)
 407#define LWBRX  XO31(534)
 408#define LDBRX  XO31(532)
 409#define STHBRX XO31(918)
 410#define STWBRX XO31(662)
 411#define STDBRX XO31(660)
 412#define MFSPR  XO31(339)
 413#define MTSPR  XO31(467)
 414#define SRAWI  XO31(824)
 415#define NEG    XO31(104)
 416#define MFCR   XO31( 19)
 417#define MFOCRF (MFCR | (1u << 20))
 418#define NOR    XO31(124)
 419#define CNTLZW XO31( 26)
 420#define CNTLZD XO31( 58)
 421#define CNTTZW XO31(538)
 422#define CNTTZD XO31(570)
 423#define CNTPOPW XO31(378)
 424#define CNTPOPD XO31(506)
 425#define ANDC   XO31( 60)
 426#define ORC    XO31(412)
 427#define EQV    XO31(284)
 428#define NAND   XO31(476)
 429#define ISEL   XO31( 15)
 430
 431#define MULLD  XO31(233)
 432#define MULHD  XO31( 73)
 433#define MULHDU XO31(  9)
 434#define DIVD   XO31(489)
 435#define DIVDU  XO31(457)
 436
 437#define LBZX   XO31( 87)
 438#define LHZX   XO31(279)
 439#define LHAX   XO31(343)
 440#define LWZX   XO31( 23)
 441#define STBX   XO31(215)
 442#define STHX   XO31(407)
 443#define STWX   XO31(151)
 444
 445#define EIEIO  XO31(854)
 446#define HWSYNC XO31(598)
 447#define LWSYNC (HWSYNC | (1u << 21))
 448
 449#define SPR(a, b) ((((a)<<5)|(b))<<11)
 450#define LR     SPR(8, 0)
 451#define CTR    SPR(9, 0)
 452
 453#define SLW    XO31( 24)
 454#define SRW    XO31(536)
 455#define SRAW   XO31(792)
 456
 457#define SLD    XO31( 27)
 458#define SRD    XO31(539)
 459#define SRAD   XO31(794)
 460#define SRADI  XO31(413<<1)
 461
 462#define TW     XO31( 4)
 463#define TRAP   (TW | TO(31))
 464
 465#define NOP    ORI  /* ori 0,0,0 */
 466
 467#define LVX        XO31(103)
 468#define LVEBX      XO31(7)
 469#define LVEHX      XO31(39)
 470#define LVEWX      XO31(71)
 471#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
 472#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
 473#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
 474#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
 475#define LXSD       (OPCD(57) | 2)   /* v3.00 */
 476#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
 477
 478#define STVX       XO31(231)
 479#define STVEWX     XO31(199)
 480#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
 481#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
 482#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
 483#define STXSD      (OPCD(61) | 2)   /* v3.00 */
 484
 485#define VADDSBS    VX4(768)
 486#define VADDUBS    VX4(512)
 487#define VADDUBM    VX4(0)
 488#define VADDSHS    VX4(832)
 489#define VADDUHS    VX4(576)
 490#define VADDUHM    VX4(64)
 491#define VADDSWS    VX4(896)
 492#define VADDUWS    VX4(640)
 493#define VADDUWM    VX4(128)
 494#define VADDUDM    VX4(192)       /* v2.07 */
 495
 496#define VSUBSBS    VX4(1792)
 497#define VSUBUBS    VX4(1536)
 498#define VSUBUBM    VX4(1024)
 499#define VSUBSHS    VX4(1856)
 500#define VSUBUHS    VX4(1600)
 501#define VSUBUHM    VX4(1088)
 502#define VSUBSWS    VX4(1920)
 503#define VSUBUWS    VX4(1664)
 504#define VSUBUWM    VX4(1152)
 505#define VSUBUDM    VX4(1216)      /* v2.07 */
 506
 507#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
 508#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
 509
 510#define VMAXSB     VX4(258)
 511#define VMAXSH     VX4(322)
 512#define VMAXSW     VX4(386)
 513#define VMAXSD     VX4(450)       /* v2.07 */
 514#define VMAXUB     VX4(2)
 515#define VMAXUH     VX4(66)
 516#define VMAXUW     VX4(130)
 517#define VMAXUD     VX4(194)       /* v2.07 */
 518#define VMINSB     VX4(770)
 519#define VMINSH     VX4(834)
 520#define VMINSW     VX4(898)
 521#define VMINSD     VX4(962)       /* v2.07 */
 522#define VMINUB     VX4(514)
 523#define VMINUH     VX4(578)
 524#define VMINUW     VX4(642)
 525#define VMINUD     VX4(706)       /* v2.07 */
 526
 527#define VCMPEQUB   VX4(6)
 528#define VCMPEQUH   VX4(70)
 529#define VCMPEQUW   VX4(134)
 530#define VCMPEQUD   VX4(199)       /* v2.07 */
 531#define VCMPGTSB   VX4(774)
 532#define VCMPGTSH   VX4(838)
 533#define VCMPGTSW   VX4(902)
 534#define VCMPGTSD   VX4(967)       /* v2.07 */
 535#define VCMPGTUB   VX4(518)
 536#define VCMPGTUH   VX4(582)
 537#define VCMPGTUW   VX4(646)
 538#define VCMPGTUD   VX4(711)       /* v2.07 */
 539#define VCMPNEB    VX4(7)         /* v3.00 */
 540#define VCMPNEH    VX4(71)        /* v3.00 */
 541#define VCMPNEW    VX4(135)       /* v3.00 */
 542
 543#define VSLB       VX4(260)
 544#define VSLH       VX4(324)
 545#define VSLW       VX4(388)
 546#define VSLD       VX4(1476)      /* v2.07 */
 547#define VSRB       VX4(516)
 548#define VSRH       VX4(580)
 549#define VSRW       VX4(644)
 550#define VSRD       VX4(1732)      /* v2.07 */
 551#define VSRAB      VX4(772)
 552#define VSRAH      VX4(836)
 553#define VSRAW      VX4(900)
 554#define VSRAD      VX4(964)       /* v2.07 */
 555#define VRLB       VX4(4)
 556#define VRLH       VX4(68)
 557#define VRLW       VX4(132)
 558#define VRLD       VX4(196)       /* v2.07 */
 559
 560#define VMULEUB    VX4(520)
 561#define VMULEUH    VX4(584)
 562#define VMULEUW    VX4(648)       /* v2.07 */
 563#define VMULOUB    VX4(8)
 564#define VMULOUH    VX4(72)
 565#define VMULOUW    VX4(136)       /* v2.07 */
 566#define VMULUWM    VX4(137)       /* v2.07 */
 567#define VMSUMUHM   VX4(38)
 568
 569#define VMRGHB     VX4(12)
 570#define VMRGHH     VX4(76)
 571#define VMRGHW     VX4(140)
 572#define VMRGLB     VX4(268)
 573#define VMRGLH     VX4(332)
 574#define VMRGLW     VX4(396)
 575
 576#define VPKUHUM    VX4(14)
 577#define VPKUWUM    VX4(78)
 578
 579#define VAND       VX4(1028)
 580#define VANDC      VX4(1092)
 581#define VNOR       VX4(1284)
 582#define VOR        VX4(1156)
 583#define VXOR       VX4(1220)
 584#define VEQV       VX4(1668)      /* v2.07 */
 585#define VNAND      VX4(1412)      /* v2.07 */
 586#define VORC       VX4(1348)      /* v2.07 */
 587
 588#define VSPLTB     VX4(524)
 589#define VSPLTH     VX4(588)
 590#define VSPLTW     VX4(652)
 591#define VSPLTISB   VX4(780)
 592#define VSPLTISH   VX4(844)
 593#define VSPLTISW   VX4(908)
 594
 595#define VSLDOI     VX4(44)
 596
 597#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
 598#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
 599#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
 600
 601#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
 602#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
 603#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
 604#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
 605#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
 606#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
 607
 608#define RT(r) ((r)<<21)
 609#define RS(r) ((r)<<21)
 610#define RA(r) ((r)<<16)
 611#define RB(r) ((r)<<11)
 612#define TO(t) ((t)<<21)
 613#define SH(s) ((s)<<11)
 614#define MB(b) ((b)<<6)
 615#define ME(e) ((e)<<1)
 616#define BO(o) ((o)<<21)
 617#define MB64(b) ((b)<<5)
 618#define FXM(b) (1 << (19 - (b)))
 619
 620#define VRT(r)  (((r) & 31) << 21)
 621#define VRA(r)  (((r) & 31) << 16)
 622#define VRB(r)  (((r) & 31) << 11)
 623#define VRC(r)  (((r) & 31) <<  6)
 624
 625#define LK    1
 626
 627#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
 628#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
 629#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
 630#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
 631
 632#define BF(n)    ((n)<<23)
 633#define BI(n, c) (((c)+((n)*4))<<16)
 634#define BT(n, c) (((c)+((n)*4))<<21)
 635#define BA(n, c) (((c)+((n)*4))<<16)
 636#define BB(n, c) (((c)+((n)*4))<<11)
 637#define BC_(n, c) (((c)+((n)*4))<<6)
 638
 639#define BO_COND_TRUE  BO(12)
 640#define BO_COND_FALSE BO( 4)
 641#define BO_ALWAYS     BO(20)
 642
 643enum {
 644    CR_LT,
 645    CR_GT,
 646    CR_EQ,
 647    CR_SO
 648};
 649
 650static const uint32_t tcg_to_bc[] = {
 651    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
 652    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
 653    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
 654    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
 655    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
 656    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
 657    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
 658    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
 659    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
 660    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
 661};
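
/*
 * Decoding one entry as a check: TCG_COND_EQ above expands to
 *     (16 << 26) | ((CR_EQ + 7 * 4) << 16) | (12 << 21),
 * i.e. "bc 12,30,target" -- branch if CR bit 30 (cr7.eq) is set,
 * better known by its extended mnemonic "beq cr7,target".
 */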
 662
 663/* The low bit here is set if the RA and RB fields must be inverted.  */
 664static const uint32_t tcg_to_isel[] = {
 665    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
 666    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
 667    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
 668    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
 669    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
 670    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
 671    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
 672    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
 673    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
 674    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
 675};
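
/*
 * Example of the inversion bit: there is no direct isel condition for
 * TCG_COND_NE, so its entry is the CR_EQ test with the low bit set;
 * tcg_out_setcond and tcg_out_movcond below clear that bit and swap
 * the two value operands, computing "ne" as "eq" with the results
 * exchanged.
 */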
 676
 677static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 678                        intptr_t value, intptr_t addend)
 679{
 680    tcg_insn_unit *target;
 681    int16_t lo;
 682    int32_t hi;
 683
 684    value += addend;
 685    target = (tcg_insn_unit *)value;
 686
 687    switch (type) {
 688    case R_PPC_REL14:
 689        return reloc_pc14(code_ptr, target);
 690    case R_PPC_REL24:
 691        return reloc_pc24(code_ptr, target);
 692    case R_PPC_ADDR16:
 693        /*
 694         * We are (slightly) abusing this relocation type.  In particular,
 695         * assert that the low 2 bits are zero, and do not modify them.
 696         * That way we can use this with LD et al that have opcode bits
 697         * in the low 2 bits of the insn.
 698         */
 699        if ((value & 3) || value != (int16_t)value) {
 700            return false;
 701        }
 702        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
 703        break;
 704    case R_PPC_ADDR32:
 705        /*
 706         * We are abusing this relocation type.  Again, this points to
 707         * a pair of insns, lis + load.  This is an absolute address
 708         * relocation for PPC32 so the lis cannot be removed.
 709         */
 710        lo = value;
 711        hi = value - lo;
 712        if (hi + lo != value) {
 713            return false;
 714        }
 715        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
 716        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
 717        break;
 718    default:
 719        g_assert_not_reached();
 720    }
 721    return true;
 722}
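
/*
 * Example for R_PPC_ADDR32: patching value = 0x12345678 splits into
 * lo = 0x5678 and hi = 0x12340000, so the "lis" receives 0x1234 and
 * the following load's displacement receives 0x5678; had lo been
 * negative, hi would absorb the compensating +0x10000.
 */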
 723
 724static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
 725                             TCGReg base, tcg_target_long offset);
 726
 727static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
 728{
 729    if (ret == arg) {
 730        return true;
 731    }
 732    switch (type) {
 733    case TCG_TYPE_I64:
 734        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
 735        /* fallthru */
 736    case TCG_TYPE_I32:
 737        if (ret < TCG_REG_V0) {
 738            if (arg < TCG_REG_V0) {
 739                tcg_out32(s, OR | SAB(arg, ret, arg));
 740                break;
 741            } else if (have_isa_2_07) {
 742                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
 743                          | VRT(arg) | RA(ret));
 744                break;
 745            } else {
 746                /* Altivec does not support vector->integer moves.  */
 747                return false;
 748            }
 749        } else if (arg < TCG_REG_V0) {
 750            if (have_isa_2_07) {
 751                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
 752                          | VRT(ret) | RA(arg));
 753                break;
 754            } else {
 755                /* Altivec does not support integer->vector moves.  */
 756                return false;
 757            }
 758        }
 759        /* fallthru */
 760    case TCG_TYPE_V64:
 761    case TCG_TYPE_V128:
 762        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
 763        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
 764        break;
 765    default:
 766        g_assert_not_reached();
 767    }
 768    return true;
 769}
 770
 771static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
 772                               int sh, int mb)
 773{
 774    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
 775    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
 776    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
 777    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
 778}
 779
 780static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
 781                               int sh, int mb, int me)
 782{
 783    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
 784}
 785
 786static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
 787{
 788    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
 789}
 790
 791static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
 792{
 793    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
 794}
 795
 796static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
 797{
 798    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
 799}
 800
 801static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
 802{
 803    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
 804}
 805
 806static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
 807{
 808    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
 809}
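
/*
 * These helpers rely on the standard PowerPC rotate-and-mask idioms;
 * e.g. tcg_out_shli32(s, dst, src, 8) emits "rlwinm dst,src,8,0,23",
 * a left rotate by 8 under a mask keeping bits 0..23, which is
 * precisely the extended mnemonic "slwi dst,src,8".
 */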
 810
 811/* Emit a move into ret of arg, if it can be done in one insn.  */
 812static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
 813{
 814    if (arg == (int16_t)arg) {
 815        tcg_out32(s, ADDI | TAI(ret, 0, arg));
 816        return true;
 817    }
 818    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
 819        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
 820        return true;
 821    }
 822    return false;
 823}
 824
 825static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
 826                             tcg_target_long arg, bool in_prologue)
 827{
 828    intptr_t tb_diff;
 829    tcg_target_long tmp;
 830    int shift;
 831
 832    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 833
 834    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
 835        arg = (int32_t)arg;
 836    }
 837
 838    /* Load 16-bit immediates with one insn.  */
 839    if (tcg_out_movi_one(s, ret, arg)) {
 840        return;
 841    }
 842
 843    /* Load addresses within the TB with one insn.  */
 844    tb_diff = arg - (intptr_t)s->code_gen_ptr;
 845    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
 846        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
 847        return;
 848    }
 849
 850    /* Load 32-bit immediates with two insns.  Note that we've already
 851       eliminated bare ADDIS, so we know both insns are required.  */
 852    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
 853        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
 854        tcg_out32(s, ORI | SAI(ret, ret, arg));
 855        return;
 856    }
 857    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
 858        tcg_out32(s, ADDI | TAI(ret, 0, arg));
 859        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
 860        return;
 861    }
 862
 863    /* Load masked 16-bit value.  */
 864    if (arg > 0 && (arg & 0x8000)) {
 865        tmp = arg | 0x7fff;
 866        if ((tmp & (tmp + 1)) == 0) {
 867            int mb = clz64(tmp + 1) + 1;
 868            tcg_out32(s, ADDI | TAI(ret, 0, arg));
 869            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
 870            return;
 871        }
 872    }
 873
 874    /* Load common masks with 2 insns.  */
 875    shift = ctz64(arg);
 876    tmp = arg >> shift;
 877    if (tmp == (int16_t)tmp) {
 878        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
 879        tcg_out_shli64(s, ret, ret, shift);
 880        return;
 881    }
 882    shift = clz64(arg);
 883    if (tcg_out_movi_one(s, ret, arg << shift)) {
 884        tcg_out_shri64(s, ret, ret, shift);
 885        return;
 886    }
 887
 888    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
 889    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
 890        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
 891        return;
 892    }
 893
 894    /* Use the constant pool, if possible.  */
 895    if (!in_prologue && USE_REG_TB) {
 896        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
 897                       -(intptr_t)s->code_gen_ptr);
 898        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
 899        return;
 900    }
 901
 902    tmp = arg >> 31 >> 1;
 903    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
 904    if (tmp) {
 905        tcg_out_shli64(s, ret, ret, 32);
 906    }
 907    if (arg & 0xffff0000) {
 908        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
 909    }
 910    if (arg & 0xffff) {
 911        tcg_out32(s, ORI | SAI(ret, ret, arg));
 912    }
 913}
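
/*
 * Worked example of the 5-insn worst case, assuming a prologue-time
 * call (so neither TCG_REG_TB nor the constant pool is available):
 * arg = 0x0001000200030004 reaches the tail above and emits
 *     lis   ret,0x0001
 *     ori   ret,ret,0x0002
 *     sldi  ret,ret,32
 *     oris  ret,ret,0x0003
 *     ori   ret,ret,0x0004
 */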
 914
 915static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
 916                             tcg_target_long val)
 917{
 918    uint32_t load_insn;
 919    int rel, low;
 920    intptr_t add;
 921
 922    low = (int8_t)val;
 923    if (low >= -16 && low < 16) {
 924        if (val == (tcg_target_long)dup_const(MO_8, low)) {
 925            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
 926            return;
 927        }
 928        if (val == (tcg_target_long)dup_const(MO_16, low)) {
 929            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
 930            return;
 931        }
 932        if (val == (tcg_target_long)dup_const(MO_32, low)) {
 933            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
 934            return;
 935        }
 936    }
 937    if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
 938        tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
 939        return;
 940    }
 941
 942    /*
 943     * Otherwise we must load the value from the constant pool.
 944     */
 945    if (USE_REG_TB) {
 946        rel = R_PPC_ADDR16;
 947        add = -(intptr_t)s->code_gen_ptr;
 948    } else {
 949        rel = R_PPC_ADDR32;
 950        add = 0;
 951    }
 952
 953    if (have_vsx) {
 954        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
 955        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
 956        if (TCG_TARGET_REG_BITS == 64) {
 957            new_pool_label(s, val, rel, s->code_ptr, add);
 958        } else {
 959            new_pool_l2(s, rel, s->code_ptr, add, val, val);
 960        }
 961    } else {
 962        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
 963        if (TCG_TARGET_REG_BITS == 64) {
 964            new_pool_l2(s, rel, s->code_ptr, add, val, val);
 965        } else {
 966            new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
 967        }
 968    }
 969
 970    if (USE_REG_TB) {
 971        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
 972        load_insn |= RA(TCG_REG_TB);
 973    } else {
 974        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
 975        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
 976    }
 977    tcg_out32(s, load_insn);
 978}
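
/*
 * Example: val = dup_const(MO_8, 5) = 0x0505050505050505 has low = 5
 * within [-16, 16) and is emitted as the single insn "vspltisb ret,5";
 * something like dup_const(MO_32, 0x12345678) fails every
 * splat-immediate test and takes the constant pool path above.
 */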
 979
 980static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
 981                         tcg_target_long arg)
 982{
 983    switch (type) {
 984    case TCG_TYPE_I32:
 985    case TCG_TYPE_I64:
 986        tcg_debug_assert(ret < TCG_REG_V0);
 987        tcg_out_movi_int(s, type, ret, arg, false);
 988        break;
 989
 990    case TCG_TYPE_V64:
 991    case TCG_TYPE_V128:
 992        tcg_debug_assert(ret >= TCG_REG_V0);
 993        tcg_out_dupi_vec(s, type, ret, arg);
 994        break;
 995
 996    default:
 997        g_assert_not_reached();
 998    }
 999}
1000
1001static bool mask_operand(uint32_t c, int *mb, int *me)
1002{
1003    uint32_t lsb, test;
1004
1005    /* Accept a bit pattern like:
1006           0....01....1
1007           1....10....0
1008           0..01..10..0
1009       Keep track of the transitions.  */
1010    if (c == 0 || c == -1) {
1011        return false;
1012    }
1013    test = c;
1014    lsb = test & -test;
1015    test += lsb;
1016    if (test & (test - 1)) {
1017        return false;
1018    }
1019
1020    *me = clz32(lsb);
1021    *mb = test ? clz32(test & -test) + 1 : 0;
1022    return true;
1023}
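
/*
 * Worked example: c = 0x00ffff00 gives lsb = 0x100 and
 * test = 0x01000000, a power of two, so the mask is accepted with
 * *mb = 8 and *me = 23 -- the big-endian bit numbers RLWINM expects
 * for a mask selecting bits 8..23.
 */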
1024
1025static bool mask64_operand(uint64_t c, int *mb, int *me)
1026{
1027    uint64_t lsb;
1028
1029    if (c == 0) {
1030        return false;
1031    }
1032
1033    lsb = c & -c;
1034    /* Accept 1..10..0.  */
1035    if (c == -lsb) {
1036        *mb = 0;
1037        *me = clz64(lsb);
1038        return true;
1039    }
1040    /* Accept 0..01..1.  */
1041    if (lsb == 1 && (c & (c + 1)) == 0) {
1042        *mb = clz64(c + 1) + 1;
1043        *me = 63;
1044        return true;
1045    }
1046    return false;
1047}
1048
1049static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1050{
1051    int mb, me;
1052
1053    if (mask_operand(c, &mb, &me)) {
1054        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1055    } else if ((c & 0xffff) == c) {
1056        tcg_out32(s, ANDI | SAI(src, dst, c));
1057        return;
1058    } else if ((c & 0xffff0000) == c) {
1059        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1060        return;
1061    } else {
1062        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1063        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1064    }
1065}
1066
1067static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1068{
1069    int mb, me;
1070
1071    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1072    if (mask64_operand(c, &mb, &me)) {
1073        if (mb == 0) {
1074            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1075        } else {
1076            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1077        }
1078    } else if ((c & 0xffff) == c) {
1079        tcg_out32(s, ANDI | SAI(src, dst, c));
1080        return;
1081    } else if ((c & 0xffff0000) == c) {
1082        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1083        return;
1084    } else {
1085        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1086        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1087    }
1088}
1089
1090static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1091                           int op_lo, int op_hi)
1092{
1093    if (c >> 16) {
1094        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1095        src = dst;
1096    }
1097    if (c & 0xffff) {
1098        tcg_out32(s, op_lo | SAI(src, dst, c));
1099        src = dst;
1100    }
1101}
1102
1103static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1104{
1105    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
1106}
1107
1108static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1109{
1110    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
1111}
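
/*
 * Example: tcg_out_ori32(s, dst, src, 0x12345678) emits
 *     oris dst,src,0x1234
 *     ori  dst,dst,0x5678
 * while a constant fitting in one half, e.g. 0x5678, needs only the
 * single "ori".
 */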
1112
1113static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
1114{
1115    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1116    if (in_range_b(disp)) {
1117        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1118    } else {
1119        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1120        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1121        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1122    }
1123}
1124
1125static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
1126                             TCGReg base, tcg_target_long offset)
1127{
1128    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
1129    bool is_int_store = false;
1130    TCGReg rs = TCG_REG_TMP1;
1131
1132    switch (opi) {
1133    case LD: case LWA:
1134        align = 3;
1135        /* FALLTHRU */
1136    default:
1137        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
1138            rs = rt;
1139            break;
1140        }
1141        break;
1142    case LXSD:
1143    case STXSD:
1144        align = 3;
1145        break;
1146    case LXV:
1147    case STXV:
1148        align = 15;
1149        break;
1150    case STD:
1151        align = 3;
1152        /* FALLTHRU */
1153    case STB: case STH: case STW:
1154        is_int_store = true;
1155        break;
1156    }
1157
1158    /* For unaligned, or very large offsets, use the indexed form.  */
1159    if (offset & align || offset != (int32_t)offset || opi == 0) {
1160        if (rs == base) {
1161            rs = TCG_REG_R0;
1162        }
1163        tcg_debug_assert(!is_int_store || rs != rt);
1164        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
1165        tcg_out32(s, opx | TAB(rt & 31, base, rs));
1166        return;
1167    }
1168
1169    l0 = (int16_t)offset;
1170    offset = (offset - l0) >> 16;
1171    l1 = (int16_t)offset;
1172
1173    if (l1 < 0 && orig >= 0) {
1174        extra = 0x4000;
1175        l1 = (int16_t)(offset - 0x4000);
1176    }
1177    if (l1) {
1178        tcg_out32(s, ADDIS | TAI(rs, base, l1));
1179        base = rs;
1180    }
1181    if (extra) {
1182        tcg_out32(s, ADDIS | TAI(rs, base, extra));
1183        base = rs;
1184    }
1185    if (opi != ADDI || base != rt || l0 != 0) {
1186        tcg_out32(s, opi | TAI(rt & 31, base, l0));
1187    }
1188}
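
/*
 * Worked example of the offset split: a 32-bit load at offset 0x487fff
 * yields l0 = 0x7fff and l1 = 0x48, emitting
 *     addis rt,base,0x48
 *     lwz   rt,0x7fff(rt)
 * (the destination doubles as the scratch for loads); an offset whose
 * high part would otherwise sign-extend negatively gets the extra
 * 0x4000 ADDIS step.
 */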
1189
1190static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
1191                           TCGReg va, TCGReg vb, int shb)
1192{
1193    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
1194}
1195
1196static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1197                       TCGReg base, intptr_t offset)
1198{
1199    int shift;
1200
1201    switch (type) {
1202    case TCG_TYPE_I32:
1203        if (ret < TCG_REG_V0) {
1204            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
1205            break;
1206        }
1207        if (have_isa_2_07 && have_vsx) {
1208            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
1209            break;
1210        }
1211        tcg_debug_assert((offset & 3) == 0);
1212        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
1213        shift = (offset - 4) & 0xc;
1214        if (shift) {
1215            tcg_out_vsldoi(s, ret, ret, ret, shift);
1216        }
1217        break;
1218    case TCG_TYPE_I64:
1219        if (ret < TCG_REG_V0) {
1220            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1221            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
1222            break;
1223        }
1224        /* fallthru */
1225    case TCG_TYPE_V64:
1226        tcg_debug_assert(ret >= TCG_REG_V0);
1227        if (have_vsx) {
1228            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
1229                             ret, base, offset);
1230            break;
1231        }
1232        tcg_debug_assert((offset & 7) == 0);
1233        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
1234        if (offset & 8) {
1235            tcg_out_vsldoi(s, ret, ret, ret, 8);
1236        }
1237        break;
1238    case TCG_TYPE_V128:
1239        tcg_debug_assert(ret >= TCG_REG_V0);
1240        tcg_debug_assert((offset & 15) == 0);
1241        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
1242                         LVX, ret, base, offset);
1243        break;
1244    default:
1245        g_assert_not_reached();
1246    }
1247}
1248
1249static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1250                              TCGReg base, intptr_t offset)
1251{
1252    int shift;
1253
1254    switch (type) {
1255    case TCG_TYPE_I32:
1256        if (arg < TCG_REG_V0) {
1257            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1258            break;
1259        }
1260        if (have_isa_2_07 && have_vsx) {
1261            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1262            break;
1263        }
1265        tcg_debug_assert((offset & 3) == 0);
1266        shift = (offset - 4) & 0xc;
1267        if (shift) {
1268            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1269            arg = TCG_VEC_TMP1;
1270        }
1271        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1272        break;
1273    case TCG_TYPE_I64:
1274        if (arg < TCG_REG_V0) {
1275            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1276            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1277            break;
1278        }
1279        /* fallthru */
1280    case TCG_TYPE_V64:
1281        tcg_debug_assert(arg >= TCG_REG_V0);
1282        if (have_vsx) {
1283            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1284                             STXSDX, arg, base, offset);
1285            break;
1286        }
1287        tcg_debug_assert((offset & 7) == 0);
1288        if (offset & 8) {
1289            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1290            arg = TCG_VEC_TMP1;
1291        }
1292        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1293        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1294        break;
1295    case TCG_TYPE_V128:
1296        tcg_debug_assert(arg >= TCG_REG_V0);
1297        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1298                         STVX, arg, base, offset);
1299        break;
1300    default:
1301        g_assert_not_reached();
1302    }
1303}
1304
1305static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1306                               TCGReg base, intptr_t ofs)
1307{
1308    return false;
1309}
1310
1311static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1312                        int const_arg2, int cr, TCGType type)
1313{
1314    int imm;
1315    uint32_t op;
1316
1317    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1318
1319    /* Simplify the comparisons below wrt CMPI.  */
1320    if (type == TCG_TYPE_I32) {
1321        arg2 = (int32_t)arg2;
1322    }
1323
1324    switch (cond) {
1325    case TCG_COND_EQ:
1326    case TCG_COND_NE:
1327        if (const_arg2) {
1328            if ((int16_t) arg2 == arg2) {
1329                op = CMPI;
1330                imm = 1;
1331                break;
1332            } else if ((uint16_t) arg2 == arg2) {
1333                op = CMPLI;
1334                imm = 1;
1335                break;
1336            }
1337        }
1338        op = CMPL;
1339        imm = 0;
1340        break;
1341
1342    case TCG_COND_LT:
1343    case TCG_COND_GE:
1344    case TCG_COND_LE:
1345    case TCG_COND_GT:
1346        if (const_arg2) {
1347            if ((int16_t) arg2 == arg2) {
1348                op = CMPI;
1349                imm = 1;
1350                break;
1351            }
1352        }
1353        op = CMP;
1354        imm = 0;
1355        break;
1356
1357    case TCG_COND_LTU:
1358    case TCG_COND_GEU:
1359    case TCG_COND_LEU:
1360    case TCG_COND_GTU:
1361        if (const_arg2) {
1362            if ((uint16_t) arg2 == arg2) {
1363                op = CMPLI;
1364                imm = 1;
1365                break;
1366            }
1367        }
1368        op = CMPL;
1369        imm = 0;
1370        break;
1371
1372    default:
1373        tcg_abort();
1374    }
1375    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1376
1377    if (imm) {
1378        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1379    } else {
1380        if (const_arg2) {
1381            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1382            arg2 = TCG_REG_R0;
1383        }
1384        tcg_out32(s, op | RA(arg1) | RB(arg2));
1385    }
1386}
1387
1388static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1389                                TCGReg dst, TCGReg src)
1390{
1391    if (type == TCG_TYPE_I32) {
1392        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1393        tcg_out_shri32(s, dst, dst, 5);
1394    } else {
1395        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1396        tcg_out_shri64(s, dst, dst, 6);
1397    }
1398}
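
/*
 * Why this works: cntlzw returns 32 only when the source is zero and
 * 0..31 otherwise, so shifting the count right by 5 (6 for the 64-bit
 * cntlzd case) leaves exactly the setcond result, 1 or 0.
 */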
1399
1400static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
1401{
1402    /* X != 0 implies X + -1 generates a carry.  Extra addition
1403       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
1404    if (dst != src) {
1405        tcg_out32(s, ADDIC | TAI(dst, src, -1));
1406        tcg_out32(s, SUBFE | TAB(dst, dst, src));
1407    } else {
1408        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1409        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1410    }
1411}
1412
1413static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1414                                  bool const_arg2)
1415{
1416    if (const_arg2) {
1417        if ((uint32_t)arg2 == arg2) {
1418            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1419        } else {
1420            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1421            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1422        }
1423    } else {
1424        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1425    }
1426    return TCG_REG_R0;
1427}
1428
1429static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1430                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
1431                            int const_arg2)
1432{
1433    int crop, sh;
1434
1435    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1436
1437    /* Ignore high bits of a potential constant arg2.  */
1438    if (type == TCG_TYPE_I32) {
1439        arg2 = (uint32_t)arg2;
1440    }
1441
1442    /* Handle common and trivial cases before handling anything else.  */
1443    if (arg2 == 0) {
1444        switch (cond) {
1445        case TCG_COND_EQ:
1446            tcg_out_setcond_eq0(s, type, arg0, arg1);
1447            return;
1448        case TCG_COND_NE:
1449            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1450                tcg_out_ext32u(s, TCG_REG_R0, arg1);
1451                arg1 = TCG_REG_R0;
1452            }
1453            tcg_out_setcond_ne0(s, arg0, arg1);
1454            return;
1455        case TCG_COND_GE:
1456            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1457            arg1 = arg0;
1458            /* FALLTHRU */
1459        case TCG_COND_LT:
1460            /* Extract the sign bit.  */
1461            if (type == TCG_TYPE_I32) {
1462                tcg_out_shri32(s, arg0, arg1, 31);
1463            } else {
1464                tcg_out_shri64(s, arg0, arg1, 63);
1465            }
1466            return;
1467        default:
1468            break;
1469        }
1470    }
1471
1472    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1473       All other cases below are also at least 3 insns, so speed up the
1474       code generator by not considering them and always using ISEL.  */
1475    if (have_isel) {
1476        int isel, tab;
1477
1478        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1479
1480        isel = tcg_to_isel[cond];
1481
1482        tcg_out_movi(s, type, arg0, 1);
1483        if (isel & 1) {
1484            /* arg0 = (bc ? 0 : 1) */
1485            tab = TAB(arg0, 0, arg0);
1486            isel &= ~1;
1487        } else {
1488            /* arg0 = (bc ? 1 : 0) */
1489            tcg_out_movi(s, type, TCG_REG_R0, 0);
1490            tab = TAB(arg0, arg0, TCG_REG_R0);
1491        }
1492        tcg_out32(s, isel | tab);
1493        return;
1494    }
1495
1496    switch (cond) {
1497    case TCG_COND_EQ:
1498        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1499        tcg_out_setcond_eq0(s, type, arg0, arg1);
1500        return;
1501
1502    case TCG_COND_NE:
1503        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1504        /* Discard the high bits only once, rather than both inputs.  */
1505        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1506            tcg_out_ext32u(s, TCG_REG_R0, arg1);
1507            arg1 = TCG_REG_R0;
1508        }
1509        tcg_out_setcond_ne0(s, arg0, arg1);
1510        return;
1511
1512    case TCG_COND_GT:
1513    case TCG_COND_GTU:
1514        sh = 30;
1515        crop = 0;
1516        goto crtest;
1517
1518    case TCG_COND_LT:
1519    case TCG_COND_LTU:
1520        sh = 29;
1521        crop = 0;
1522        goto crtest;
1523
1524    case TCG_COND_GE:
1525    case TCG_COND_GEU:
1526        sh = 31;
1527        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
1528        goto crtest;
1529
1530    case TCG_COND_LE:
1531    case TCG_COND_LEU:
1532        sh = 31;
1533        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
1534    crtest:
1535        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1536        if (crop) {
1537            tcg_out32(s, crop);
1538        }
1539        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1540        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1541        break;
1542
1543    default:
1544        tcg_abort();
1545    }
1546}
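
/*
 * Note on the crtest path: after MFOCRF the four cr7 bits occupy bits
 * 28..31 of R0, and the rlwinm with mask 31,31 rotates the desired bit
 * into the low bit: sh = 29 selects bit 28 (cr7.lt), sh = 30 selects
 * bit 29 (cr7.gt), and sh = 31 selects bit 30 (cr7.eq), which the
 * CRNOR variants have first rewritten to hold the inverted lt/gt
 * result.
 */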
1547
1548static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1549{
1550    if (l->has_value) {
1551        bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr);
1552    } else {
1553        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1554    }
1555    tcg_out32(s, bc);
1556}
1557
1558static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1559                           TCGArg arg1, TCGArg arg2, int const_arg2,
1560                           TCGLabel *l, TCGType type)
1561{
1562    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1563    tcg_out_bc(s, tcg_to_bc[cond], l);
1564}
1565
1566static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1567                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1568                            TCGArg v2, bool const_c2)
1569{
1570    /* If for some reason both inputs are zero, don't produce bad code.  */
1571    if (v1 == 0 && v2 == 0) {
1572        tcg_out_movi(s, type, dest, 0);
1573        return;
1574    }
1575
1576    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1577
1578    if (have_isel) {
1579        int isel = tcg_to_isel[cond];
1580
1581        /* Swap the V operands if the operation indicates inversion.  */
1582        if (isel & 1) {
1583            int t = v1;
1584            v1 = v2;
1585            v2 = t;
1586            isel &= ~1;
1587        }
1588        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1589        if (v2 == 0) {
1590            tcg_out_movi(s, type, TCG_REG_R0, 0);
1591        }
1592        tcg_out32(s, isel | TAB(dest, v1, v2));
1593    } else {
1594        if (dest == v2) {
1595            cond = tcg_invert_cond(cond);
1596            v2 = v1;
1597        } else if (dest != v1) {
1598            if (v1 == 0) {
1599                tcg_out_movi(s, type, dest, 0);
1600            } else {
1601                tcg_out_mov(s, type, dest, v1);
1602            }
1603        }
1604        /* Branch forward over one insn */
1605        tcg_out32(s, tcg_to_bc[cond] | 8);
1606        if (v2 == 0) {
1607            tcg_out_movi(s, type, dest, 0);
1608        } else {
1609            tcg_out_mov(s, type, dest, v2);
1610        }
1611    }
1612}
1613
1614static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
1615                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
1616{
1617    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
1618        tcg_out32(s, opc | RA(a0) | RS(a1));
1619    } else {
1620        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
1621        /* Note that the only other valid constant for a2 is 0.  */
1622        if (have_isel) {
1623            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
1624            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
1625        } else if (!const_a2 && a0 == a2) {
1626            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
1627            tcg_out32(s, opc | RA(a0) | RS(a1));
1628        } else {
1629            tcg_out32(s, opc | RA(a0) | RS(a1));
1630            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
1631            if (const_a2) {
1632                tcg_out_movi(s, type, a0, 0);
1633            } else {
1634                tcg_out_mov(s, type, a0, a2);
1635            }
1636        }
1637    }
1638}
1639
1640static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1641                         const int *const_args)
1642{
1643    static const struct { uint8_t bit1, bit2; } bits[] = {
1644        [TCG_COND_LT ] = { CR_LT, CR_LT },
1645        [TCG_COND_LE ] = { CR_LT, CR_GT },
1646        [TCG_COND_GT ] = { CR_GT, CR_GT },
1647        [TCG_COND_GE ] = { CR_GT, CR_LT },
1648        [TCG_COND_LTU] = { CR_LT, CR_LT },
1649        [TCG_COND_LEU] = { CR_LT, CR_GT },
1650        [TCG_COND_GTU] = { CR_GT, CR_GT },
1651        [TCG_COND_GEU] = { CR_GT, CR_LT },
1652    };
1653
1654    TCGCond cond = args[4], cond2;
1655    TCGArg al, ah, bl, bh;
1656    int blconst, bhconst;
1657    int op, bit1, bit2;
1658
1659    al = args[0];
1660    ah = args[1];
1661    bl = args[2];
1662    bh = args[3];
1663    blconst = const_args[2];
1664    bhconst = const_args[3];
1665
1666    switch (cond) {
1667    case TCG_COND_EQ:
1668        op = CRAND;
1669        goto do_equality;
1670    case TCG_COND_NE:
1671        op = CRNAND;
1672    do_equality:
1673        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1674        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1675        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1676        break;
1677
1678    case TCG_COND_LT:
1679    case TCG_COND_LE:
1680    case TCG_COND_GT:
1681    case TCG_COND_GE:
1682    case TCG_COND_LTU:
1683    case TCG_COND_LEU:
1684    case TCG_COND_GTU:
1685    case TCG_COND_GEU:
1686        bit1 = bits[cond].bit1;
1687        bit2 = bits[cond].bit2;
1688        op = (bit1 != bit2 ? CRANDC : CRAND);
1689        cond2 = tcg_unsigned_cond(cond);
1690
1691        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1692        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1693        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1694        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1695        break;
1696
1697    default:
1698        tcg_abort();
1699    }
1700}
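
/*
 * Shape of the result: for the ordered cases this leaves in cr7.eq,
 * e.g. for LT,
 *     cr7.eq = (ah < bh) || (ah == bh && al <u bl)
 * by combining the low-part unsigned test with cr6.eq (the CRANDC or
 * CRAND) and then OR'ing in the strict high-part bit from cr6.
 */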
1701
1702static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1703                             const int *const_args)
1704{
1705    tcg_out_cmp2(s, args + 1, const_args + 1);
1706    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1707    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
1708}
1709
1710static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
1711                             const int *const_args)
1712{
1713    tcg_out_cmp2(s, args, const_args);
1714    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1715}
1716
1717static void tcg_out_mb(TCGContext *s, TCGArg a0)
1718{
1719    uint32_t insn = HWSYNC;
1720    a0 &= TCG_MO_ALL;
1721    if (a0 == TCG_MO_LD_LD) {
1722        insn = LWSYNC;
1723    } else if (a0 == TCG_MO_ST_ST) {
1724        insn = EIEIO;
1725    }
1726    tcg_out32(s, insn);
1727}
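
/*
 * Example: a barrier of exactly TCG_MO_ST_ST weakens to "eieio" and a
 * pure load-load barrier to "lwsync"; any mixed requirement keeps the
 * full "sync" (HWSYNC).
 */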
1728
1729void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1730                              uintptr_t addr)
1731{
1732    if (TCG_TARGET_REG_BITS == 64) {
1733        tcg_insn_unit i1, i2;
1734        intptr_t tb_diff = addr - tc_ptr;
1735        intptr_t br_diff = addr - (jmp_addr + 4);
1736        uint64_t pair;
1737
1738        /* This does not exercise the range of the branch, but we still
1739           need to be able to load the new value of TCG_REG_TB.
1740           Fortunately this short in-range form is the common case.  */
1741        if (tb_diff == (int16_t)tb_diff) {
1742            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
1743            i2 = B | (br_diff & 0x3fffffc);
1744        } else {
1745            intptr_t lo = (int16_t)tb_diff;
1746            intptr_t hi = (int32_t)(tb_diff - lo);
1747            assert(tb_diff == hi + lo);
1748            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
1749            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
1750        }
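            /* For illustration: tb_diff = 0x1234abcd splits into
               lo = (int16_t)0xabcd = -0x5433 and hi = 0x12350000;
               addis then adds 0x1235 << 16 and addi adds back -0x5433,
               and 0x12350000 - 0x5433 == 0x1234abcd as required.  */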
1751#ifdef HOST_WORDS_BIGENDIAN
1752        pair = (uint64_t)i1 << 32 | i2;
1753#else
1754        pair = (uint64_t)i2 << 32 | i1;
1755#endif
1756
1757        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
1758           within atomic_set that would fail to build on a ppc32 host.  */
1759        atomic_set__nocheck((uint64_t *)jmp_addr, pair);
1760        flush_icache_range(jmp_addr, jmp_addr + 8);
1761    } else {
1762        intptr_t diff = addr - jmp_addr;
1763        tcg_debug_assert(in_range_b(diff));
1764        atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
1765        flush_icache_range(jmp_addr, jmp_addr + 4);
1766    }
1767}
1768
1769static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1770{
1771#ifdef _CALL_AIX
1772    /* Look through the function descriptor.  If the branch is in range
1773       and the TOC fits in 32 bits, we can branch to the target directly.  */
1774    void *tgt = ((void **)target)[0];
1775    uintptr_t toc = ((uintptr_t *)target)[1];
1776    intptr_t diff = tcg_pcrel_diff(s, tgt);
1777
1778    if (in_range_b(diff) && toc == (uint32_t)toc) {
1779        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1780        tcg_out_b(s, LK, tgt);
1781    } else {
1782        /* Fold the low bits of the constant into the addresses below.  */
1783        intptr_t arg = (intptr_t)target;
1784        int ofs = (int16_t)arg;
1785
1786        if (ofs + 8 < 0x8000) {
1787            arg -= ofs;
1788        } else {
1789            ofs = 0;
1790        }
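            /* For illustration: if the low 16 bits of target are 0x9abc,
               then ofs = (int16_t)0x9abc = -0x6544 and arg is rounded up
               by 0x6544, so the two loads below use in-range displacements
               ofs and ofs + SZP to reconstruct the original addresses.  */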
1791        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1792        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1793        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1794        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1795        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1796    }
1797#elif defined(_CALL_ELF) && _CALL_ELF == 2
1798    intptr_t diff;
1799
1800    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1801       address, which the callee uses to compute its TOC address.  */
1802    /* FIXME: when the branch is in range, we could avoid the r12 load
1803       if we knew that the destination used the same TOC, and what its
1804       local entry point offset is.  */
1805    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1806
1807    diff = tcg_pcrel_diff(s, target);
1808    if (in_range_b(diff)) {
1809        tcg_out_b(s, LK, target);
1810    } else {
1811        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1812        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1813    }
1814#else
1815    tcg_out_b(s, LK, target);
1816#endif
1817}
1818
1819static const uint32_t qemu_ldx_opc[16] = {
1820    [MO_UB] = LBZX,
1821    [MO_UW] = LHZX,
1822    [MO_UL] = LWZX,
1823    [MO_Q]  = LDX,
1824    [MO_SW] = LHAX,
1825    [MO_SL] = LWAX,
1826    [MO_BSWAP | MO_UB] = LBZX,
1827    [MO_BSWAP | MO_UW] = LHBRX,
1828    [MO_BSWAP | MO_UL] = LWBRX,
1829    [MO_BSWAP | MO_Q]  = LDBRX,
1830};
1831
1832static const uint32_t qemu_stx_opc[16] = {
1833    [MO_UB] = STBX,
1834    [MO_UW] = STHX,
1835    [MO_UL] = STWX,
1836    [MO_Q]  = STDX,
1837    [MO_BSWAP | MO_UB] = STBX,
1838    [MO_BSWAP | MO_UW] = STHBRX,
1839    [MO_BSWAP | MO_UL] = STWBRX,
1840    [MO_BSWAP | MO_Q]  = STDBRX,
1841};
1842
1843static const uint32_t qemu_exts_opc[4] = {
1844    EXTSB, EXTSH, EXTSW, 0
1845};
1846
1847#if defined (CONFIG_SOFTMMU)
1848#include "tcg-ldst.inc.c"
1849
1850/* helper signature: helper_ld_mmu(CPUArchState *env, target_ulong addr,
1851 *                                 TCGMemOpIdx oi, uintptr_t ra)
1852 */
1853static void * const qemu_ld_helpers[16] = {
1854    [MO_UB]   = helper_ret_ldub_mmu,
1855    [MO_LEUW] = helper_le_lduw_mmu,
1856    [MO_LEUL] = helper_le_ldul_mmu,
1857    [MO_LEQ]  = helper_le_ldq_mmu,
1858    [MO_BEUW] = helper_be_lduw_mmu,
1859    [MO_BEUL] = helper_be_ldul_mmu,
1860    [MO_BEQ]  = helper_be_ldq_mmu,
1861};
1862
1863/* helper signature: helper_st_mmu(CPUArchState *env, target_ulong addr,
1864 *                                 uintxx_t val, TCGMemOpIdx oi, uintptr_t ra)
1865 */
1866static void * const qemu_st_helpers[16] = {
1867    [MO_UB]   = helper_ret_stb_mmu,
1868    [MO_LEUW] = helper_le_stw_mmu,
1869    [MO_LEUL] = helper_le_stl_mmu,
1870    [MO_LEQ]  = helper_le_stq_mmu,
1871    [MO_BEUW] = helper_be_stw_mmu,
1872    [MO_BEUL] = helper_be_stl_mmu,
1873    [MO_BEQ]  = helper_be_stq_mmu,
1874};
1875
1876/* We expect to use a 16-bit negative offset from ENV.  */
1877QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1878QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1879
1880/* Perform the TLB load and compare.  Places the result of the comparison
1881   in CR7, loads the addend of the TLB into R3, and returns the register
1882   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
1883
1884static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
1885                               TCGReg addrlo, TCGReg addrhi,
1886                               int mem_index, bool is_read)
1887{
1888    int cmp_off
1889        = (is_read
1890           ? offsetof(CPUTLBEntry, addr_read)
1891           : offsetof(CPUTLBEntry, addr_write));
1892    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1893    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1894    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1895    unsigned s_bits = opc & MO_SIZE;
1896    unsigned a_bits = get_alignment_bits(opc);
1897
1898    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
1899    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
1900    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
1901
1902    /* Extract the page index, shifted into place for tlb index.  */
1903    if (TCG_TARGET_REG_BITS == 32) {
1904        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
1905                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1906    } else {
1907        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
1908                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1909    }
1910    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
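        /* R3 now holds the byte offset of the TLB entry within the table;
           the fast-path mask loaded above is kept pre-shifted by
           CPU_TLB_ENTRY_BITS, so no further scaling of the index is
           needed before the add or indexed load below.  */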
1911
1912    /* Load the TLB comparator.  */
1913    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
1914        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
1915                        ? LWZUX : LDUX);
1916        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
1917    } else {
1918        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
1919        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1920            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
1921            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
1922        } else {
1923            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
1924        }
1925    }
1926
1927    /* Load the TLB addend for use on the fast path.  Do this as early
1928       as possible to minimize any load-use delay.  */
1929    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
1930               offsetof(CPUTLBEntry, addend));
1931
1932    /* Clear the non-page, non-alignment bits from the address */
1933    if (TCG_TARGET_REG_BITS == 32) {
1934        /* We don't support unaligned accesses on 32-bit hosts.
1935         * Preserve the bottom bits so that an unaligned access
1936         * triggers a comparison failure and takes the slow path.
1937         */
1938        if (a_bits < s_bits) {
1939            a_bits = s_bits;
1940        }
1941        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
1942                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1943    } else {
1944        TCGReg t = addrlo;
1945
1946        /* If the access is unaligned, we need to make sure we fail if we
1947         * cross a page boundary.  The trick is to add the access size-1
1948         * to the address before masking the low bits.  That will make the
1949         * address overflow to the next page if we cross a page boundary,
1950         * which will then force a mismatch of the TLB compare.
1951         */
1952        if (a_bits < s_bits) {
1953            unsigned a_mask = (1 << a_bits) - 1;
1954            unsigned s_mask = (1 << s_bits) - 1;
1955            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
1956            t = TCG_REG_R0;
1957        }
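            /* For illustration: an 8-byte access (s_bits = 3) with no
               alignment requirement adds 7, so an access at page offset
               0xffa becomes 0x1001, overflows into the next page, and
               the TLB compare below forces the slow path.  */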
1958
1959        /* Mask the address for the requested alignment.  */
1960        if (TARGET_LONG_BITS == 32) {
1961            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
1962                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1963            /* Zero-extend the address for use in the final address.  */
1964            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
1965            addrlo = TCG_REG_R4;
1966        } else if (a_bits == 0) {
1967            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
1968        } else {
1969            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
1970                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
1971            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
1972        }
1973    }
1974
1975    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1976        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1977                    0, 7, TCG_TYPE_I32);
1978        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
1979        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1980    } else {
1981        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1982                    0, 7, TCG_TYPE_TL);
1983    }
1984
1985    return addrlo;
1986}
1987
1988/* Record the context of a call to the out of line helper code for the slow
1989   path for a load or store, so that we can later generate the correct
1990   helper code.  */
1991static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1992                                TCGReg datalo_reg, TCGReg datahi_reg,
1993                                TCGReg addrlo_reg, TCGReg addrhi_reg,
1994                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
1995{
1996    TCGLabelQemuLdst *label = new_ldst_label(s);
1997
1998    label->is_ld = is_ld;
1999    label->oi = oi;
2000    label->datalo_reg = datalo_reg;
2001    label->datahi_reg = datahi_reg;
2002    label->addrlo_reg = addrlo_reg;
2003    label->addrhi_reg = addrhi_reg;
2004    label->raddr = raddr;
2005    label->label_ptr[0] = lptr;
2006}
2007
2008static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2009{
2010    TCGMemOpIdx oi = lb->oi;
2011    MemOp opc = get_memop(oi);
2012    TCGReg hi, lo, arg = TCG_REG_R3;
2013
2014    if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
2015        return false;
2016    }
2017
2018    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2019
2020    lo = lb->addrlo_reg;
2021    hi = lb->addrhi_reg;
2022    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2023#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2024        arg |= 1;
2025#endif
2026        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2027        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2028    } else {
2029        /* If the address needed to be zero-extended, we'll have already
2030           placed it in R4.  The only remaining case is 64-bit guest.  */
2031        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2032    }
2033
2034    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2035    tcg_out32(s, MFSPR | RT(arg) | LR);
2036
2037    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2038
2039    lo = lb->datalo_reg;
2040    hi = lb->datahi_reg;
2041    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2042        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2043        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2044    } else if (opc & MO_SIGN) {
2045        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2046        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2047    } else {
2048        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2049    }
2050
2051    tcg_out_b(s, 0, lb->raddr);
2052    return true;
2053}
2054
2055static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2056{
2057    TCGMemOpIdx oi = lb->oi;
2058    MemOp opc = get_memop(oi);
2059    MemOp s_bits = opc & MO_SIZE;
2060    TCGReg hi, lo, arg = TCG_REG_R3;
2061
2062    if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
2063        return false;
2064    }
2065
2066    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2067
2068    lo = lb->addrlo_reg;
2069    hi = lb->addrhi_reg;
2070    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2071#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2072        arg |= 1;
2073#endif
2074        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2075        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2076    } else {
2077        /* If the address needed to be zero-extended, we'll have already
2078           placed it in R4.  The only remaining case is 64-bit guest.  */
2079        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2080    }
2081
2082    lo = lb->datalo_reg;
2083    hi = lb->datahi_reg;
2084    if (TCG_TARGET_REG_BITS == 32) {
2085        switch (s_bits) {
2086        case MO_64:
2087#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2088            arg |= 1;
2089#endif
2090            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2091            /* FALLTHRU */
2092        case MO_32:
2093            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2094            break;
2095        default:
2096            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2097            break;
2098        }
2099    } else {
2100        if (s_bits == MO_64) {
2101            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2102        } else {
2103            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2104        }
2105    }
2106
2107    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2108    tcg_out32(s, MFSPR | RT(arg) | LR);
2109
2110    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2111
2112    tcg_out_b(s, 0, lb->raddr);
2113    return true;
2114}
2115#endif /* SOFTMMU */
2116
2117static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2118{
2119    TCGReg datalo, datahi, addrlo, rbase;
2120    TCGReg addrhi __attribute__((unused));
2121    TCGMemOpIdx oi;
2122    MemOp opc, s_bits;
2123#ifdef CONFIG_SOFTMMU
2124    int mem_index;
2125    tcg_insn_unit *label_ptr;
2126#endif
2127
2128    datalo = *args++;
2129    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2130    addrlo = *args++;
2131    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2132    oi = *args++;
2133    opc = get_memop(oi);
2134    s_bits = opc & MO_SIZE;
2135
2136#ifdef CONFIG_SOFTMMU
2137    mem_index = get_mmuidx(oi);
2138    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2139
2140    /* Conditional branch-link to the (to-be-patched) slow path; the
           link value gives the slow path its fast-path return address.  */
2141    label_ptr = s->code_ptr;
2142    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2143
2144    rbase = TCG_REG_R3;
2145#else  /* !CONFIG_SOFTMMU */
2146    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2147    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2148        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2149        addrlo = TCG_REG_TMP1;
2150    }
2151#endif
2152
2153    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2154        if (opc & MO_BSWAP) {
2155            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2156            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2157            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2158        } else if (rbase != 0) {
2159            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2160            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2161            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2162        } else if (addrlo == datahi) {
2163            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2164            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2165        } else {
2166            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2167            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2168        }
2169    } else {
2170        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2171        if (!have_isa_2_06 && insn == LDBRX) {
2172            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2173            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2174            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2175            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2176        } else if (insn) {
2177            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2178        } else {
2179            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2180            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2181            insn = qemu_exts_opc[s_bits];
2182            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2183        }
2184    }
2185
2186#ifdef CONFIG_SOFTMMU
2187    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2188                        s->code_ptr, label_ptr);
2189#endif
2190}
2191
2192static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2193{
2194    TCGReg datalo, datahi, addrlo, rbase;
2195    TCGReg addrhi __attribute__((unused));
2196    TCGMemOpIdx oi;
2197    MemOp opc, s_bits;
2198#ifdef CONFIG_SOFTMMU
2199    int mem_index;
2200    tcg_insn_unit *label_ptr;
2201#endif
2202
2203    datalo = *args++;
2204    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2205    addrlo = *args++;
2206    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2207    oi = *args++;
2208    opc = get_memop(oi);
2209    s_bits = opc & MO_SIZE;
2210
2211#ifdef CONFIG_SOFTMMU
2212    mem_index = get_mmuidx(oi);
2213    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2214
2215    /* Conditional branch-link to the (to-be-patched) slow path; the
           link value gives the slow path its fast-path return address.  */
2216    label_ptr = s->code_ptr;
2217    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2218
2219    rbase = TCG_REG_R3;
2220#else  /* !CONFIG_SOFTMMU */
2221    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2222    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2223        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2224        addrlo = TCG_REG_TMP1;
2225    }
2226#endif
2227
2228    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2229        if (opc & MO_BSWAP) {
2230            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2231            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2232            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2233        } else if (rbase != 0) {
2234            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2235            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2236            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2237        } else {
2238            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2239            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2240        }
2241    } else {
2242        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2243        if (!have_isa_2_06 && insn == STDBRX) {
2244            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2245            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2246            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2247            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2248        } else {
2249            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2250        }
2251    }
2252
2253#ifdef CONFIG_SOFTMMU
2254    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2255                        s->code_ptr, label_ptr);
2256#endif
2257}
2258
2259static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2260{
2261    int i;
2262    for (i = 0; i < count; ++i) {
2263        p[i] = NOP;
2264    }
2265}
2266
2267/* Parameters for function call generation, used in tcg.c.  */
2268#define TCG_TARGET_STACK_ALIGN       16
2269#define TCG_TARGET_EXTEND_ARGS       1
2270
2271#ifdef _CALL_AIX
2272# define LINK_AREA_SIZE                (6 * SZR)
2273# define LR_OFFSET                     (1 * SZR)
2274# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2275#elif defined(TCG_TARGET_CALL_DARWIN)
2276# define LINK_AREA_SIZE                (6 * SZR)
2277# define LR_OFFSET                     (2 * SZR)
2278#elif TCG_TARGET_REG_BITS == 64
2279# if defined(_CALL_ELF) && _CALL_ELF == 2
2280#  define LINK_AREA_SIZE               (4 * SZR)
2281#  define LR_OFFSET                    (1 * SZR)
2282# endif
2283#else /* TCG_TARGET_REG_BITS == 32 */
2284# if defined(_CALL_SYSV)
2285#  define LINK_AREA_SIZE               (2 * SZR)
2286#  define LR_OFFSET                    (1 * SZR)
2287# endif
2288#endif
2289#ifndef LR_OFFSET
2290# error "Unhandled abi"
2291#endif
2292#ifndef TCG_TARGET_CALL_STACK_OFFSET
2293# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2294#endif
2295
2296#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2297#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2298
2299#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2300                     + TCG_STATIC_CALL_ARGS_SIZE    \
2301                     + CPU_TEMP_BUF_SIZE            \
2302                     + REG_SAVE_SIZE                \
2303                     + TCG_TARGET_STACK_ALIGN - 1)  \
2304                    & -TCG_TARGET_STACK_ALIGN)
2305
2306#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
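    /* From the stack pointer upward, the frame is laid out roughly as
       [ link area | outgoing call args | temp buffer | saved registers ],
       with the callee-saved registers at the top (REG_SAVE_BOT) and the
       whole frame rounded up to TCG_TARGET_STACK_ALIGN.  */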
2307
2308static void tcg_target_qemu_prologue(TCGContext *s)
2309{
2310    int i;
2311
2312#ifdef _CALL_AIX
2313    void **desc = (void **)s->code_ptr;
2314    desc[0] = desc + 2;                   /* entry point */
2315    desc[1] = 0;                          /* environment pointer */
2316    s->code_ptr = (void *)(desc + 2);     /* skip over descriptor */
2317#endif
2318
2319    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2320                  CPU_TEMP_BUF_SIZE);
2321
2322    /* Prologue */
2323    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2324    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2325              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2326
2327    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2328        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2329                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2330    }
2331    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2332
2333#ifndef CONFIG_SOFTMMU
2334    if (guest_base) {
2335        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2336        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2337    }
2338#endif
2339
2340    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2341    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2342    if (USE_REG_TB) {
2343        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2344    }
2345    tcg_out32(s, BCCTR | BO_ALWAYS);
2346
2347    /* Epilogue */
2348    s->code_gen_epilogue = tb_ret_addr = s->code_ptr;
2349
2350    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2351    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2352        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2353                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2354    }
2355    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2356    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2357    tcg_out32(s, BCLR | BO_ALWAYS);
2358}
2359
2360static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
2361                       const int *const_args)
2362{
2363    TCGArg a0, a1, a2;
2364    int c;
2365
2366    switch (opc) {
2367    case INDEX_op_exit_tb:
2368        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
2369        tcg_out_b(s, 0, tb_ret_addr);
2370        break;
2371    case INDEX_op_goto_tb:
2372        if (s->tb_jmp_insn_offset) {
2373            /* Direct jump. */
2374            if (TCG_TARGET_REG_BITS == 64) {
2375                /* Ensure the next insns are 8-byte aligned. */
2376                if ((uintptr_t)s->code_ptr & 7) {
2377                    tcg_out32(s, NOP);
2378                }
2379                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2380                tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2381                tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
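                    /* The addis/addi pair is an 8-byte aligned placeholder
                       that tb_target_set_jmp_target above rewrites
                       atomically, as either addis+addi or addi+b.  */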
2382            } else {
2383                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2384                tcg_out32(s, B);
2385                s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
2386                break;
2387            }
2388        } else {
2389            /* Indirect jump. */
2390            tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
2391            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
2392                       (intptr_t)(s->tb_jmp_target_addr + args[0]));
2393        }
2394        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2395        tcg_out32(s, BCCTR | BO_ALWAYS);
2396        set_jmp_reset_offset(s, args[0]);
2397        if (USE_REG_TB) {
2398            /* For the unlinked case, need to reset TCG_REG_TB.  */
2399            c = -tcg_current_code_size(s);
2400            assert(c == (int16_t)c);
2401            tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
2402        }
2403        break;
2404    case INDEX_op_goto_ptr:
2405        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2406        if (USE_REG_TB) {
2407            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2408        }
2409        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2410        tcg_out32(s, BCCTR | BO_ALWAYS);
2411        break;
2412    case INDEX_op_br:
2413        {
2414            TCGLabel *l = arg_label(args[0]);
2415            uint32_t insn = B;
2416
2417            if (l->has_value) {
2418                insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr);
2419            } else {
2420                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2421            }
2422            tcg_out32(s, insn);
2423        }
2424        break;
2425    case INDEX_op_ld8u_i32:
2426    case INDEX_op_ld8u_i64:
2427        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2428        break;
2429    case INDEX_op_ld8s_i32:
2430    case INDEX_op_ld8s_i64:
2431        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2432        tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
2433        break;
2434    case INDEX_op_ld16u_i32:
2435    case INDEX_op_ld16u_i64:
2436        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2437        break;
2438    case INDEX_op_ld16s_i32:
2439    case INDEX_op_ld16s_i64:
2440        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2441        break;
2442    case INDEX_op_ld_i32:
2443    case INDEX_op_ld32u_i64:
2444        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2445        break;
2446    case INDEX_op_ld32s_i64:
2447        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2448        break;
2449    case INDEX_op_ld_i64:
2450        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2451        break;
2452    case INDEX_op_st8_i32:
2453    case INDEX_op_st8_i64:
2454        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2455        break;
2456    case INDEX_op_st16_i32:
2457    case INDEX_op_st16_i64:
2458        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2459        break;
2460    case INDEX_op_st_i32:
2461    case INDEX_op_st32_i64:
2462        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2463        break;
2464    case INDEX_op_st_i64:
2465        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2466        break;
2467
2468    case INDEX_op_add_i32:
2469        a0 = args[0], a1 = args[1], a2 = args[2];
2470        if (const_args[2]) {
2471        do_addi_32:
2472            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2473        } else {
2474            tcg_out32(s, ADD | TAB(a0, a1, a2));
2475        }
2476        break;
2477    case INDEX_op_sub_i32:
2478        a0 = args[0], a1 = args[1], a2 = args[2];
2479        if (const_args[1]) {
2480            if (const_args[2]) {
2481                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2482            } else {
2483                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2484            }
2485        } else if (const_args[2]) {
2486            a2 = -a2;
2487            goto do_addi_32;
2488        } else {
2489            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2490        }
2491        break;
2492
2493    case INDEX_op_and_i32:
2494        a0 = args[0], a1 = args[1], a2 = args[2];
2495        if (const_args[2]) {
2496            tcg_out_andi32(s, a0, a1, a2);
2497        } else {
2498            tcg_out32(s, AND | SAB(a1, a0, a2));
2499        }
2500        break;
2501    case INDEX_op_and_i64:
2502        a0 = args[0], a1 = args[1], a2 = args[2];
2503        if (const_args[2]) {
2504            tcg_out_andi64(s, a0, a1, a2);
2505        } else {
2506            tcg_out32(s, AND | SAB(a1, a0, a2));
2507        }
2508        break;
2509    case INDEX_op_or_i64:
2510    case INDEX_op_or_i32:
2511        a0 = args[0], a1 = args[1], a2 = args[2];
2512        if (const_args[2]) {
2513            tcg_out_ori32(s, a0, a1, a2);
2514        } else {
2515            tcg_out32(s, OR | SAB(a1, a0, a2));
2516        }
2517        break;
2518    case INDEX_op_xor_i64:
2519    case INDEX_op_xor_i32:
2520        a0 = args[0], a1 = args[1], a2 = args[2];
2521        if (const_args[2]) {
2522            tcg_out_xori32(s, a0, a1, a2);
2523        } else {
2524            tcg_out32(s, XOR | SAB(a1, a0, a2));
2525        }
2526        break;
2527    case INDEX_op_andc_i32:
2528        a0 = args[0], a1 = args[1], a2 = args[2];
2529        if (const_args[2]) {
2530            tcg_out_andi32(s, a0, a1, ~a2);
2531        } else {
2532            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2533        }
2534        break;
2535    case INDEX_op_andc_i64:
2536        a0 = args[0], a1 = args[1], a2 = args[2];
2537        if (const_args[2]) {
2538            tcg_out_andi64(s, a0, a1, ~a2);
2539        } else {
2540            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2541        }
2542        break;
2543    case INDEX_op_orc_i32:
2544        if (const_args[2]) {
2545            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2546            break;
2547        }
2548        /* FALLTHRU */
2549    case INDEX_op_orc_i64:
2550        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2551        break;
2552    case INDEX_op_eqv_i32:
2553        if (const_args[2]) {
2554            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2555            break;
2556        }
2557        /* FALLTHRU */
2558    case INDEX_op_eqv_i64:
2559        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2560        break;
2561    case INDEX_op_nand_i32:
2562    case INDEX_op_nand_i64:
2563        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2564        break;
2565    case INDEX_op_nor_i32:
2566    case INDEX_op_nor_i64:
2567        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2568        break;
2569
2570    case INDEX_op_clz_i32:
2571        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2572                      args[2], const_args[2]);
2573        break;
2574    case INDEX_op_ctz_i32:
2575        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2576                      args[2], const_args[2]);
2577        break;
2578    case INDEX_op_ctpop_i32:
2579        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2580        break;
2581
2582    case INDEX_op_clz_i64:
2583        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2584                      args[2], const_args[2]);
2585        break;
2586    case INDEX_op_ctz_i64:
2587        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2588                      args[2], const_args[2]);
2589        break;
2590    case INDEX_op_ctpop_i64:
2591        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2592        break;
2593
2594    case INDEX_op_mul_i32:
2595        a0 = args[0], a1 = args[1], a2 = args[2];
2596        if (const_args[2]) {
2597            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2598        } else {
2599            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2600        }
2601        break;
2602
2603    case INDEX_op_div_i32:
2604        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2605        break;
2606
2607    case INDEX_op_divu_i32:
2608        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2609        break;
2610
2611    case INDEX_op_shl_i32:
2612        if (const_args[2]) {
2613            tcg_out_shli32(s, args[0], args[1], args[2]);
2614        } else {
2615            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2616        }
2617        break;
2618    case INDEX_op_shr_i32:
2619        if (const_args[2]) {
2620            tcg_out_shri32(s, args[0], args[1], args[2]);
2621        } else {
2622            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2623        }
2624        break;
2625    case INDEX_op_sar_i32:
2626        if (const_args[2]) {
2627            tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2]));
2628        } else {
2629            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2630        }
2631        break;
2632    case INDEX_op_rotl_i32:
2633        if (const_args[2]) {
2634            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2635        } else {
2636            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2637                         | MB(0) | ME(31));
2638        }
2639        break;
2640    case INDEX_op_rotr_i32:
2641        if (const_args[2]) {
2642            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2643        } else {
2644            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2645            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2646                         | MB(0) | ME(31));
2647        }
2648        break;
2649
2650    case INDEX_op_brcond_i32:
2651        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2652                       arg_label(args[3]), TCG_TYPE_I32);
2653        break;
2654    case INDEX_op_brcond_i64:
2655        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2656                       arg_label(args[3]), TCG_TYPE_I64);
2657        break;
2658    case INDEX_op_brcond2_i32:
2659        tcg_out_brcond2(s, args, const_args);
2660        break;
2661
2662    case INDEX_op_neg_i32:
2663    case INDEX_op_neg_i64:
2664        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2665        break;
2666
2667    case INDEX_op_not_i32:
2668    case INDEX_op_not_i64:
2669        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2670        break;
2671
2672    case INDEX_op_add_i64:
2673        a0 = args[0], a1 = args[1], a2 = args[2];
2674        if (const_args[2]) {
2675        do_addi_64:
2676            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2677        } else {
2678            tcg_out32(s, ADD | TAB(a0, a1, a2));
2679        }
2680        break;
2681    case INDEX_op_sub_i64:
2682        a0 = args[0], a1 = args[1], a2 = args[2];
2683        if (const_args[1]) {
2684            if (const_args[2]) {
2685                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2686            } else {
2687                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2688            }
2689        } else if (const_args[2]) {
2690            a2 = -a2;
2691            goto do_addi_64;
2692        } else {
2693            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2694        }
2695        break;
2696
2697    case INDEX_op_shl_i64:
2698        if (const_args[2]) {
2699            tcg_out_shli64(s, args[0], args[1], args[2]);
2700        } else {
2701            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2702        }
2703        break;
2704    case INDEX_op_shr_i64:
2705        if (const_args[2]) {
2706            tcg_out_shri64(s, args[0], args[1], args[2]);
2707        } else {
2708            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2709        }
2710        break;
2711    case INDEX_op_sar_i64:
2712        if (const_args[2]) {
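                /* sradi takes a 6-bit shift count split across the insn:
                   bits 0-4 via SH(), bit 5 as a separate low-order bit,
                   hence the extraction of (args[2] >> 5) & 1 below.  */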
2713            int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
2714            tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
2715        } else {
2716            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2717        }
2718        break;
2719    case INDEX_op_rotl_i64:
2720        if (const_args[2]) {
2721            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2722        } else {
2723            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2724        }
2725        break;
2726    case INDEX_op_rotr_i64:
2727        if (const_args[2]) {
2728            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2729        } else {
2730            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2731            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2732        }
2733        break;
2734
2735    case INDEX_op_mul_i64:
2736        a0 = args[0], a1 = args[1], a2 = args[2];
2737        if (const_args[2]) {
2738            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2739        } else {
2740            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2741        }
2742        break;
2743    case INDEX_op_div_i64:
2744        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2745        break;
2746    case INDEX_op_divu_i64:
2747        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2748        break;
2749
2750    case INDEX_op_qemu_ld_i32:
2751        tcg_out_qemu_ld(s, args, false);
2752        break;
2753    case INDEX_op_qemu_ld_i64:
2754        tcg_out_qemu_ld(s, args, true);
2755        break;
2756    case INDEX_op_qemu_st_i32:
2757        tcg_out_qemu_st(s, args, false);
2758        break;
2759    case INDEX_op_qemu_st_i64:
2760        tcg_out_qemu_st(s, args, true);
2761        break;
2762
2763    case INDEX_op_ext8s_i32:
2764    case INDEX_op_ext8s_i64:
2765        c = EXTSB;
2766        goto gen_ext;
2767    case INDEX_op_ext16s_i32:
2768    case INDEX_op_ext16s_i64:
2769        c = EXTSH;
2770        goto gen_ext;
2771    case INDEX_op_ext_i32_i64:
2772    case INDEX_op_ext32s_i64:
2773        c = EXTSW;
2774        goto gen_ext;
2775    gen_ext:
2776        tcg_out32(s, c | RS(args[1]) | RA(args[0]));
2777        break;
2778    case INDEX_op_extu_i32_i64:
2779        tcg_out_ext32u(s, args[0], args[1]);
2780        break;
2781
2782    case INDEX_op_setcond_i32:
2783        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2784                        const_args[2]);
2785        break;
2786    case INDEX_op_setcond_i64:
2787        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2788                        const_args[2]);
2789        break;
2790    case INDEX_op_setcond2_i32:
2791        tcg_out_setcond2(s, args, const_args);
2792        break;
2793
2794    case INDEX_op_bswap16_i32:
2795    case INDEX_op_bswap16_i64:
2796        a0 = args[0], a1 = args[1];
2797        /* a1 = abcd */
2798        if (a0 != a1) {
2799            /* a0 = (a1 r<< 24) & 0xff # 000c */
2800            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2801            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
2802            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
2803        } else {
2804            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
2805            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
2806            /* a0 = (a1 r<< 24) & 0xff # 000c */
2807            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2808            /* a0 = a0 | r0 # 00dc */
2809            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
2810        }
2811        break;
2812
2813    case INDEX_op_bswap32_i32:
2814    case INDEX_op_bswap32_i64:
2815        /* Stolen from gcc's builtin_bswap32 */
2816        a1 = args[1];
2817        a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
2818
2819        /* a1 = args[1] # abcd */
2820        /* a0 = rotate_left (a1, 8) # bcda */
2821        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2822        /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
2823        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2824        /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
2825        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2826
2827        if (a0 == TCG_REG_R0) {
2828            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2829        }
2830        break;
2831
2832    case INDEX_op_bswap64_i64:
2833        a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
2834        if (a0 == a1) {
2835            a0 = TCG_REG_R0;
2836            a2 = a1;
2837        }
2838
2839        /* a1 = # abcd efgh */
2840        /* a0 = rl32(a1, 8) # 0000 fghe */
2841        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2842        /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
2843        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2844        /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
2845        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2846
2847        /* a0 = rl64(a0, 32) # hgfe 0000 */
2848        /* a2 = rl64(a1, 32) # efgh abcd */
2849        tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
2850        tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
2851
2852        /* a0 = dep(a0, rl32(a2, 8), 0xffffffff)  # hgfe bcda */
2853        tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
2854        /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
2855        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
2856        /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
2857        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
2858
2859        if (a0 == TCG_REG_R0) {
2860            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2861        }
2862        break;
2863
2864    case INDEX_op_deposit_i32:
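            /* A constant args[2] is presumably constrained to zero, both
               here and in the i64 case below, so depositing it reduces to
               clearing the field with an inverted mask.  */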
2865        if (const_args[2]) {
2866            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
2867            tcg_out_andi32(s, args[0], args[0], ~mask);
2868        } else {
2869            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
2870                        32 - args[3] - args[4], 31 - args[3]);
2871        }
2872        break;
2873    case INDEX_op_deposit_i64:
2874        if (const_args[2]) {
2875            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
2876            tcg_out_andi64(s, args[0], args[0], ~mask);
2877        } else {
2878            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
2879                        64 - args[3] - args[4]);
2880        }
2881        break;
2882
2883    case INDEX_op_extract_i32:
2884        tcg_out_rlw(s, RLWINM, args[0], args[1],
2885                    32 - args[2], 32 - args[3], 31);
2886        break;
2887    case INDEX_op_extract_i64:
2888        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
2889        break;
2890
2891    case INDEX_op_movcond_i32:
2892        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
2893                        args[3], args[4], const_args[2]);
2894        break;
2895    case INDEX_op_movcond_i64:
2896        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
2897                        args[3], args[4], const_args[2]);
2898        break;
2899
2900#if TCG_TARGET_REG_BITS == 64
2901    case INDEX_op_add2_i64:
2902#else
2903    case INDEX_op_add2_i32:
2904#endif
2905        /* Note that the CA bit is defined based on the word size of the
2906           environment.  So in 64-bit mode it's always carry-out of bit 63.
2907           The fallback code using deposit works just as well for 32-bit.  */
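            /* The carry chain: addic/addc set CA from the low-part add and
               adde adds args[3] + args[5] + CA; for a constant high part,
               addme computes ra + CA - 1 (constant -1) and addze computes
               ra + CA (constant zero).  */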
2908        a0 = args[0], a1 = args[1];
2909        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
2910            a0 = TCG_REG_R0;
2911        }
2912        if (const_args[4]) {
2913            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
2914        } else {
2915            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
2916        }
2917        if (const_args[5]) {
2918            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
2919        } else {
2920            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
2921        }
2922        if (a0 != args[0]) {
2923            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2924        }
2925        break;
2926
2927#if TCG_TARGET_REG_BITS == 64
2928    case INDEX_op_sub2_i64:
2929#else
2930    case INDEX_op_sub2_i32:
2931#endif
2932        a0 = args[0], a1 = args[1];
2933        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
2934            a0 = TCG_REG_R0;
2935        }
2936        if (const_args[2]) {
2937            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
2938        } else {
2939            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
2940        }
2941        if (const_args[3]) {
2942            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
2943        } else {
2944            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
2945        }
2946        if (a0 != args[0]) {
2947            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2948        }
2949        break;
2950
2951    case INDEX_op_muluh_i32:
2952        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
2953        break;
2954    case INDEX_op_mulsh_i32:
2955        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
2956        break;
2957    case INDEX_op_muluh_i64:
2958        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
2959        break;
2960    case INDEX_op_mulsh_i64:
2961        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
2962        break;
2963
2964    case INDEX_op_mb:
2965        tcg_out_mb(s, args[0]);
2966        break;
2967
2968    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
2969    case INDEX_op_mov_i64:
2970    case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
2971    case INDEX_op_movi_i64:
2972    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
2973    default:
2974        tcg_abort();
2975    }
2976}
2977
2978int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2979{
2980    switch (opc) {
2981    case INDEX_op_and_vec:
2982    case INDEX_op_or_vec:
2983    case INDEX_op_xor_vec:
2984    case INDEX_op_andc_vec:
2985    case INDEX_op_not_vec:
2986        return 1;
2987    case INDEX_op_orc_vec:
2988        return have_isa_2_07;
2989    case INDEX_op_add_vec:
2990    case INDEX_op_sub_vec:
2991    case INDEX_op_smax_vec:
2992    case INDEX_op_smin_vec:
2993    case INDEX_op_umax_vec:
2994    case INDEX_op_umin_vec:
2995    case INDEX_op_shlv_vec:
2996    case INDEX_op_shrv_vec:
2997    case INDEX_op_sarv_vec:
2998        return vece <= MO_32 || have_isa_2_07;
2999    case INDEX_op_ssadd_vec:
3000    case INDEX_op_sssub_vec:
3001    case INDEX_op_usadd_vec:
3002    case INDEX_op_ussub_vec:
3003        return vece <= MO_32;
3004    case INDEX_op_cmp_vec:
3005    case INDEX_op_shli_vec:
3006    case INDEX_op_shri_vec:
3007    case INDEX_op_sari_vec:
3008        return (vece <= MO_32 || have_isa_2_07) ? -1 : 0;
3009    case INDEX_op_neg_vec:
3010        return vece >= MO_32 && have_isa_3_00;
3011    case INDEX_op_mul_vec:
3012        switch (vece) {
3013        case MO_8:
3014        case MO_16:
3015            return -1;
3016        case MO_32:
3017            return have_isa_2_07 ? 1 : -1;
3018        }
3019        return 0;
3020    case INDEX_op_bitsel_vec:
3021        return have_vsx;
3022    default:
3023        return 0;
3024    }
3025}
3026
3027static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3028                            TCGReg dst, TCGReg src)
3029{
3030    tcg_debug_assert(dst >= TCG_REG_V0);
3031
3032    /* Splat from integer reg allowed via constraints for v3.00.  */
3033    if (src < TCG_REG_V0) {
3034        tcg_debug_assert(have_isa_3_00);
3035        switch (vece) {
3036        case MO_64:
3037            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3038            return true;
3039        case MO_32:
3040            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3041            return true;
3042        default:
3043            /* Fail, so that we fall back on either dupm or mov+dup.  */
3044            return false;
3045        }
3046    }
3047
3048    /*
3049     * Recall we use (or emulate) VSX integer loads, so the integer is
3050     * right justified within the left (zero-index) double-word.
3051     */
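        /* E.g., an MO_8 value then sits in big-endian byte element 7 of
           that doubleword, which is why the splat indices used below are
           7, 3 and 1.  */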
3052    switch (vece) {
3053    case MO_8:
3054        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3055        break;
3056    case MO_16:
3057        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3058        break;
3059    case MO_32:
3060        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3061        break;
3062    case MO_64:
3063        if (have_vsx) {
3064            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3065            break;
3066        }
3067        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3068        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3069        break;
3070    default:
3071        g_assert_not_reached();
3072    }
3073    return true;
3074}
3075
3076static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3077                             TCGReg out, TCGReg base, intptr_t offset)
3078{
3079    int elt;
3080
3081    tcg_debug_assert(out >= TCG_REG_V0);
3082    switch (vece) {
3083    case MO_8:
3084        if (have_isa_3_00) {
3085            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3086        } else {
3087            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3088        }
3089        elt = extract32(offset, 0, 4);
3090#ifndef HOST_WORDS_BIGENDIAN
3091        elt ^= 15;
3092#endif
3093        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3094        break;
3095    case MO_16:
3096        tcg_debug_assert((offset & 1) == 0);
3097        if (have_isa_3_00) {
3098            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3099        } else {
3100            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3101        }
3102        elt = extract32(offset, 1, 3);
3103#ifndef HOST_WORDS_BIGENDIAN
3104        elt ^= 7;
3105#endif
3106        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3107        break;
3108    case MO_32:
3109        if (have_isa_3_00) {
3110            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3111            break;
3112        }
3113        tcg_debug_assert((offset & 3) == 0);
3114        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3115        elt = extract32(offset, 2, 2);
3116#ifndef HOST_WORDS_BIGENDIAN
3117        elt ^= 3;
3118#endif
3119        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3120        break;
3121    case MO_64:
3122        if (have_vsx) {
3123            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3124            break;
3125        }
3126        tcg_debug_assert((offset & 7) == 0);
3127        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3128        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3129        elt = extract32(offset, 3, 1);
3130#ifndef HOST_WORDS_BIGENDIAN
3131        elt = !elt;
3132#endif
3133        if (elt) {
3134            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3135        } else {
3136            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3137        }
3138        break;
3139    default:
3140        g_assert_not_reached();
3141    }
3142    return true;
3143}
3144
3145static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3146                           unsigned vecl, unsigned vece,
3147                           const TCGArg *args, const int *const_args)
3148{
3149    static const uint32_t
3150        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3151        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3152        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3153        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3154        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3155        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3156        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3157        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3158        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3159        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3160        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3161        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3162        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3163        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3164        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3165        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3166        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3167        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3168        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3169        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3170        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3171        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3172        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3173        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3174
3175    TCGType type = vecl + TCG_TYPE_V64;
3176    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3177    uint32_t insn;
3178
3179    switch (opc) {
3180    case INDEX_op_ld_vec:
3181        tcg_out_ld(s, type, a0, a1, a2);
3182        return;
3183    case INDEX_op_st_vec:
3184        tcg_out_st(s, type, a0, a1, a2);
3185        return;
3186    case INDEX_op_dupm_vec:
3187        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3188        return;
3189
3190    case INDEX_op_add_vec:
3191        insn = add_op[vece];
3192        break;
3193    case INDEX_op_sub_vec:
3194        insn = sub_op[vece];
3195        break;
3196    case INDEX_op_neg_vec:
3197        insn = neg_op[vece];
3198        a2 = a1;
3199        a1 = 0;
3200        break;
3201    case INDEX_op_mul_vec:
3202        tcg_debug_assert(vece == MO_32 && have_isa_2_07);
3203        insn = VMULUWM;
3204        break;
3205    case INDEX_op_ssadd_vec:
3206        insn = ssadd_op[vece];
3207        break;
3208    case INDEX_op_sssub_vec:
3209        insn = sssub_op[vece];
3210        break;
3211    case INDEX_op_usadd_vec:
3212        insn = usadd_op[vece];
3213        break;
3214    case INDEX_op_ussub_vec:
3215        insn = ussub_op[vece];
3216        break;
3217    case INDEX_op_smin_vec:
3218        insn = smin_op[vece];
3219        break;
3220    case INDEX_op_umin_vec:
3221        insn = umin_op[vece];
3222        break;
3223    case INDEX_op_smax_vec:
3224        insn = smax_op[vece];
3225        break;
3226    case INDEX_op_umax_vec:
3227        insn = umax_op[vece];
3228        break;
3229    case INDEX_op_shlv_vec:
3230        insn = shlv_op[vece];
3231        break;
3232    case INDEX_op_shrv_vec:
3233        insn = shrv_op[vece];
3234        break;
3235    case INDEX_op_sarv_vec:
3236        insn = sarv_op[vece];
3237        break;
3238    case INDEX_op_and_vec:
3239        insn = VAND;
3240        break;
3241    case INDEX_op_or_vec:
3242        insn = VOR;
3243        break;
3244    case INDEX_op_xor_vec:
3245        insn = VXOR;
3246        break;
3247    case INDEX_op_andc_vec:
3248        insn = VANDC;
3249        break;
3250    case INDEX_op_not_vec:
3251        insn = VNOR;
3252        a2 = a1;
3253        break;
3254    case INDEX_op_orc_vec:
3255        insn = VORC;
3256        break;
3257
3258    case INDEX_op_cmp_vec:
3259        switch (args[3]) {
3260        case TCG_COND_EQ:
3261            insn = eq_op[vece];
3262            break;
3263        case TCG_COND_NE:
3264            insn = ne_op[vece];
3265            break;
3266        case TCG_COND_GT:
3267            insn = gts_op[vece];
3268            break;
3269        case TCG_COND_GTU:
3270            insn = gtu_op[vece];
3271            break;
3272        default:
3273            g_assert_not_reached();
3274        }
3275        break;
3276
3277    case INDEX_op_bitsel_vec:
3278        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3279        return;
3280
3281    case INDEX_op_dup2_vec:
3282        assert(TCG_TARGET_REG_BITS == 32);
3283        /* With inputs a1 = xLxx, a2 = xHxx  */
3284        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3285        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3286        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3287        return;
3288
3289    case INDEX_op_ppc_mrgh_vec:
3290        insn = mrgh_op[vece];
3291        break;
3292    case INDEX_op_ppc_mrgl_vec:
3293        insn = mrgl_op[vece];
3294        break;
3295    case INDEX_op_ppc_muleu_vec:
3296        insn = muleu_op[vece];
3297        break;
3298    case INDEX_op_ppc_mulou_vec:
3299        insn = mulou_op[vece];
3300        break;
3301    case INDEX_op_ppc_pkum_vec:
3302        insn = pkum_op[vece];
3303        break;
3304    case INDEX_op_ppc_rotl_vec:
3305        insn = rotl_op[vece];
3306        break;
3307    case INDEX_op_ppc_msum_vec:
3308        tcg_debug_assert(vece == MO_16);
3309        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3310        return;
3311
3312    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3313    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
3314    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3315    default:
3316        g_assert_not_reached();
3317    }
3318
3319    tcg_debug_assert(insn != 0);
3320    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3321}
3322
3323static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3324                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3325{
3326    TCGv_vec t1 = tcg_temp_new_vec(type);
3327
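    /* The variable-shift insns use only the low log2(element bits) bits of
       each count element, so reduce the immediate modulo the width.  */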
3328    /* Splat the count byte-wise, so it can be loaded with xxspltib.  */
3329    tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
3330    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3331              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3332    tcg_temp_free_vec(t1);
3333}
3334
3335static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3336                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3337{
3338    bool need_swap = false, need_inv = false;
3339
3340    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3341
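    /* Altivec provides only EQ, GT and GTU (plus NE on ISA 3.00); obtain
       the rest by swapping operands and/or inverting the result.  */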
3342    switch (cond) {
3343    case TCG_COND_EQ:
3344    case TCG_COND_GT:
3345    case TCG_COND_GTU:
3346        break;
3347    case TCG_COND_NE:
3348        if (have_isa_3_00 && vece <= MO_32) {
3349            break;
3350        }
3351        /* fall through */
3352    case TCG_COND_LE:
3353    case TCG_COND_LEU:
3354        need_inv = true;
3355        break;
3356    case TCG_COND_LT:
3357    case TCG_COND_LTU:
3358        need_swap = true;
3359        break;
3360    case TCG_COND_GE:
3361    case TCG_COND_GEU:
3362        need_swap = need_inv = true;
3363        break;
3364    default:
3365        g_assert_not_reached();
3366    }
3367
3368    if (need_inv) {
3369        cond = tcg_invert_cond(cond);
3370    }
3371    if (need_swap) {
3372        TCGv_vec t1;
3373        t1 = v1, v1 = v2, v2 = t1;
3374        cond = tcg_swap_cond(cond);
3375    }
3376
3377    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3378              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3379
3380    if (need_inv) {
3381        tcg_gen_not_vec(vece, v0, v0);
3382    }
3383}
3384
3385static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3386                           TCGv_vec v1, TCGv_vec v2)
3387{
3388    TCGv_vec t1 = tcg_temp_new_vec(type);
3389    TCGv_vec t2 = tcg_temp_new_vec(type);
3390    TCGv_vec t3, t4;
3391
3392    switch (vece) {
3393    case MO_8:
3394    case MO_16:
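        /* Form full products of the even and odd elements, then merge the
           double-width results and pack back the low halves.  */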
3395        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3396                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3397        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3398                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3399        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3400                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3401        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3402                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3403        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3404                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3405        break;
3406
3407    case MO_32:
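        /* Compute v1 * v2 from 16-bit pieces:
               lo(v1)*lo(v2) + ((lo(v1)*hi(v2) + hi(v1)*lo(v2)) << 16).
           t1 is v2 with halfwords swapped (rotate by 16), t2 the low-half
           product, and t3 the sum of the cross products via vmsumuhm.  */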
3408        tcg_debug_assert(!have_isa_2_07);
3409        t3 = tcg_temp_new_vec(type);
3410        t4 = tcg_temp_new_vec(type);
3411        tcg_gen_dupi_vec(MO_8, t4, -16);
3412        vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
3413                  tcgv_vec_arg(v2), tcgv_vec_arg(t4));
3414        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3415                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3416        tcg_gen_dupi_vec(MO_8, t3, 0);
3417        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
3418                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
3419        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
3420                  tcgv_vec_arg(t3), tcgv_vec_arg(t4));
3421        tcg_gen_add_vec(MO_32, v0, t2, t3);
3422        tcg_temp_free_vec(t3);
3423        tcg_temp_free_vec(t4);
3424        break;
3425
3426    default:
3427        g_assert_not_reached();
3428    }
3429    tcg_temp_free_vec(t1);
3430    tcg_temp_free_vec(t2);
3431}
3432
3433void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3434                       TCGArg a0, ...)
3435{
3436    va_list va;
3437    TCGv_vec v0, v1, v2;
3438    TCGArg a2;
3439
3440    va_start(va, a0);
3441    v0 = temp_tcgv_vec(arg_temp(a0));
3442    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3443    a2 = va_arg(va, TCGArg);
3444
3445    switch (opc) {
3446    case INDEX_op_shli_vec:
3447        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3448        break;
3449    case INDEX_op_shri_vec:
3450        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3451        break;
3452    case INDEX_op_sari_vec:
3453        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3454        break;
3455    case INDEX_op_cmp_vec:
3456        v2 = temp_tcgv_vec(arg_temp(a2));
3457        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3458        break;
3459    case INDEX_op_mul_vec:
3460        v2 = temp_tcgv_vec(arg_temp(a2));
3461        expand_vec_mul(type, vece, v0, v1, v2);
3462        break;
3463    default:
3464        g_assert_not_reached();
3465    }
3466    va_end(va);
3467}
3468
3469static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
3470{
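    /*
     * Constraint letters used below: "r" any gpr, "v" any vector register,
     * "L"/"S" gprs restricted for qemu_ld/qemu_st arguments, "i" any
     * immediate, "I" signed 16-bit, "U" unsigned 32-bit, "T" signed 32-bit,
     * "M" minus one, "W" the word size, "Z" zero, and "0" matching output 0
     * (see the TCG_CT_CONST_* flags and target_parse_constraint above).
     */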
3471    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
3472    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
3473    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
3474    static const TCGTargetOpDef S_S = { .args_ct_str = { "S", "S" } };
3475    static const TCGTargetOpDef r_ri = { .args_ct_str = { "r", "ri" } };
3476    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
3477    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
3478    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
3479    static const TCGTargetOpDef S_S_S = { .args_ct_str = { "S", "S", "S" } };
3480    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
3481    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
3482    static const TCGTargetOpDef r_r_rT = { .args_ct_str = { "r", "r", "rT" } };
3483    static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } };
3484    static const TCGTargetOpDef r_rI_ri
3485        = { .args_ct_str = { "r", "rI", "ri" } };
3486    static const TCGTargetOpDef r_rI_rT
3487        = { .args_ct_str = { "r", "rI", "rT" } };
3488    static const TCGTargetOpDef r_r_rZW
3489        = { .args_ct_str = { "r", "r", "rZW" } };
3490    static const TCGTargetOpDef L_L_L_L
3491        = { .args_ct_str = { "L", "L", "L", "L" } };
3492    static const TCGTargetOpDef S_S_S_S
3493        = { .args_ct_str = { "S", "S", "S", "S" } };
3494    static const TCGTargetOpDef movc
3495        = { .args_ct_str = { "r", "r", "ri", "rZ", "rZ" } };
3496    static const TCGTargetOpDef dep
3497        = { .args_ct_str = { "r", "0", "rZ" } };
3498    static const TCGTargetOpDef br2
3499        = { .args_ct_str = { "r", "r", "ri", "ri" } };
3500    static const TCGTargetOpDef setc2
3501        = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
3502    static const TCGTargetOpDef add2
3503        = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
3504    static const TCGTargetOpDef sub2
3505        = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
3506    static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
3507    static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
3508    static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
3509    static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
3510    static const TCGTargetOpDef v_v_v_v
3511        = { .args_ct_str = { "v", "v", "v", "v" } };
3512
3513    switch (op) {
3514    case INDEX_op_goto_ptr:
3515        return &r;
3516
3517    case INDEX_op_ld8u_i32:
3518    case INDEX_op_ld8s_i32:
3519    case INDEX_op_ld16u_i32:
3520    case INDEX_op_ld16s_i32:
3521    case INDEX_op_ld_i32:
3522    case INDEX_op_st8_i32:
3523    case INDEX_op_st16_i32:
3524    case INDEX_op_st_i32:
3525    case INDEX_op_ctpop_i32:
3526    case INDEX_op_neg_i32:
3527    case INDEX_op_not_i32:
3528    case INDEX_op_ext8s_i32:
3529    case INDEX_op_ext16s_i32:
3530    case INDEX_op_bswap16_i32:
3531    case INDEX_op_bswap32_i32:
3532    case INDEX_op_extract_i32:
3533    case INDEX_op_ld8u_i64:
3534    case INDEX_op_ld8s_i64:
3535    case INDEX_op_ld16u_i64:
3536    case INDEX_op_ld16s_i64:
3537    case INDEX_op_ld32u_i64:
3538    case INDEX_op_ld32s_i64:
3539    case INDEX_op_ld_i64:
3540    case INDEX_op_st8_i64:
3541    case INDEX_op_st16_i64:
3542    case INDEX_op_st32_i64:
3543    case INDEX_op_st_i64:
3544    case INDEX_op_ctpop_i64:
3545    case INDEX_op_neg_i64:
3546    case INDEX_op_not_i64:
3547    case INDEX_op_ext8s_i64:
3548    case INDEX_op_ext16s_i64:
3549    case INDEX_op_ext32s_i64:
3550    case INDEX_op_ext_i32_i64:
3551    case INDEX_op_extu_i32_i64:
3552    case INDEX_op_bswap16_i64:
3553    case INDEX_op_bswap32_i64:
3554    case INDEX_op_bswap64_i64:
3555    case INDEX_op_extract_i64:
3556        return &r_r;
3557
3558    case INDEX_op_add_i32:
3559    case INDEX_op_and_i32:
3560    case INDEX_op_or_i32:
3561    case INDEX_op_xor_i32:
3562    case INDEX_op_andc_i32:
3563    case INDEX_op_orc_i32:
3564    case INDEX_op_eqv_i32:
3565    case INDEX_op_shl_i32:
3566    case INDEX_op_shr_i32:
3567    case INDEX_op_sar_i32:
3568    case INDEX_op_rotl_i32:
3569    case INDEX_op_rotr_i32:
3570    case INDEX_op_setcond_i32:
3571    case INDEX_op_and_i64:
3572    case INDEX_op_andc_i64:
3573    case INDEX_op_shl_i64:
3574    case INDEX_op_shr_i64:
3575    case INDEX_op_sar_i64:
3576    case INDEX_op_rotl_i64:
3577    case INDEX_op_rotr_i64:
3578    case INDEX_op_setcond_i64:
3579        return &r_r_ri;
3580    case INDEX_op_mul_i32:
3581    case INDEX_op_mul_i64:
3582        return &r_r_rI;
3583    case INDEX_op_div_i32:
3584    case INDEX_op_divu_i32:
3585    case INDEX_op_nand_i32:
3586    case INDEX_op_nor_i32:
3587    case INDEX_op_muluh_i32:
3588    case INDEX_op_mulsh_i32:
3589    case INDEX_op_orc_i64:
3590    case INDEX_op_eqv_i64:
3591    case INDEX_op_nand_i64:
3592    case INDEX_op_nor_i64:
3593    case INDEX_op_div_i64:
3594    case INDEX_op_divu_i64:
3595    case INDEX_op_mulsh_i64:
3596    case INDEX_op_muluh_i64:
3597        return &r_r_r;
3598    case INDEX_op_sub_i32:
3599        return &r_rI_ri;
3600    case INDEX_op_add_i64:
3601        return &r_r_rT;
3602    case INDEX_op_or_i64:
3603    case INDEX_op_xor_i64:
3604        return &r_r_rU;
3605    case INDEX_op_sub_i64:
3606        return &r_rI_rT;
3607    case INDEX_op_clz_i32:
3608    case INDEX_op_ctz_i32:
3609    case INDEX_op_clz_i64:
3610    case INDEX_op_ctz_i64:
3611        return &r_r_rZW;
3612
3613    case INDEX_op_brcond_i32:
3614    case INDEX_op_brcond_i64:
3615        return &r_ri;
3616
3617    case INDEX_op_movcond_i32:
3618    case INDEX_op_movcond_i64:
3619        return &movc;
3620    case INDEX_op_deposit_i32:
3621    case INDEX_op_deposit_i64:
3622        return &dep;
3623    case INDEX_op_brcond2_i32:
3624        return &br2;
3625    case INDEX_op_setcond2_i32:
3626        return &setc2;
3627    case INDEX_op_add2_i64:
3628    case INDEX_op_add2_i32:
3629        return &add2;
3630    case INDEX_op_sub2_i64:
3631    case INDEX_op_sub2_i32:
3632        return &sub2;
3633
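    /* qemu_ld/st may need extra operands: a 64-bit guest address on a
       32-bit host takes two registers, as does a 64-bit data value.  */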
3634    case INDEX_op_qemu_ld_i32:
3635        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3636                ? &r_L : &r_L_L);
3637    case INDEX_op_qemu_st_i32:
3638        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3639                ? &S_S : &S_S_S);
3640    case INDEX_op_qemu_ld_i64:
3641        return (TCG_TARGET_REG_BITS == 64 ? &r_L
3642                : TARGET_LONG_BITS == 32 ? &L_L_L : &L_L_L_L);
3643    case INDEX_op_qemu_st_i64:
3644        return (TCG_TARGET_REG_BITS == 64 ? &S_S
3645                : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
3646
3647    case INDEX_op_add_vec:
3648    case INDEX_op_sub_vec:
3649    case INDEX_op_mul_vec:
3650    case INDEX_op_and_vec:
3651    case INDEX_op_or_vec:
3652    case INDEX_op_xor_vec:
3653    case INDEX_op_andc_vec:
3654    case INDEX_op_orc_vec:
3655    case INDEX_op_cmp_vec:
3656    case INDEX_op_ssadd_vec:
3657    case INDEX_op_sssub_vec:
3658    case INDEX_op_usadd_vec:
3659    case INDEX_op_ussub_vec:
3660    case INDEX_op_smax_vec:
3661    case INDEX_op_smin_vec:
3662    case INDEX_op_umax_vec:
3663    case INDEX_op_umin_vec:
3664    case INDEX_op_shlv_vec:
3665    case INDEX_op_shrv_vec:
3666    case INDEX_op_sarv_vec:
3667    case INDEX_op_ppc_mrgh_vec:
3668    case INDEX_op_ppc_mrgl_vec:
3669    case INDEX_op_ppc_muleu_vec:
3670    case INDEX_op_ppc_mulou_vec:
3671    case INDEX_op_ppc_pkum_vec:
3672    case INDEX_op_ppc_rotl_vec:
3673    case INDEX_op_dup2_vec:
3674        return &v_v_v;
3675    case INDEX_op_not_vec:
3676    case INDEX_op_neg_vec:
3677        return &v_v;
3678    case INDEX_op_dup_vec:
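    /* With ISA 3.00 the input may come directly from a gpr; otherwise
       it must already be in a vector register.  */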
3679        return have_isa_3_00 ? &v_vr : &v_v;
3680    case INDEX_op_ld_vec:
3681    case INDEX_op_st_vec:
3682    case INDEX_op_dupm_vec:
3683        return &v_r;
3684    case INDEX_op_bitsel_vec:
3685    case INDEX_op_ppc_msum_vec:
3686        return &v_v_v_v;
3687
3688    default:
3689        return NULL;
3690    }
3691}
3692
3693static void tcg_target_init(TCGContext *s)
3694{
3695    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3696    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3697
3698    have_isa = tcg_isa_base;
3699    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3700        have_isa = tcg_isa_2_06;
3701    }
3702#ifdef PPC_FEATURE2_ARCH_2_07
3703    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3704        have_isa = tcg_isa_2_07;
3705    }
3706#endif
3707#ifdef PPC_FEATURE2_ARCH_3_00
3708    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3709        have_isa = tcg_isa_3_00;
3710    }
3711#endif
3712
3713#ifdef PPC_FEATURE2_HAS_ISEL
3714    /* Prefer the explicit indication from the kernel. */
3715    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3716#else
3717    /* Fall back to knowing Power7 (2.06) has ISEL. */
3718    have_isel = have_isa_2_06;
3719#endif
3720
3721    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3722        have_altivec = true;
3723        /* We only care about the portion of VSX that overlaps Altivec. */
3724        if (hwcap & PPC_FEATURE_HAS_VSX) {
3725            have_vsx = true;
3726        }
3727    }
3728
3729    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3730    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
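    /* The 32 vector registers occupy the upper half of the register set.  */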
3731    if (have_altivec) {
3732        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3733        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3734    }
3735
3736    tcg_target_call_clobber_regs = 0;
3737    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3738    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3739    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3740    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3741    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3742    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3743    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3744    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3745    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3746    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3747    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3748    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3749
3750    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3751    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3752    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3753    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3754    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3755    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3756    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3757    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3758    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3759    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3760    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3761    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3762    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3763    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3764    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3765    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3766    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3767    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3768    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3769    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3770
3771    s->reserved_regs = 0;
3772    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3773    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3774#if defined(_CALL_SYSV)
3775    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3776#endif
3777#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3778    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3779#endif
3780    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3781    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3782    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3783    if (USE_REG_TB) {
3784        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3785    }
3786}
3787
3788#ifdef __ELF__
3789typedef struct {
3790    DebugFrameCIE cie;
3791    DebugFrameFDEHeader fde;
3792    uint8_t fde_def_cfa[4];
3793    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3794} DebugFrame;
3795
3796/* We're expecting a 2 byte uleb128 encoded value.  */
3797QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
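/* For example, a (hypothetical) FRAME_SIZE of 0x1a0 would encode as the
   two uleb128 bytes 0xa0, 0x03: low seven bits first, with the
   continuation bit set on the first byte.  */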
3798
3799#if TCG_TARGET_REG_BITS == 64
3800# define ELF_HOST_MACHINE EM_PPC64
3801#else
3802# define ELF_HOST_MACHINE EM_PPC
3803#endif
3804
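/* Not const: tcg_register_jit() patches in func_start and func_len below.  */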
3805static DebugFrame debug_frame = {
3806    .cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
3807    .cie.id = -1,
3808    .cie.version = 1,
3809    .cie.code_align = 1,
3810    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3811    .cie.return_column = 65,
3812
3813    /* Total FDE size does not include the "len" member.  */
3814    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3815
3816    .fde_def_cfa = {
3817        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
3818        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3819        (FRAME_SIZE >> 7)
3820    },
3821    .fde_reg_ofs = {
3822        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
3823        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
3824    }
3825};
3826
3827void tcg_register_jit(void *buf, size_t buf_size)
3828{
3829    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3830    int i;
3831
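    /* Each entry is DW_CFA_offset (0x80 | regno) followed by the register's
       uleb128 offset from the CFA, scaled by the data alignment of -SZR.  */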
3832    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
3833        p[0] = 0x80 + tcg_target_callee_save_regs[i];
3834        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
3835    }
3836
3837    debug_frame.fde.func_start = (uintptr_t)buf;
3838    debug_frame.fde.func_len = buf_size;
3839
3840    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3841}
3842#endif /* __ELF__ */
3843
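/*
 * Make newly generated code visible to instruction fetch: write the data
 * cache lines back (dcbst), wait for completion (sync), invalidate the
 * corresponding instruction cache lines (icbi), then sync/isync -- the
 * standard PowerPC self-modifying-code sequence.
 */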
3844void flush_icache_range(uintptr_t start, uintptr_t stop)
3845{
3846    uintptr_t p, start1, stop1;
3847    size_t dsize = qemu_dcache_linesize;
3848    size_t isize = qemu_icache_linesize;
3849
3850    start1 = start & ~(dsize - 1);
3851    stop1 = (stop + dsize - 1) & ~(dsize - 1);
3852    for (p = start1; p < stop1; p += dsize) {
3853        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
3854    }
3855    asm volatile ("sync" : : : "memory");
3856
3857    start1 = start & ~(isize - 1);
3858    stop1 = (stop + isize - 1) & ~(isize - 1);
3859    for (p = start1; p < stop1; p += isize) {
3860        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
3861    }
3862    asm volatile ("sync" : : : "memory");
3863    asm volatile ("isync" : : : "memory");
3864}
3865