qemu/tcg/i386/tcg-target.inc.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Constants we accept.  */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400

/* Registers used with the 'L' constraint, which are the first two
   argument registers on x86_64, and two call-clobbered registers on
   i386.  */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* The host compiler should supply <cpuid.h> to enable runtime feature
   detection, as we're not going to go so far as our own inline assembly.
   If it is not available, default values will be assumed.  */
#if defined(CONFIG_CPUID_H)
#include <cpuid.h>
#endif
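
/* A minimal sketch of the kind of runtime probe that <cpuid.h> enables
   (illustrative only -- the real detection happens at init time, and
   the bit_* names below exist only when the header provides them):

       unsigned a, b, c, d;
       if (__get_cpuid(1, &a, &b, &c, &d)) {
           have_cmov  = (d & bit_CMOV) != 0;    CPUID.1:EDX bit 15
           have_movbe = (c & bit_MOVBE) != 0;   CPUID.1:ECX bit 22
       }
*/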

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
   going to attempt to determine at runtime whether movbe is available.  */
#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
static bool have_movbe;
#else
# define have_movbe 0
#endif

/* We need this symbol in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable.  */
bool have_bmi1;

#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif

static tcg_insn_unit *tb_ret_addr;

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}
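
/* Worked example of the relocation arithmetic: for a jump whose 4-byte
   displacement field starts at code_ptr, tcg_out_reloc(..., R_386_PC32,
   label, -4) ultimately patches in

       disp = target + (-4) - code_ptr = target - (code_ptr + 4)

   i.e. a displacement relative to the end of the instruction, as the
   CPU expects.  R_386_PC8 works the same way with addend -1.  */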

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
    case_c:
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
    case_r:
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;
    case 'C':
        /* With SHRX et al, we need not use ECX as shift count register.  */
        if (have_bmi2) {
            goto case_r;
        } else {
            goto case_c;
        }

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;
    case 'I':
        ct->ct |= TCG_CT_CONST_I32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
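
/* Worked example (a sketch, not a sequence emitted verbatim below):
   tcg_out_modrm(s, OPC_ADD_GvEv + P_REXW, TCG_REG_RAX, TCG_REG_R8),
   i.e. "addq %r8, %rax", assembles as

       REX   = 0x40 | W(0x8) | B(0x1)     -> 0x49
       opc   = 0x03                          (ADD Gv,Ev)
       ModRM = 0xc0 | (0 << 3) | 0        -> 0xc0

   giving the byte sequence 49 03 c0.  */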

static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        tmp |= 0x40;                       /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);         /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);        /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);   /* VEX.W */
    } else {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);          /* VEX.R */
    }
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                          /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                          /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                          /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                 /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
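
/* Worked example: tcg_out_vex_modrm(s, OPC_SHLX, TCG_REG_EAX,
   TCG_REG_ECX, TCG_REG_EBX) -- "shlx %ecx, %ebx, %eax" -- takes the
   three-byte form because P_EXT38 is set:

       0xc4                                      three-byte VEX escape
       0xe2 = ~R(0x80) | ~X(0x40) | ~B(0x20) | 2 (0f 38 map)
       0x71 = W(0) | (~1 & 15) << 3 | pp(1)      (vvvv = %ecx, 0x66)
       0xf7                                      opcode
       0xc3 = 0xc0 | (0 << 3) | 3                (ModRM: %eax, %ebx)

   i.e. c4 e2 71 f7 c3.  (Illustrative; the operand order used by the
   actual callers appears later in the file.)  */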

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM or INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
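
/* Worked example: tcg_out_modrm_sib_offset(s, OPC_MOVL_GvEv,
   TCG_REG_EAX, TCG_REG_EBX, TCG_REG_ECX, 2, 8), i.e.
   "movl 8(%ebx,%ecx,4), %eax", assembles as

       opc   = 0x8b
       ModRM = 0x40 | (0 << 3) | 4       disp8 follows, SIB present
       SIB   = (2 << 6) | (1 << 3) | 3   scale 4, index %ecx, base %ebx
       disp8 = 0x08

   giving 8b 44 8b 08.  (In 64-bit mode the identical bytes address
   8(%rbx,%rcx,4) instead.)  */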

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
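
/* Size rationale for the cases above: "xor r,r" is 2-3 bytes, "movl
   $imm32, r" is 5-6, the sign-extended "movq $simm32, r" is 7, the
   pc-relative lea is 7, and the full "movq $imm64, r" is 10, so each
   test falls through to the next larger encoding only when it must.  */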

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
                               tcg_target_long ofs, tcg_target_long val)
{
    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, 0, base, ofs);
    tcg_out32(s, val);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
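
/* Worked examples: with cf clear, AND of a 64-bit register with
   0xffffffff becomes "movl %reg, %reg" (32-bit moves zero-extend),
   0xffff becomes movzwl, and 0xff becomes movzbl; ADD/SUB of +/-1
   become inc/dec (in the MODRM form on x86_64, where the one-byte
   0x40+r encodings are re-used as REX prefixes).  Everything else
   takes the imm8 form (0x83) when the value fits in a signed byte,
   else the imm32 form (0x81).  */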

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
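
/* Example expansion, for a signed 64-bit "brcond lt" on a 32-bit host
   (schematic; actual registers and immediates come from args[]):

       cmp  <high parts>          signed compare of the high words
       jl   label_this
       jne  label_next            high words differ, and not less
       cmp  <low parts>           high words equal
       jb   label_this            low words compare unsigned
   label_next:
*/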
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        TCGLabel *over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    int s_mask = (1 << (opc & MO_SIZE)) - 1;
    bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    if (aligned) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        /* For unaligned access check that we don't cross pages using
           the page address of the last byte.  */
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask);
    }

    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1,
                TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth noting:
       for a 32-bit guest on an x86_64 host, MOVL zero-extends the guest
       address before the fastpath ADDQ below; for a 64-bit guest on an
       x32 host, MOVQ copies the entire guest address for the slow path,
       while truncation for the 32-bit host happens with the fastpath
       ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
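
/* For a 64-bit guest on a 64-bit host the fast path emitted above looks
   roughly like this (a sketch -- register choice, offsets and the mask
   constants all vary with the configuration):

       movq  addrlo, %r0
       leaq  s_mask(addrlo), %r1      (or movq, for an aligned access)
       shrq  $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), %r0
       andq  $(TARGET_PAGE_MASK | s_mask), %r1
       andq  $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), %r0
       leaq  tlb_table_offset(%areg0, %r0), %r0
       cmpq  (%r0), %r1
       movq  addrlo, %r1
       jne   slow_path
       addq  addend_offset(%r0), %r1  (%r1 is now the host address)

   tlb_table_offset and addend_offset stand in for the offsetof()
   values used above.  */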

/*
 * Record the context of a call to the out of line helper code for the slow path
 * for a load or store, so that we can later generate the correct helper code
 */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                tcg_insn_unit *raddr,
                                tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of block
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to the next IR of qemu_ld */
    tcg_out_jmp(s, l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */
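
/* With guest_base installed as the %gs segment base (P_GS above), a
   user-only guest access can be emitted as, e.g.,

       movl %gs:(%rsi), %eax

   so the guest_base offset is added implicitly by the segment override
   and costs neither a register nor a displacement.  (Illustrative; the
   prefix is applied through the "seg" argument of the routines below.)  */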

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_GvEv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        if (real_bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  That will be useful once fixed-register globals are less
   common.  */
1551static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1552{
1553    TCGReg datalo, datahi, addrlo;
1554    TCGReg addrhi __attribute__((unused));
1555    TCGMemOpIdx oi;
1556    TCGMemOp opc;
1557#if defined(CONFIG_SOFTMMU)
1558    int mem_index;
1559    tcg_insn_unit *label_ptr[2];
1560#endif
1561
1562    datalo = *args++;
1563    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1564    addrlo = *args++;
1565    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1566    oi = *args++;
1567    opc = get_memop(oi);
1568
1569#if defined(CONFIG_SOFTMMU)
1570    mem_index = get_mmuidx(oi);
1571
1572    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1573                     label_ptr, offsetof(CPUTLBEntry, addr_read));
1574
1575    /* TLB Hit.  */
1576    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
1577
1578    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int index = -1;
        int seg = 0;

        /* For a 32-bit guest, the high 32 bits of the address register
           may contain garbage, so the address must be truncated.  The
           ADDR32 prefix does this for free if we're not using a guest
           base, or when using segmentation.  Otherwise we need to
           zero-extend manually.  */
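        /* An illustrative summary of the cases below:
             - guest_base == 0, or a segment override is available:
               address guest memory directly (through the segment
               prefix, if any), adding the ADDR32 prefix on a 64-bit
               host so the 32-bit guest address is truncated for free;
             - otherwise, on a 64-bit host: zero-extend the 32-bit
               guest address into L0, then either keep guest_base as
               the displacement (when it fits in 32 bits) or load it
               into L1 and use that as the SIB index.  */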
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L0, base);
                base = TCG_REG_L0;
            }
            if (offset != guest_base) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                index = TCG_REG_L1;
                offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi,
                               base, index, offset, seg, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_EvGv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_MyGy;
    }
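    /* With MOVBE the byte swap is part of the memory access itself,
       so the bswap+scratch sequences below are not needed on that
       path.  */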

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the context of this store for the slow-path ldst label.  */
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int seg = 0;

        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            /* ??? Note that we can't use the same SIB addressing scheme
               as for loads, since we require L0 free for bswap.  */
            if (offset != guest_base) {
                if (TARGET_LONG_BITS == 32) {
                    tcg_out_ext32u(s, TCG_REG_L0, base);
                    base = TCG_REG_L0;
                }
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                base = TCG_REG_L1;
                offset = 0;
            } else if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L1, base);
                base = TCG_REG_L1;
            }
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, vexop, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */    \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif
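    /* For example, on a 64-bit host OP_32_64(add) expands to

           case INDEX_op_add_i64: rexw = P_REXW;  [fall through]
           case INDEX_op_add_i32:

       so a single arm handles both operand widths, with rexw
       selecting the REX.W prefix where it matters.  */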

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
            tcg_out32(s, 0);
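            /* The zero emitted above is a placeholder for the jump's
               rel32 operand; tb_jmp_offset records where it sits so
               that the real destination can be patched in later when
               TBs are chained.  */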
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (intptr_t)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
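        /* E.g. "add_i32 t0, t1, 5" assembles to "leal 5(%t1), %t0":
           a single instruction that neither clobbers the flags nor
           destroys t1.  */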
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(andc):
        if (const_args[2]) {
            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
                        args[0], args[1]);
            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
        } else {
            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        vexop = OPC_SHLX;
        goto gen_shift_maybe_vex;
    OP_32_64(shr):
        c = SHIFT_SHR;
        vexop = OPC_SHRX;
        goto gen_shift_maybe_vex;
    OP_32_64(sar):
        c = SHIFT_SAR;
        vexop = OPC_SARX;
        goto gen_shift_maybe_vex;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift_maybe_vex:
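        /* The BMI2 SHLX/SHRX/SARX forms take the shift count in an
           arbitrary register rather than CL, and do not touch the
           flags, hence the preference below for non-constant counts.  */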
        if (have_bmi2 && !const_args[2]) {
            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
            break;
        }
        /* FALLTHRU */
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         arg_label(args[3]), 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    OP_32_64(mulu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        break;
    OP_32_64(muls2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        break;
    OP_32_64(add2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
        }
        break;
    OP_32_64(sub2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
        }
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         arg_label(args[3]), 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
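            /* In ModRM, %ah/%ch/%dh/%bh are encoded as the number of
               the corresponding low-byte register plus 4, hence the
               "args[0] + 4" below; these encodings cannot be combined
               with a REX prefix.  */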
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }

#undef OP_32_64
}

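/* A reading aid for the constraint strings below (the backend's
   constraint parser, earlier in this file, is authoritative): "r" is
   any register, "q" a register with an addressable low byte, a digit
   ties an operand to the same register as the numbered output ("0"
   means "same as operand 0"), "i" is an arbitrary immediate, and the
   remaining letters select more specialised register or constant
   classes.  */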
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },
    { INDEX_op_andc_i32, { "r", "r", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "Ci" } },
    { INDEX_op_shr_i32, { "r", "0", "Ci" } },
    { INDEX_op_sar_i32, { "r", "0", "Ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },
    { INDEX_op_andc_i64, { "r", "r", "rI" } },

    { INDEX_op_shl_i64, { "r", "0", "Ci" } },
    { INDEX_op_shr_i64, { "r", "0", "Ci" } },
    { INDEX_op_sar_i64, { "r", "0", "Ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_ext_i32_i64, { "r", "r" } },
    { INDEX_op_extu_i32_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
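/* For example, on a 64-bit non-Windows host tcg_target_callee_save_regs
   has six entries, so PUSH_SIZE is (1 + 6) * 8 = 56 bytes; the "1 +"
   accounts for the return address pushed by the call that enters the
   generated prologue.  FRAME_SIZE then adds the static call-argument
   and temp-buffer areas and rounds up to TCG_TARGET_STACK_ALIGN.  */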

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to guest_base.  */
    if (guest_base) {
        setup_guest_base_seg();
    }
#endif
}
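/* On a 64-bit System V host the code generated above is roughly
   (a sketch, not literal disassembly):

       push %rbp; push %rbx; push %r12 ... push %r15
       mov  %rdi, %r14            # env argument -> TCG_AREG0
       sub  $stack_addend, %rsp
       jmp  *%rsi                 # enter the translated block

   and the epilogue at tb_ret_addr undoes it, returning to the caller
   with exit_tb's argument left in %rax.  */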

static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
    unsigned a, b, c, d;
    int max = __get_cpuid_max(0, 0);

    if (max >= 1) {
        __cpuid(1, a, b, c, d);
#ifndef have_cmov
        /* For 32-bit, it is almost certain that the hardware supports
           cmov, but we still need to check at runtime.  If cmov is not
           available, we fall back to a small forward branch.  */
        have_cmov = (d & bit_CMOV) != 0;
#endif
#ifndef have_movbe
        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
           need to probe for it.  */
        have_movbe = (c & bit_MOVBE) != 0;
#endif
    }

    if (max >= 7) {
        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
        __cpuid_count(7, 0, a, b, c, d);
#ifdef bit_BMI
        have_bmi1 = (b & bit_BMI) != 0;
#endif
#ifndef have_bmi2
        have_bmi2 = (b & bit_BMI2) != 0;
#endif
    }
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2-byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
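/* The fde_def_cfa initialisers below hard-code that two-byte form:
   the low seven bits of FRAME_SIZE with the continuation bit set,
   followed by the remaining high bits.  A frame of 2**14 bytes or
   more would need a third uleb128 byte, hence the build-time check
   above.  */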

#if !defined(__ELF__)
    /* Host machine without ELF. */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif
