qemu/tcg/i386/tcg-target.inc.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
#ifndef _WIN64
    TCG_REG_RBP,
#endif
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode uses a stack-based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Constants we accept.  */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call-clobbered registers on
   i386.  */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* The host compiler should supply <cpuid.h> to enable runtime feature
   detection, as we're not going to go so far as our own inline assembly.
   If not available, default values will be assumed.  */
#if defined(CONFIG_CPUID_H)
#include <cpuid.h>
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
   going to attempt to determine at runtime whether movbe is available.  */
#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
static bool have_movbe;
#else
# define have_movbe 0
#endif

/* We need this symbol in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable.  */
bool have_bmi1;

#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif

static tcg_insn_unit *tb_ret_addr;

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
    case_c:
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
    case_r:
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;
    case 'C':
        /* With SHRX et al, we need not use ECX as shift count register.  */
        if (have_bmi2) {
            goto case_r;
        } else {
            goto case_c;
        }

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;
    case 'I':
        ct->ct |= TCG_CT_CONST_I32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
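    /* For the 'I' constraint: accept any constant whose bitwise
       complement also fits in a sign-extended 32-bit immediate.  */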
    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

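/* The opcode values below pack the primary opcode byte into the low 8 bits;
   the P_* flags above that byte select which prefix bytes tcg_out_opc
   must emit in front of it.  */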
#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

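    /* Assemble the REX prefix, layout 0100WRXB: W selects 64-bit operand
       size, while R, X and B supply bit 3 of the ModRM reg field, the
       SIB index and the ModRM rm (or SIB base) field respectively.  */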
    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization to eliminate the dead arguments may not.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

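/* Emit OPC with a register-direct ModRM byte: mod = 3 in bits 7:6, the
   reg field in bits 5:3 and the rm field in bits 2:0.  For example,
   tcg_out_modrm(s, OPC_MOVL_GvEv + P_REXW, TCG_REG_R8, TCG_REG_RAX)
   produces 4c 8b c0, i.e. movq %rax, %r8.  */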
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

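/* Emit a VEX-prefixed opcode.  The three byte form (0xc4) is used whenever
   REX.W, an extended rm register, or a 0f/0f38 escape has to be encoded;
   otherwise the shorter two byte form (0xc5) suffices.  Note that the
   VEX.R/X/B and VEX.vvvv fields are stored inverted.  */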
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        tmp |= 0x40;                       /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);         /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);        /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);   /* VEX.W */
    } else {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);          /* VEX.R */
    }
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                          /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                          /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                          /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                 /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   A negative value for RM or INDEX indicates that operand is absent.  In
   64-bit mode for absolute addresses, ~RM is the size of the immediate
   operand that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

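        /* The SIB byte packs the scale in bits 7:6, the index register
           in bits 5:3 and the base register in bits 2:0.  */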
        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

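    /* Prefer the shortest encoding that produces the value: XOR for zero,
       the 5-byte MOVL for values that zero-extend from 32 bits, the 7-byte
       sign-extending MOVQ imm32, a pc-relative LEA, and only then the
       full 10-byte MOVQ imm64.  */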
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
                               tcg_target_long ofs, tcg_target_long val)
{
    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, 0, base, ofs);
    tcg_out32(s, val);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
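        /* Branch displacements are relative to the end of the insn:
           2 bytes for the short forms, 5 for long JMP, 6 for long Jcc.  */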
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle temporaries that live across basic blocks.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        TCGLabel *over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
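    /* The rel32 displacement is relative to the end of the 5-byte
       CALL/JMP insn.  If the target is out of 32-bit range, go through
       the call-clobbered, non-argument register R10 instead.  */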
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    int s_mask = (1 << (opc & MO_SIZE)) - 1;
    bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    if (aligned) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        /* For unaligned access check that we don't cross pages using
           the page address of the last byte.  */
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask);
    }

    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

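    /* Mask r1 down to the start of the page; when the access must be
       aligned, keep the low alignment bits as well, so that a misaligned
       address fails the TLB compare and takes the slow path.  */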
    tgen_arithi(s, ARITH_AND + trexw, r1,
                TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth noting:
       for a 32-bit guest and x86_64 host, MOVL zero-extends the guest
       address before the fastpath ADDQ below.  For a 64-bit guest and x32
       host, MOVQ copies the entire guest address for the slow path, while
       truncation for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}

/*
 * Record the context of a call to the out of line helper code for the slow path
 * for a load or store, so that we can later generate the correct helper code
 */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                tcg_insn_unit *raddr,
                                tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of block
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to the next IR of qemu_ld */
    tcg_out_jmp(s, l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_GvEv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        if (real_bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
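            /* Load the two halves; if the base register overlaps the
               low-part destination, read the high half first so that
               the address is not clobbered.  */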
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common.  */
1553static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1554{
1555    TCGReg datalo, datahi, addrlo;
1556    TCGReg addrhi __attribute__((unused));
1557    TCGMemOpIdx oi;
1558    TCGMemOp opc;
1559#if defined(CONFIG_SOFTMMU)
1560    int mem_index;
1561    tcg_insn_unit *label_ptr[2];
1562#endif
1563
1564    datalo = *args++;
1565    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1566    addrlo = *args++;
1567    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1568    oi = *args++;
1569    opc = get_memop(oi);
1570
1571#if defined(CONFIG_SOFTMMU)
1572    mem_index = get_mmuidx(oi);
1573
1574    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1575                     label_ptr, offsetof(CPUTLBEntry, addr_read));
1576
1577    /* TLB Hit.  */
1578    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
1579
1580    /* Record the current context of a load into ldst label */
1581    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1582                        s->code_ptr, label_ptr);
1583#else
1584    {
1585        int32_t offset = guest_base;
1586        TCGReg base = addrlo;
1587        int index = -1;
1588        int seg = 0;
1589
1590        /* For a 32-bit guest, the high 32 bits may contain garbage and
1591           must be discarded.  The ADDR32 prefix does that for free when
1592           we're not using a guest base, or when using segmentation.
1593           Otherwise we need to zero-extend manually.  */
1594        if (guest_base == 0 || guest_base_flags) {
1595            seg = guest_base_flags;
1596            offset = 0;
1597            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1598                seg |= P_ADDR32;
1599            }
1600        } else if (TCG_TARGET_REG_BITS == 64) {
1601            if (TARGET_LONG_BITS == 32) {
1602                tcg_out_ext32u(s, TCG_REG_L0, base);
1603                base = TCG_REG_L0;
1604            }
1605            if (offset != guest_base) {
1606                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1607                index = TCG_REG_L1;
1608                offset = 0;
1609            }
1610        }
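        /* In short, one of three strategies is used here:
           - no guest base, or a usable segment: address the guest
             directly, with ADDR32/segment prefixes discarding high bits;
           - guest_base fits the 32-bit displacement: fold it into 'offset';
           - otherwise: materialize guest_base in L1 and use it as the
             SIB index register. */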
1611
1612        tcg_out_qemu_ld_direct(s, datalo, datahi,
1613                               base, index, offset, seg, opc);
1614    }
1615#endif
1616}
1617
1618static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1619                                   TCGReg base, intptr_t ofs, int seg,
1620                                   TCGMemOp memop)
1621{
1622    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
1623       we could perform the bswap twice to restore the original value
1624       instead of moving to the scratch.  But as it is, the L constraint
1625       means that TCG_REG_L0 is definitely free here.  */
1626    const TCGReg scratch = TCG_REG_L0;
1627    const TCGMemOp real_bswap = memop & MO_BSWAP;
1628    TCGMemOp bswap = real_bswap;
1629    int movop = OPC_MOVL_EvGv;
1630
1631    if (have_movbe && real_bswap) {
1632        bswap = 0;
1633        movop = OPC_MOVBE_MyGy;
1634    }
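    /* With MOVBE the swap is folded into the memory access itself, e.g.
       "movbe %eax, (%edi)" in place of "bswap %eax; movl %eax, (%edi)",
       which would also destroy the source value. */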
1635
1636    switch (memop & MO_SIZE) {
1637    case MO_8:
1638        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1639           Use the scratch register if necessary.  */
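        /* (Without a REX prefix, r/m encodings 4..7 select %ah/%ch/%dh/%bh
           rather than the low bytes of %esp/%ebp/%esi/%edi.) */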
1640        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1641            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1642            datalo = scratch;
1643        }
1644        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1645                             datalo, base, ofs);
1646        break;
1647    case MO_16:
1648        if (bswap) {
1649            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1650            tcg_out_rolw_8(s, scratch);
1651            datalo = scratch;
1652        }
1653        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
1654        break;
1655    case MO_32:
1656        if (bswap) {
1657            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1658            tcg_out_bswap32(s, scratch);
1659            datalo = scratch;
1660        }
1661        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1662        break;
1663    case MO_64:
1664        if (TCG_TARGET_REG_BITS == 64) {
1665            if (bswap) {
1666                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1667                tcg_out_bswap64(s, scratch);
1668                datalo = scratch;
1669            }
1670            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1671        } else if (bswap) {
1672            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1673            tcg_out_bswap32(s, scratch);
1674            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1675            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1676            tcg_out_bswap32(s, scratch);
1677            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1678        } else {
1679            if (real_bswap) {
1680                int t = datalo;
1681                datalo = datahi;
1682                datahi = t;
1683            }
1684            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1685            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
1686        }
1687        break;
1688    default:
1689        tcg_abort();
1690    }
1691}
1692
1693static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1694{
1695    TCGReg datalo, datahi, addrlo;
1696    TCGReg addrhi __attribute__((unused));
1697    TCGMemOpIdx oi;
1698    TCGMemOp opc;
1699#if defined(CONFIG_SOFTMMU)
1700    int mem_index;
1701    tcg_insn_unit *label_ptr[2];
1702#endif
1703
1704    datalo = *args++;
1705    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1706    addrlo = *args++;
1707    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1708    oi = *args++;
1709    opc = get_memop(oi);
1710
1711#if defined(CONFIG_SOFTMMU)
1712    mem_index = get_mmuidx(oi);
1713
1714    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1715                     label_ptr, offsetof(CPUTLBEntry, addr_write));
1716
1717    /* TLB Hit.  */
1718    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1719
1720    /* Record the current context of the store into the ldst label list.  */
1721    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1722                        s->code_ptr, label_ptr);
1723#else
1724    {
1725        int32_t offset = guest_base;
1726        TCGReg base = addrlo;
1727        int seg = 0;
1728
1729        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
1730        if (guest_base == 0 || guest_base_flags) {
1731            seg = guest_base_flags;
1732            offset = 0;
1733            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1734                seg |= P_ADDR32;
1735            }
1736        } else if (TCG_TARGET_REG_BITS == 64) {
1737            /* ??? Note that we can't use the same SIB addressing scheme
1738               as for loads, since we require L0 free for bswap.  */
1739            if (offset != guest_base) {
1740                if (TARGET_LONG_BITS == 32) {
1741                    tcg_out_ext32u(s, TCG_REG_L0, base);
1742                    base = TCG_REG_L0;
1743                }
1744                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1745                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1746                base = TCG_REG_L1;
1747                offset = 0;
1748            } else if (TARGET_LONG_BITS == 32) {
1749                tcg_out_ext32u(s, TCG_REG_L1, base);
1750                base = TCG_REG_L1;
1751            }
1752        }
1753
1754        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1755    }
1756#endif
1757}
1758
1759static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1760                              const TCGArg *args, const int *const_args)
1761{
1762    int c, vexop, rexw = 0;
1763
1764#if TCG_TARGET_REG_BITS == 64
1765# define OP_32_64(x) \
1766        case glue(glue(INDEX_op_, x), _i64): \
1767            rexw = P_REXW; /* FALLTHRU */    \
1768        case glue(glue(INDEX_op_, x), _i32)
1769#else
1770# define OP_32_64(x) \
1771        case glue(glue(INDEX_op_, x), _i32)
1772#endif
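    /* A sketch of the expansion: on a 64-bit host, OP_32_64(add) becomes
     *     case INDEX_op_add_i64: rexw = P_REXW;  (fallthru)
     *     case INDEX_op_add_i32:
     * so each arm below handles both widths, with REX.W selecting the
     * 64-bit form of the instruction. */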
1773
1774    switch(opc) {
1775    case INDEX_op_exit_tb:
1776        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1777        tcg_out_jmp(s, tb_ret_addr);
1778        break;
1779    case INDEX_op_goto_tb:
1780        if (s->tb_jmp_offset) {
1781            /* direct jump method */
1782            tcg_out8(s, OPC_JMP_long); /* jmp im */
1783            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
1784            tcg_out32(s, 0);
1785        } else {
1786            /* indirect jump method */
1787            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1788                                 (intptr_t)(s->tb_next + args[0]));
1789        }
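        /* Either way: the direct method emits "jmp rel32" with a zero
           displacement and records where it sits, so the 4 bytes can be
           patched in place once the destination TB is known; the indirect
           method reloads the target from tb_next[] on every execution. */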
1790        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
1791        break;
1792    case INDEX_op_br:
1793        tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
1794        break;
1795    OP_32_64(ld8u):
1796        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
1797        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1798        break;
1799    OP_32_64(ld8s):
1800        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1801        break;
1802    OP_32_64(ld16u):
1803        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
1804        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1805        break;
1806    OP_32_64(ld16s):
1807        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1808        break;
1809#if TCG_TARGET_REG_BITS == 64
1810    case INDEX_op_ld32u_i64:
1811#endif
1812    case INDEX_op_ld_i32:
1813        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1814        break;
1815
1816    OP_32_64(st8):
1817        if (const_args[0]) {
1818            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1819                                 0, args[1], args[2]);
1820            tcg_out8(s, args[0]);
1821        } else {
1822            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1823                                 args[0], args[1], args[2]);
1824        }
1825        break;
1826    OP_32_64(st16):
1827        if (const_args[0]) {
1828            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1829                                 0, args[1], args[2]);
1830            tcg_out16(s, args[0]);
1831        } else {
1832            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1833                                 args[0], args[1], args[2]);
1834        }
1835        break;
1836#if TCG_TARGET_REG_BITS == 64
1837    case INDEX_op_st32_i64:
1838#endif
1839    case INDEX_op_st_i32:
1840        if (const_args[0]) {
1841            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1842            tcg_out32(s, args[0]);
1843        } else {
1844            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1845        }
1846        break;
1847
1848    OP_32_64(add):
1849        /* For 3-operand addition, use LEA.  */
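        /* E.g. dest = src + imm becomes "lea imm(%src), %dest" and
           dest = src1 + src2 becomes "lea (%src1,%src2), %dest";
           LEA clobbers neither its sources nor the flags. */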
1850        if (args[0] != args[1]) {
1851            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1852
1853            if (const_args[2]) {
1854                c3 = a2, a2 = -1;
1855            } else if (a0 == a2) {
1856                /* Watch out for dest = src + dest, since we've removed
1857                   the matching constraint on the add.  */
1858                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1859                break;
1860            }
1861
1862            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1863            break;
1864        }
1865        c = ARITH_ADD;
1866        goto gen_arith;
1867    OP_32_64(sub):
1868        c = ARITH_SUB;
1869        goto gen_arith;
1870    OP_32_64(and):
1871        c = ARITH_AND;
1872        goto gen_arith;
1873    OP_32_64(or):
1874        c = ARITH_OR;
1875        goto gen_arith;
1876    OP_32_64(xor):
1877        c = ARITH_XOR;
1878        goto gen_arith;
1879    gen_arith:
1880        if (const_args[2]) {
1881            tgen_arithi(s, c + rexw, args[0], args[2], 0);
1882        } else {
1883            tgen_arithr(s, c + rexw, args[0], args[2]);
1884        }
1885        break;
1886
1887    OP_32_64(andc):
1888        if (const_args[2]) {
1889            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
1890                        args[0], args[1]);
1891            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
1892        } else {
1893            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
1894        }
1895        break;
1896
1897    OP_32_64(mul):
1898        if (const_args[2]) {
1899            int32_t val;
1900            val = args[2];
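            /* IMUL has a short form taking a sign-extended 8-bit
               immediate (opcode 0x6B) and a long form taking a full
               32-bit immediate (0x69); prefer the short one when it
               fits. */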
1901            if (val == (int8_t)val) {
1902                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1903                tcg_out8(s, val);
1904            } else {
1905                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1906                tcg_out32(s, val);
1907            }
1908        } else {
1909            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1910        }
1911        break;
1912
1913    OP_32_64(div2):
1914        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1915        break;
1916    OP_32_64(divu2):
1917        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1918        break;
1919
1920    OP_32_64(shl):
1921        c = SHIFT_SHL;
1922        vexop = OPC_SHLX;
1923        goto gen_shift_maybe_vex;
1924    OP_32_64(shr):
1925        c = SHIFT_SHR;
1926        vexop = OPC_SHRX;
1927        goto gen_shift_maybe_vex;
1928    OP_32_64(sar):
1929        c = SHIFT_SAR;
1930        vexop = OPC_SARX;
1931        goto gen_shift_maybe_vex;
1932    OP_32_64(rotl):
1933        c = SHIFT_ROL;
1934        goto gen_shift;
1935    OP_32_64(rotr):
1936        c = SHIFT_ROR;
1937        goto gen_shift;
1938    gen_shift_maybe_vex:
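        /* BMI2's SHLX/SHRX/SARX take the count in any register rather
           than only %cl, and do not touch the flags. */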
1939        if (have_bmi2 && !const_args[2]) {
1940            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
1941            break;
1942        }
1943        /* FALLTHRU */
1944    gen_shift:
1945        if (const_args[2]) {
1946            tcg_out_shifti(s, c + rexw, args[0], args[2]);
1947        } else {
1948            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1949        }
1950        break;
1951
1952    case INDEX_op_brcond_i32:
1953        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1954                         arg_label(args[3]), 0);
1955        break;
1956    case INDEX_op_setcond_i32:
1957        tcg_out_setcond32(s, args[3], args[0], args[1],
1958                          args[2], const_args[2]);
1959        break;
1960    case INDEX_op_movcond_i32:
1961        tcg_out_movcond32(s, args[5], args[0], args[1],
1962                          args[2], const_args[2], args[3]);
1963        break;
1964
1965    OP_32_64(bswap16):
1966        tcg_out_rolw_8(s, args[0]);
1967        break;
1968    OP_32_64(bswap32):
1969        tcg_out_bswap32(s, args[0]);
1970        break;
1971
1972    OP_32_64(neg):
1973        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1974        break;
1975    OP_32_64(not):
1976        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1977        break;
1978
1979    OP_32_64(ext8s):
1980        tcg_out_ext8s(s, args[0], args[1], rexw);
1981        break;
1982    OP_32_64(ext16s):
1983        tcg_out_ext16s(s, args[0], args[1], rexw);
1984        break;
1985    OP_32_64(ext8u):
1986        tcg_out_ext8u(s, args[0], args[1]);
1987        break;
1988    OP_32_64(ext16u):
1989        tcg_out_ext16u(s, args[0], args[1]);
1990        break;
1991
1992    case INDEX_op_qemu_ld_i32:
1993        tcg_out_qemu_ld(s, args, 0);
1994        break;
1995    case INDEX_op_qemu_ld_i64:
1996        tcg_out_qemu_ld(s, args, 1);
1997        break;
1998    case INDEX_op_qemu_st_i32:
1999        tcg_out_qemu_st(s, args, 0);
2000        break;
2001    case INDEX_op_qemu_st_i64:
2002        tcg_out_qemu_st(s, args, 1);
2003        break;
2004
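    /* The one-operand MUL/IMUL forms multiply by %eax implicitly and
       leave the double-width product in %edx:%eax, which is why the
       constraints below pin these outputs to "a" and "d". */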
2005    OP_32_64(mulu2):
2006        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2007        break;
2008    OP_32_64(muls2):
2009        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2010        break;
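    /* Double-word add/sub: combine the low halves first, then fold the
       carry/borrow into the high halves, e.g. a 64-bit add on a 32-bit
       host becomes "addl lo; adcl hi". */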
2011    OP_32_64(add2):
2012        if (const_args[4]) {
2013            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
2014        } else {
2015            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
2016        }
2017        if (const_args[5]) {
2018            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
2019        } else {
2020            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
2021        }
2022        break;
2023    OP_32_64(sub2):
2024        if (const_args[4]) {
2025            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
2026        } else {
2027            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
2028        }
2029        if (const_args[5]) {
2030            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
2031        } else {
2032            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
2033        }
2034        break;
2035
2036#if TCG_TARGET_REG_BITS == 32
2037    case INDEX_op_brcond2_i32:
2038        tcg_out_brcond2(s, args, const_args, 0);
2039        break;
2040    case INDEX_op_setcond2_i32:
2041        tcg_out_setcond2(s, args, const_args);
2042        break;
2043#else /* TCG_TARGET_REG_BITS == 64 */
2044    case INDEX_op_ld32s_i64:
2045        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
2046        break;
2047    case INDEX_op_ld_i64:
2048        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2049        break;
2050    case INDEX_op_st_i64:
2051        if (const_args[0]) {
2052            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
2053                                 0, args[1], args[2]);
2054            tcg_out32(s, args[0]);
2055        } else {
2056            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2057        }
2058        break;
2059
2060    case INDEX_op_brcond_i64:
2061        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
2062                         arg_label(args[3]), 0);
2063        break;
2064    case INDEX_op_setcond_i64:
2065        tcg_out_setcond64(s, args[3], args[0], args[1],
2066                          args[2], const_args[2]);
2067        break;
2068    case INDEX_op_movcond_i64:
2069        tcg_out_movcond64(s, args[5], args[0], args[1],
2070                          args[2], const_args[2], args[3]);
2071        break;
2072
2073    case INDEX_op_bswap64_i64:
2074        tcg_out_bswap64(s, args[0]);
2075        break;
2076    case INDEX_op_extu_i32_i64:
2077    case INDEX_op_ext32u_i64:
2078        tcg_out_ext32u(s, args[0], args[1]);
2079        break;
2080    case INDEX_op_ext_i32_i64:
2081    case INDEX_op_ext32s_i64:
2082        tcg_out_ext32s(s, args[0], args[1]);
2083        break;
2084#endif
2085
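    /* The 'Q' constraint restricts deposits to the byte/word insertions
       that x86 can express as partial-register moves; note that reg + 4
       below is the encoding of the matching high-byte register
       (%ah..%bh). */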
2086    OP_32_64(deposit):
2087        if (args[3] == 0 && args[4] == 8) {
2088            /* load bits 0..7 */
2089            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
2090                          args[2], args[0]);
2091        } else if (args[3] == 8 && args[4] == 8) {
2092            /* load bits 8..15 */
2093            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
2094        } else if (args[3] == 0 && args[4] == 16) {
2095            /* load bits 0..15 */
2096            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
2097        } else {
2098            tcg_abort();
2099        }
2100        break;
2101
2102    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2103    case INDEX_op_mov_i64:
2104    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2105    case INDEX_op_movi_i64:
2106    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2107    default:
2108        tcg_abort();
2109    }
2110
2111#undef OP_32_64
2112}
2113
2114static const TCGTargetOpDef x86_op_defs[] = {
2115    { INDEX_op_exit_tb, { } },
2116    { INDEX_op_goto_tb, { } },
2117    { INDEX_op_br, { } },
2118    { INDEX_op_ld8u_i32, { "r", "r" } },
2119    { INDEX_op_ld8s_i32, { "r", "r" } },
2120    { INDEX_op_ld16u_i32, { "r", "r" } },
2121    { INDEX_op_ld16s_i32, { "r", "r" } },
2122    { INDEX_op_ld_i32, { "r", "r" } },
2123    { INDEX_op_st8_i32, { "qi", "r" } },
2124    { INDEX_op_st16_i32, { "ri", "r" } },
2125    { INDEX_op_st_i32, { "ri", "r" } },
2126
2127    { INDEX_op_add_i32, { "r", "r", "ri" } },
2128    { INDEX_op_sub_i32, { "r", "0", "ri" } },
2129    { INDEX_op_mul_i32, { "r", "0", "ri" } },
2130    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2131    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2132    { INDEX_op_and_i32, { "r", "0", "ri" } },
2133    { INDEX_op_or_i32, { "r", "0", "ri" } },
2134    { INDEX_op_xor_i32, { "r", "0", "ri" } },
2135    { INDEX_op_andc_i32, { "r", "r", "ri" } },
2136
2137    { INDEX_op_shl_i32, { "r", "0", "Ci" } },
2138    { INDEX_op_shr_i32, { "r", "0", "Ci" } },
2139    { INDEX_op_sar_i32, { "r", "0", "Ci" } },
2140    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2141    { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2142
2143    { INDEX_op_brcond_i32, { "r", "ri" } },
2144
2145    { INDEX_op_bswap16_i32, { "r", "0" } },
2146    { INDEX_op_bswap32_i32, { "r", "0" } },
2147
2148    { INDEX_op_neg_i32, { "r", "0" } },
2149
2150    { INDEX_op_not_i32, { "r", "0" } },
2151
2152    { INDEX_op_ext8s_i32, { "r", "q" } },
2153    { INDEX_op_ext16s_i32, { "r", "r" } },
2154    { INDEX_op_ext8u_i32, { "r", "q" } },
2155    { INDEX_op_ext16u_i32, { "r", "r" } },
2156
2157    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2158
2159    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
2160    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2161
2162    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2163    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2164    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2165    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2166
2167#if TCG_TARGET_REG_BITS == 32
2168    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2169    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2170#else
2171    { INDEX_op_ld8u_i64, { "r", "r" } },
2172    { INDEX_op_ld8s_i64, { "r", "r" } },
2173    { INDEX_op_ld16u_i64, { "r", "r" } },
2174    { INDEX_op_ld16s_i64, { "r", "r" } },
2175    { INDEX_op_ld32u_i64, { "r", "r" } },
2176    { INDEX_op_ld32s_i64, { "r", "r" } },
2177    { INDEX_op_ld_i64, { "r", "r" } },
2178    { INDEX_op_st8_i64, { "ri", "r" } },
2179    { INDEX_op_st16_i64, { "ri", "r" } },
2180    { INDEX_op_st32_i64, { "ri", "r" } },
2181    { INDEX_op_st_i64, { "re", "r" } },
2182
2183    { INDEX_op_add_i64, { "r", "r", "re" } },
2184    { INDEX_op_mul_i64, { "r", "0", "re" } },
2185    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2186    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2187    { INDEX_op_sub_i64, { "r", "0", "re" } },
2188    { INDEX_op_and_i64, { "r", "0", "reZ" } },
2189    { INDEX_op_or_i64, { "r", "0", "re" } },
2190    { INDEX_op_xor_i64, { "r", "0", "re" } },
2191    { INDEX_op_andc_i64, { "r", "r", "rI" } },
2192
2193    { INDEX_op_shl_i64, { "r", "0", "Ci" } },
2194    { INDEX_op_shr_i64, { "r", "0", "Ci" } },
2195    { INDEX_op_sar_i64, { "r", "0", "Ci" } },
2196    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2197    { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2198
2199    { INDEX_op_brcond_i64, { "r", "re" } },
2200    { INDEX_op_setcond_i64, { "r", "r", "re" } },
2201
2202    { INDEX_op_bswap16_i64, { "r", "0" } },
2203    { INDEX_op_bswap32_i64, { "r", "0" } },
2204    { INDEX_op_bswap64_i64, { "r", "0" } },
2205    { INDEX_op_neg_i64, { "r", "0" } },
2206    { INDEX_op_not_i64, { "r", "0" } },
2207
2208    { INDEX_op_ext8s_i64, { "r", "r" } },
2209    { INDEX_op_ext16s_i64, { "r", "r" } },
2210    { INDEX_op_ext32s_i64, { "r", "r" } },
2211    { INDEX_op_ext8u_i64, { "r", "r" } },
2212    { INDEX_op_ext16u_i64, { "r", "r" } },
2213    { INDEX_op_ext32u_i64, { "r", "r" } },
2214
2215    { INDEX_op_ext_i32_i64, { "r", "r" } },
2216    { INDEX_op_extu_i32_i64, { "r", "r" } },
2217
2218    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2219    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2220
2221    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2222    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2223    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2224    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2225#endif
2226
2227#if TCG_TARGET_REG_BITS == 64
2228    { INDEX_op_qemu_ld_i32, { "r", "L" } },
2229    { INDEX_op_qemu_st_i32, { "L", "L" } },
2230    { INDEX_op_qemu_ld_i64, { "r", "L" } },
2231    { INDEX_op_qemu_st_i64, { "L", "L" } },
2232#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2233    { INDEX_op_qemu_ld_i32, { "r", "L" } },
2234    { INDEX_op_qemu_st_i32, { "L", "L" } },
2235    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
2236    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
2237#else
2238    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2239    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
2240    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
2241    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
2242#endif
2243    { -1 },
2244};
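/* Constraint key, as parsed by target_parse_constraint() earlier in this
   file: "r" any register; "q" a register usable as a byte operand; "Q" a
   register whose second byte is addressable (%ah..%bh); "a"/"d" exactly
   %eax/%edx; "L" a register that survives the qemu_ld/st slow path
   (i.e. not L0/L1); "0"/"1" alias an earlier operand; "i" any immediate;
   "e"/"Z"/"I" immediates satisfying TCG_CT_CONST_S32/U32/I32. */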
2245
2246static const int tcg_target_callee_save_regs[] = {
2247#if TCG_TARGET_REG_BITS == 64
2248    TCG_REG_RBP,
2249    TCG_REG_RBX,
2250#if defined(_WIN64)
2251    TCG_REG_RDI,
2252    TCG_REG_RSI,
2253#endif
2254    TCG_REG_R12,
2255    TCG_REG_R13,
2256    TCG_REG_R14, /* Currently used for the global env. */
2257    TCG_REG_R15,
2258#else
2259    TCG_REG_EBP, /* Currently used for the global env. */
2260    TCG_REG_EBX,
2261    TCG_REG_ESI,
2262    TCG_REG_EDI,
2263#endif
2264};
2265
2266/* Compute frame size via macros, to share between tcg_target_qemu_prologue
2267   and tcg_register_jit.  */
2268
2269#define PUSH_SIZE \
2270    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2271     * (TCG_TARGET_REG_BITS / 8))
2272
2273#define FRAME_SIZE \
2274    ((PUSH_SIZE \
2275      + TCG_STATIC_CALL_ARGS_SIZE \
2276      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2277      + TCG_TARGET_STACK_ALIGN - 1) \
2278     & ~(TCG_TARGET_STACK_ALIGN - 1))
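/* A worked example, assuming the usual TCG_STATIC_CALL_ARGS_SIZE of 128
   and CPU_TEMP_BUF_NLONGS of 128: on x86_64 Linux there are six
   callee-saved registers, so PUSH_SIZE = (1 + 6) * 8 = 56 (the +1 is the
   return address) and FRAME_SIZE = (56 + 128 + 128 * 8 + 15) & ~15
   = 1216. */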
2279
2280/* Generate global QEMU prologue and epilogue code */
2281static void tcg_target_qemu_prologue(TCGContext *s)
2282{
2283    int i, stack_addend;
2284
2285    /* TB prologue */
2286
2287    /* Reserve some stack space, also for TCG temps.  */
2288    stack_addend = FRAME_SIZE - PUSH_SIZE;
2289    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2290                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2291
2292    /* Save all callee saved registers.  */
2293    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2294        tcg_out_push(s, tcg_target_callee_save_regs[i]);
2295    }
2296
2297#if TCG_TARGET_REG_BITS == 32
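    /* On entry env is at 4(%esp) and tb at 8(%esp); after pushing N
       callee-saved registers, env is found at (N+1)*4(%esp), and once the
       stack is also lowered by stack_addend, tb is at
       (N+2)*4 + stack_addend. */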
2298    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2299               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2300    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2301    /* jmp *tb.  */
2302    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2303                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2304                         + stack_addend);
2305#else
2306    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2307#ifdef _WIN64
2308    /* ??? Emit a zeroed stack frame; this seems to avoid longjmp unwind problems on Win64. */
2309    for (i = 0; i < stack_addend; i += 8) {
2310        tcg_out_pushi(s, 0);
2311    }
2312#else
2313    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2314#endif
2315    /* jmp *tb.  */
2316    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2317#endif
2318
2319    /* TB epilogue */
2320    tb_ret_addr = s->code_ptr;
2321
2322    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2323
2324    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2325        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2326    }
2327    tcg_out_opc(s, OPC_RET, 0, 0, 0);
2328
2329#if !defined(CONFIG_SOFTMMU)
2330    /* Try to set up a segment register to point to guest_base.  */
2331    if (guest_base) {
2332        setup_guest_base_seg();
2333    }
2334#endif
2335}
2336
2337static void tcg_target_init(TCGContext *s)
2338{
2339#ifdef CONFIG_CPUID_H
2340    unsigned a, b, c, d;
2341    int max = __get_cpuid_max(0, 0);
2342
2343    if (max >= 1) {
2344        __cpuid(1, a, b, c, d);
2345#ifndef have_cmov
2346        /* For 32-bit, 99% certainty that we're running on hardware that
2347           supports cmov, but we still need to check.  In case cmov is not
2348           available, we'll use a small forward branch.  */
2349        have_cmov = (d & bit_CMOV) != 0;
2350#endif
2351#ifndef have_movbe
2352        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
2353           need to probe for it.  */
2354        have_movbe = (c & bit_MOVBE) != 0;
2355#endif
2356    }
2357
2358    if (max >= 7) {
2359        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
2360        __cpuid_count(7, 0, a, b, c, d);
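        /* bit_BMI is absent from some older <cpuid.h> headers, hence the
           guard below. */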
2361#ifdef bit_BMI
2362        have_bmi1 = (b & bit_BMI) != 0;
2363#endif
2364#ifndef have_bmi2
2365        have_bmi2 = (b & bit_BMI2) != 0;
2366#endif
2367    }
2368#endif
2369
2370    if (TCG_TARGET_REG_BITS == 64) {
2371        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2372        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2373    } else {
2374        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2375    }
2376
2377    tcg_regset_clear(tcg_target_call_clobber_regs);
2378    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2379    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2380    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2381    if (TCG_TARGET_REG_BITS == 64) {
2382#if !defined(_WIN64)
2383        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2384        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2385#endif
2386        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2387        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2388        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2389        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2390    }
2391
2392    tcg_regset_clear(s->reserved_regs);
2393    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2394
2395    tcg_add_target_add_op_defs(x86_op_defs);
2396}
2397
2398typedef struct {
2399    DebugFrameHeader h;
2400    uint8_t fde_def_cfa[4];
2401    uint8_t fde_reg_ofs[14];
2402} DebugFrame;
2403
2404/* We're expecting a 2 byte uleb128 encoded value.  */
2405QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
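/* The two-byte uleb128 form emits the low seven bits first with the
   continuation bit (0x80) set, then the remaining bits: exactly the
   "(FRAME_SIZE & 0x7f) | 0x80, FRAME_SIZE >> 7" pair used in fde_def_cfa
   below. */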
2406
2407#if !defined(__ELF__)
2408    /* Host machine without ELF. */
2409#elif TCG_TARGET_REG_BITS == 64
2410#define ELF_HOST_MACHINE EM_X86_64
2411static const DebugFrame debug_frame = {
2412    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2413    .h.cie.id = -1,
2414    .h.cie.version = 1,
2415    .h.cie.code_align = 1,
2416    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2417    .h.cie.return_column = 16,
2418
2419    /* Total FDE size does not include the "len" member.  */
2420    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2421
2422    .fde_def_cfa = {
2423        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
2424        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2425        (FRAME_SIZE >> 7)
2426    },
2427    .fde_reg_ofs = {
2428        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
2429        /* The following ordering must match tcg_target_callee_save_regs.  */
2430        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
2431        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
2432        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
2433        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
2434        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
2435        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
2436    }
2437};
2438#else
2439#define ELF_HOST_MACHINE EM_386
2440static const DebugFrame debug_frame = {
2441    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2442    .h.cie.id = -1,
2443    .h.cie.version = 1,
2444    .h.cie.code_align = 1,
2445    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
2446    .h.cie.return_column = 8,
2447
2448    /* Total FDE size does not include the "len" member.  */
2449    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2450
2451    .fde_def_cfa = {
2452        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
2453        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2454        (FRAME_SIZE >> 7)
2455    },
2456    .fde_reg_ofs = {
2457        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
2458        /* The following ordering must match tcg_target_callee_save_regs.  */
2459        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
2460        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
2461        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
2462        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
2463    }
2464};
2465#endif
2466
2467#if defined(ELF_HOST_MACHINE)
2468void tcg_register_jit(void *buf, size_t buf_size)
2469{
2470    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2471}
2472#endif
2473