qemu/target/i386/tcg/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20
#define PREFIX_REX    0x40

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag;
    MemOp dflag;

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty;

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

/* The environment in which user-only runs is constrained. */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif

#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif

/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need neither sprinkle
 * ifdefs through the translator nor provide the helper function.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }

#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

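/*
 * Switch the translator's idea of CC_OP.  Any CC globals that are live
 * for the old CC_OP but not for the new one are discarded, so that TCG
 * can dead-code-eliminate the computations that produced them.
 */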
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

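/* Flush the lazily tracked cc_op value out to the cpu_cc_op global.  */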
static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if HOST_BIG_ENDIAN
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    /* Any time the REX prefix is present, byte registers are uniform */
    if (reg < 4 || REX_PREFIX(s)) {
        return false;
    }
    return true;
}

/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

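/*
 * Store T0 into register REG with operand size OT, following the x86
 * partial-register rules: byte and word stores merge into the low bits
 * (or into AH..BH for the legacy high-byte registers), while 32-bit
 * stores zero the high half on x86_64.
 */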
static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

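/* Load register REG with operand size OT into T0, reading AH..BH when
   the byte-register encoding calls for it.  */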
static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

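/* Little-endian guest memory access of operand size IDX.  */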
static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

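/*
 * Extend SRC into DST according to SIZE, signed or unsigned.  If the
 * extension would be a no-op at the full target width, SRC is returned
 * unchanged and DST is not written.
 */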
static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

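/* Dispatch to the I/O helper matching the operand size.  */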
static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

/*
 * Validate that access to [port, port + 1<<ot) is allowed.
 * Raise #GP, or VMM exit if not.
 */
static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
                         uint32_t svm_flags)
{
#ifdef CONFIG_USER_ONLY
    /*
     * We do not implement the ioperm(2) syscall, so the TSS check
     * will always fail.
     */
    gen_exception_gpf(s);
    return false;
#else
    if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
        gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
    }
    if (GUEST(s)) {
        target_ulong cur_eip = s->base.pc_next - s->cs_base;
        target_ulong next_eip = s->pc - s->cs_base;

        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
            svm_flags |= SVM_IOIO_REP_MASK;
        }
        svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
        gen_helper_svm_check_io(cpu_env, port,
                                tcg_constant_i32(svm_flags),
                                tcg_constant_i32(next_eip - cur_eip));
    }
    return true;
#endif
}

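/*
 * One iteration of MOVS: load from the source segment:ESI, store to
 * ES:EDI, then advance both index registers by the direction-flag
 * increment for operand size OT.
 */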
static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

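/*
 * Description of how to evaluate a condition code: apply COND to REG
 * (masked with MASK when it is not -1), comparing against REG2 or IMM.
 * no_setcond means REG can be used as the result without a setcond.
 */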
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

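/* Store the 0/1 result of condition B into REG.  */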
static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

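/* Call the bpt_io helper after an I/O access when I/O breakpoints are
   enabled (HF_IOBPT_MASK).  */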
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
#ifdef CONFIG_USER_ONLY
        /* user-mode cpu should not be in IOBPT mode */
        g_assert_not_reached();
#else
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
#endif /* CONFIG_USER_ONLY */
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* Same method as Valgrind: we generate jumps to the current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        /* FCOMP: same compare as FCOM; the pop is done by the caller.  */
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

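/* Raise exception TRAPNO at CUR_EIP; the TB ends here.  */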
static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* Generate #GP for the current instruction. */
static void gen_exception_gpf(DisasContext *s)
{
    gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
}

/* Check for cpl == 0; if not, raise #GP and return false. */
static bool check_cpl0(DisasContext *s)
{
    if (CPL(s) == 0) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* If vm86, check for iopl == 3; if not, raise #GP and return false. */
static bool check_vm86_iopl(DisasContext *s)
{
    if (!VM86(s) || IOPL(s) == 3) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* Check for iopl allowing access; if not, raise #GP and return false. */
static bool check_iopl(DisasContext *s)
{
    if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

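/*
 * Update the flags after a variable-count shift.  RESULT is the shifted
 * value; SHM1 is the value shifted by COUNT-1, from which the last bit
 * shifted out (the new CF) is recovered.  A count of zero must leave
 * the flags untouched, hence the conditional stores below.
 */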
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

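/* Variable-count SHL/SHR/SAR on a register or memory operand.  */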
static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non-zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

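/* Variable-count ROL/ROR on a register or memory operand.  */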
1645static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1646{
1647    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1648    TCGv_i32 t0, t1;
1649
1650    /* load */
1651    if (op1 == OR_TMP0) {
1652        gen_op_ld_v(s, ot, s->T0, s->A0);
1653    } else {
1654        gen_op_mov_v_reg(s, ot, s->T0, op1);
1655    }
1656
1657    tcg_gen_andi_tl(s->T1, s->T1, mask);
1658
1659    switch (ot) {
1660    case MO_8:
1661        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1662        tcg_gen_ext8u_tl(s->T0, s->T0);
1663        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1664        goto do_long;
1665    case MO_16:
1666        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1667        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1668        goto do_long;
1669    do_long:
1670#ifdef TARGET_X86_64
1671    case MO_32:
1672        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1673        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1674        if (is_right) {
1675            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1676        } else {
1677            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1678        }
1679        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1680        break;
1681#endif
1682    default:
1683        if (is_right) {
1684            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1685        } else {
1686            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1687        }
1688        break;
1689    }
1690
1691    /* store */
1692    gen_op_st_rm_T0_A0(s, ot, op1);
1693
1694    /* We'll need the flags computed into CC_SRC.  */
1695    gen_compute_eflags(s);
1696
1697    /* The value that was "rotated out" is now present at the other end
1698       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1699       since we've computed the flags into CC_SRC, these variables are
1700       currently dead.  */
1701    if (is_right) {
1702        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1703        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1704        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1705    } else {
1706        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1707        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1708    }
1709    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1710    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
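    /* After ROL, CF is bit 0 of the result and OF = MSB ^ CF; after
       ROR, CF is the MSB and OF = MSB ^ (MSB-1).  The shifts above pick
       out exactly those bits. */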
1711
1712    /* Now conditionally store the new CC_OP value.  If the shift count
1713       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1714       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
1715       exactly as we computed above.  */
1716    t0 = tcg_const_i32(0);
1717    t1 = tcg_temp_new_i32();
1718    tcg_gen_trunc_tl_i32(t1, s->T1);
1719    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1720    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1721    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1722                        s->tmp2_i32, s->tmp3_i32);
1723    tcg_temp_free_i32(t0);
1724    tcg_temp_free_i32(t1);
1725
1726    /* The CC_OP value is no longer predictable.  */
1727    set_cc_op(s, CC_OP_DYNAMIC);
1728}
1729
1730static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1731                          int is_right)
1732{
1733    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1734    int shift;
1735
1736    /* load */
1737    if (op1 == OR_TMP0) {
1738        gen_op_ld_v(s, ot, s->T0, s->A0);
1739    } else {
1740        gen_op_mov_v_reg(s, ot, s->T0, op1);
1741    }
1742
1743    op2 &= mask;
1744    if (op2 != 0) {
1745        switch (ot) {
1746#ifdef TARGET_X86_64
1747        case MO_32:
1748            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1749            if (is_right) {
1750                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1751            } else {
1752                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1753            }
1754            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1755            break;
1756#endif
1757        default:
1758            if (is_right) {
1759                tcg_gen_rotri_tl(s->T0, s->T0, op2);
1760            } else {
1761                tcg_gen_rotli_tl(s->T0, s->T0, op2);
1762            }
1763            break;
1764        case MO_8:
1765            mask = 7;
1766            goto do_shifts;
1767        case MO_16:
1768            mask = 15;
1769        do_shifts:
1770            shift = op2 & mask;
1771            if (is_right) {
1772                shift = mask + 1 - shift;
1773            }
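            /* Compose the rotate from two shifts: e.g. an 8-bit rotate
               left by 3 becomes (x << 3) | (x >> 5); a right rotate by
               n is handled as a left rotate by width - n. */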
1774            gen_extu(ot, s->T0);
1775            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1776            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1777            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1778            break;
1779        }
1780    }
1781
1782    /* store */
1783    gen_op_st_rm_T0_A0(s, ot, op1);
1784
1785    if (op2 != 0) {
1786        /* Compute the flags into CC_SRC.  */
1787        gen_compute_eflags(s);
1788
1789        /* The value that was "rotated out" is now present at the other end
1790           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1791           since we've computed the flags into CC_SRC, these variables are
1792           currently dead.  */
1793        if (is_right) {
1794            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1795            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1796            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1797        } else {
1798            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1799            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1800        }
1801        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1802        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1803        set_cc_op(s, CC_OP_ADCOX);
1804    }
1805}
1806
1807/* XXX: add faster immediate = 1 case */
1808static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809                           int is_right)
1810{
1811    gen_compute_eflags(s);
1812    assert(s->cc_op == CC_OP_EFLAGS);
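    /* RCL/RCR rotate through CF, i.e. over ot + 1 bits, so the helpers
       both consume and produce CF; EFLAGS must therefore be up to date
       on entry, hence the gen_compute_eflags() above. */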
1813
1814    /* load */
1815    if (op1 == OR_TMP0)
1816        gen_op_ld_v(s, ot, s->T0, s->A0);
1817    else
1818        gen_op_mov_v_reg(s, ot, s->T0, op1);
1819
1820    if (is_right) {
1821        switch (ot) {
1822        case MO_8:
1823            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824            break;
1825        case MO_16:
1826            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827            break;
1828        case MO_32:
1829            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830            break;
1831#ifdef TARGET_X86_64
1832        case MO_64:
1833            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834            break;
1835#endif
1836        default:
1837            tcg_abort();
1838        }
1839    } else {
1840        switch (ot) {
1841        case MO_8:
1842            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843            break;
1844        case MO_16:
1845            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846            break;
1847        case MO_32:
1848            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849            break;
1850#ifdef TARGET_X86_64
1851        case MO_64:
1852            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853            break;
1854#endif
1855        default:
1856            tcg_abort();
1857        }
1858    }
1859    /* store */
1860    gen_op_st_rm_T0_A0(s, ot, op1);
1861}
1862
1863/* XXX: add faster immediate case */
1864static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1865                             bool is_right, TCGv count_in)
1866{
1867    target_ulong mask = (ot == MO_64 ? 63 : 31);
1868    TCGv count;
1869
1870    /* load */
1871    if (op1 == OR_TMP0) {
1872        gen_op_ld_v(s, ot, s->T0, s->A0);
1873    } else {
1874        gen_op_mov_v_reg(s, ot, s->T0, op1);
1875    }
1876
1877    count = tcg_temp_new();
1878    tcg_gen_andi_tl(count, count_in, mask);
1879
1880    switch (ot) {
1881    case MO_16:
1882        /* Note: we implement the Intel behaviour for shift count > 16.
1883           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1884           portion by constructing it as a 32-bit value.  */
1885        if (is_right) {
1886            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1887            tcg_gen_mov_tl(s->T1, s->T0);
1888            tcg_gen_mov_tl(s->T0, s->tmp0);
1889        } else {
1890            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1891        }
1892        /*
1893         * If TARGET_X86_64 is defined, fall through into the MO_32 case;
1894         * otherwise fall through to the default case.
1895         */
1896    case MO_32:
1897#ifdef TARGET_X86_64
1898        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1899        tcg_gen_subi_tl(s->tmp0, count, 1);
1900        if (is_right) {
1901            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1902            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1903            tcg_gen_shr_i64(s->T0, s->T0, count);
1904        } else {
1905            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1906            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1907            tcg_gen_shl_i64(s->T0, s->T0, count);
1908            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1909            tcg_gen_shri_i64(s->T0, s->T0, 32);
1910        }
1911        break;
1912#endif
1913    default:
1914        tcg_gen_subi_tl(s->tmp0, count, 1);
1915        if (is_right) {
1916            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1917
1918            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1919            tcg_gen_shr_tl(s->T0, s->T0, count);
1920            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1921        } else {
1922            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1923            if (ot == MO_16) {
1924                /* Only needed if count > 16, for Intel behaviour.  */
1925                tcg_gen_subfi_tl(s->tmp4, 33, count);
1926                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1927                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1928            }
1929
1930            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1931            tcg_gen_shl_tl(s->T0, s->T0, count);
1932            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1933        }
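        /* A zero count must leave the destination unmodified, so
           discard the bits that would have been shifted in from T1. */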
1934        tcg_gen_movi_tl(s->tmp4, 0);
1935        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1936                           s->tmp4, s->T1);
1937        tcg_gen_or_tl(s->T0, s->T0, s->T1);
1938        break;
1939    }
1940
1941    /* store */
1942    gen_op_st_rm_T0_A0(s, ot, op1);
1943
1944    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1945    tcg_temp_free(count);
1946}
1947
1948static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949{
1950    if (s != OR_TMP1)
1951        gen_op_mov_v_reg(s1, ot, s1->T1, s);
1952    switch(op) {
1953    case OP_ROL:
1954        gen_rot_rm_T1(s1, ot, d, 0);
1955        break;
1956    case OP_ROR:
1957        gen_rot_rm_T1(s1, ot, d, 1);
1958        break;
1959    case OP_SHL:
1960    case OP_SHL1:
1961        gen_shift_rm_T1(s1, ot, d, 0, 0);
1962        break;
1963    case OP_SHR:
1964        gen_shift_rm_T1(s1, ot, d, 1, 0);
1965        break;
1966    case OP_SAR:
1967        gen_shift_rm_T1(s1, ot, d, 1, 1);
1968        break;
1969    case OP_RCL:
1970        gen_rotc_rm_T1(s1, ot, d, 0);
1971        break;
1972    case OP_RCR:
1973        gen_rotc_rm_T1(s1, ot, d, 1);
1974        break;
1975    }
1976}
1977
1978static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979{
1980    switch(op) {
1981    case OP_ROL:
1982        gen_rot_rm_im(s1, ot, d, c, 0);
1983        break;
1984    case OP_ROR:
1985        gen_rot_rm_im(s1, ot, d, c, 1);
1986        break;
1987    case OP_SHL:
1988    case OP_SHL1:
1989        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990        break;
1991    case OP_SHR:
1992        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993        break;
1994    case OP_SAR:
1995        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996        break;
1997    default:
1998        /* currently not optimized */
1999        tcg_gen_movi_tl(s1->T1, c);
2000        gen_shift(s1, op, ot, d, OR_TMP1);
2001        break;
2002    }
2003}
2004
2005#define X86_MAX_INSN_LENGTH 15
2006
2007static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2008{
2009    uint64_t pc = s->pc;
2010
2011    s->pc += num_bytes;
2012    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2013        /* If the instruction's 16th byte is on a different page than the 1st, a
2014         * page fault on the second page wins over the general protection fault
2015         * caused by the instruction being too long.
2016         * This can happen even if the operand is only one byte long!
2017         */
2018        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2019            volatile uint8_t unused =
2020                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2021            (void) unused;
2022        }
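        /* The matching sigsetjmp in the instruction decoder turns this
           into a #GP fault for the over-long instruction. */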
2023        siglongjmp(s->jmpbuf, 1);
2024    }
2025
2026    return pc;
2027}
2028
2029static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2030{
2031    return translator_ldub(env, &s->base, advance_pc(env, s, 1));
2032}
2033
2034static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2035{
2036    return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
2037}
2038
2039static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2040{
2041    return translator_lduw(env, &s->base, advance_pc(env, s, 2));
2042}
2043
2044static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2045{
2046    return translator_ldl(env, &s->base, advance_pc(env, s, 4));
2047}
2048
2049#ifdef TARGET_X86_64
2050static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
2051{
2052    return translator_ldq(env, &s->base, advance_pc(env, s, 8));
2053}
2054#endif
2055
2056/* Decompose an address.  */
2057
2058typedef struct AddressParts {
2059    int def_seg;
2060    int base;
2061    int index;
2062    int scale;
2063    target_long disp;
2064} AddressParts;
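
/* For example, lea 0x10(%eax,%ebx,4) decomposes to def_seg = R_DS,
   base = R_EAX, index = R_EBX, scale = 2 (a log2 shift count) and
   disp = 0x10.  -1 means "absent" for base and index; base == -2
   (set below) marks the RIP-relative form, where the target is folded
   into disp. */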
2065
2066static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2067                                    int modrm)
2068{
2069    int def_seg, base, index, scale, mod, rm;
2070    target_long disp;
2071    bool havesib;
2072
2073    def_seg = R_DS;
2074    index = -1;
2075    scale = 0;
2076    disp = 0;
2077
2078    mod = (modrm >> 6) & 3;
2079    rm = modrm & 7;
2080    base = rm | REX_B(s);
2081
2082    if (mod == 3) {
2083        /* Normally filtered out earlier, but including this path
2084           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2085        goto done;
2086    }
2087
2088    switch (s->aflag) {
2089    case MO_64:
2090    case MO_32:
2091        havesib = 0;
2092        if (rm == 4) {
2093            int code = x86_ldub_code(env, s);
2094            scale = (code >> 6) & 3;
2095            index = ((code >> 3) & 7) | REX_X(s);
2096            if (index == 4) {
2097                index = -1;  /* no index */
2098            }
2099            base = (code & 7) | REX_B(s);
2100            havesib = 1;
2101        }
2102
2103        switch (mod) {
2104        case 0:
2105            if ((base & 7) == 5) {
2106                base = -1;
2107                disp = (int32_t)x86_ldl_code(env, s);
2108                if (CODE64(s) && !havesib) {
2109                    base = -2;
2110                    disp += s->pc + s->rip_offset;
2111                }
2112            }
2113            break;
2114        case 1:
2115            disp = (int8_t)x86_ldub_code(env, s);
2116            break;
2117        default:
2118        case 2:
2119            disp = (int32_t)x86_ldl_code(env, s);
2120            break;
2121        }
2122
2123        /* For correct popl handling with esp.  */
2124        if (base == R_ESP && s->popl_esp_hack) {
2125            disp += s->popl_esp_hack;
2126        }
2127        if (base == R_EBP || base == R_ESP) {
2128            def_seg = R_SS;
2129        }
2130        break;
2131
2132    case MO_16:
2133        if (mod == 0) {
2134            if (rm == 6) {
2135                base = -1;
2136                disp = x86_lduw_code(env, s);
2137                break;
2138            }
2139        } else if (mod == 1) {
2140            disp = (int8_t)x86_ldub_code(env, s);
2141        } else {
2142            disp = (int16_t)x86_lduw_code(env, s);
2143        }
2144
2145        switch (rm) {
2146        case 0:
2147            base = R_EBX;
2148            index = R_ESI;
2149            break;
2150        case 1:
2151            base = R_EBX;
2152            index = R_EDI;
2153            break;
2154        case 2:
2155            base = R_EBP;
2156            index = R_ESI;
2157            def_seg = R_SS;
2158            break;
2159        case 3:
2160            base = R_EBP;
2161            index = R_EDI;
2162            def_seg = R_SS;
2163            break;
2164        case 4:
2165            base = R_ESI;
2166            break;
2167        case 5:
2168            base = R_EDI;
2169            break;
2170        case 6:
2171            base = R_EBP;
2172            def_seg = R_SS;
2173            break;
2174        default:
2175        case 7:
2176            base = R_EBX;
2177            break;
2178        }
2179        break;
2180
2181    default:
2182        tcg_abort();
2183    }
2184
2185 done:
2186    return (AddressParts){ def_seg, base, index, scale, disp };
2187}
2188
2189/* Compute the address, with a minimum number of TCG ops.  */
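/* A bare register operand such as (%eax) emits no ops at all: the
   returned TCGv aliases cpu_regs[] directly.  A0 is written only when
   scaling, an addition or a displacement is actually needed. */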
2190static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2191{
2192    TCGv ea = NULL;
2193
2194    if (a.index >= 0) {
2195        if (a.scale == 0) {
2196            ea = cpu_regs[a.index];
2197        } else {
2198            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2199            ea = s->A0;
2200        }
2201        if (a.base >= 0) {
2202            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2203            ea = s->A0;
2204        }
2205    } else if (a.base >= 0) {
2206        ea = cpu_regs[a.base];
2207    }
2208    if (!ea) {
2209        tcg_gen_movi_tl(s->A0, a.disp);
2210        ea = s->A0;
2211    } else if (a.disp != 0) {
2212        tcg_gen_addi_tl(s->A0, ea, a.disp);
2213        ea = s->A0;
2214    }
2215
2216    return ea;
2217}
2218
2219static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220{
2221    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222    TCGv ea = gen_lea_modrm_1(s, a);
2223    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224}
2225
2226static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227{
2228    (void)gen_lea_modrm_0(env, s, modrm);
2229}
2230
2231/* Used for BNDCL, BNDCU, BNDCN.  */
2232static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2233                      TCGCond cond, TCGv_i64 bndv)
2234{
2235    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2236
2237    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2238    if (!CODE64(s)) {
2239        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2240    }
2241    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2242    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2243    gen_helper_bndck(cpu_env, s->tmp2_i32);
2244}
2245
2246/* used for LEA and MOV AX, mem */
2247static void gen_add_A0_ds_seg(DisasContext *s)
2248{
2249    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2250}
2251
2252/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2253   OR_TMP0 */
2254static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255                           MemOp ot, int reg, int is_store)
2256{
2257    int mod, rm;
2258
2259    mod = (modrm >> 6) & 3;
2260    rm = (modrm & 7) | REX_B(s);
2261    if (mod == 3) {
2262        if (is_store) {
2263            if (reg != OR_TMP0)
2264                gen_op_mov_v_reg(s, ot, s->T0, reg);
2265            gen_op_mov_reg_v(s, ot, rm, s->T0);
2266        } else {
2267            gen_op_mov_v_reg(s, ot, s->T0, rm);
2268            if (reg != OR_TMP0)
2269                gen_op_mov_reg_v(s, ot, reg, s->T0);
2270        }
2271    } else {
2272        gen_lea_modrm(env, s, modrm);
2273        if (is_store) {
2274            if (reg != OR_TMP0)
2275                gen_op_mov_v_reg(s, ot, s->T0, reg);
2276            gen_op_st_v(s, ot, s->T0, s->A0);
2277        } else {
2278            gen_op_ld_v(s, ot, s->T0, s->A0);
2279            if (reg != OR_TMP0)
2280                gen_op_mov_reg_v(s, ot, reg, s->T0);
2281        }
2282    }
2283}
2284
2285static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2286{
2287    uint32_t ret;
2288
2289    switch (ot) {
2290    case MO_8:
2291        ret = x86_ldub_code(env, s);
2292        break;
2293    case MO_16:
2294        ret = x86_lduw_code(env, s);
2295        break;
2296    case MO_32:
2297#ifdef TARGET_X86_64
2298    case MO_64:
2299#endif
2300        ret = x86_ldl_code(env, s);
2301        break;
2302    default:
2303        tcg_abort();
2304    }
2305    return ret;
2306}
2307
2308static inline int insn_const_size(MemOp ot)
2309{
2310    if (ot <= MO_32) {
2311        return 1 << ot;
2312    } else {
2313        return 4;
2314    }
2315}
2316
2317static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2318{
2319    target_ulong pc = s->cs_base + eip;
2320
2321    if (translator_use_goto_tb(&s->base, pc)) {
2322        /* jump to same page: we can use a direct jump */
2323        tcg_gen_goto_tb(tb_num);
2324        gen_jmp_im(s, eip);
2325        tcg_gen_exit_tb(s->base.tb, tb_num);
2326        s->base.is_jmp = DISAS_NORETURN;
2327    } else {
2328        /* jump to another page */
2329        gen_jmp_im(s, eip);
2330        gen_jr(s, s->tmp0);
2331    }
2332}
2333
2334static inline void gen_jcc(DisasContext *s, int b,
2335                           target_ulong val, target_ulong next_eip)
2336{
2337    TCGLabel *l1, *l2;
2338
2339    if (s->jmp_opt) {
2340        l1 = gen_new_label();
2341        gen_jcc1(s, b, l1);
2342
2343        gen_goto_tb(s, 0, next_eip);
2344
2345        gen_set_label(l1);
2346        gen_goto_tb(s, 1, val);
2347    } else {
2348        l1 = gen_new_label();
2349        l2 = gen_new_label();
2350        gen_jcc1(s, b, l1);
2351
2352        gen_jmp_im(s, next_eip);
2353        tcg_gen_br(l2);
2354
2355        gen_set_label(l1);
2356        gen_jmp_im(s, val);
2357        gen_set_label(l2);
2358        gen_eob(s);
2359    }
2360}
2361
2362static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2363                        int modrm, int reg)
2364{
2365    CCPrepare cc;
2366
2367    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2368
2369    cc = gen_prepare_cc(s, b, s->T1);
2370    if (cc.mask != -1) {
2371        TCGv t0 = tcg_temp_new();
2372        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2373        cc.reg = t0;
2374    }
2375    if (!cc.use_reg2) {
2376        cc.reg2 = tcg_const_tl(cc.imm);
2377    }
2378
2379    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2380                       s->T0, cpu_regs[reg]);
2381    gen_op_mov_reg_v(s, ot, reg, s->T0);
2382
2383    if (cc.mask != -1) {
2384        tcg_temp_free(cc.reg);
2385    }
2386    if (!cc.use_reg2) {
2387        tcg_temp_free(cc.reg2);
2388    }
2389}
2390
2391static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2392{
2393    tcg_gen_ld32u_tl(s->T0, cpu_env,
2394                     offsetof(CPUX86State,segs[seg_reg].selector));
2395}
2396
2397static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2398{
2399    tcg_gen_ext16u_tl(s->T0, s->T0);
2400    tcg_gen_st32_tl(s->T0, cpu_env,
2401                    offsetof(CPUX86State,segs[seg_reg].selector));
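    /* In real and VM86 modes the segment base is simply selector << 4;
       no descriptor is loaded and no permission checks apply. */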
2402    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2403}
2404
2405/* Move T0 to seg_reg and check whether the CPU state may change.  Never
2406   call this function with seg_reg == R_CS. */
2407static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2408{
2409    if (PE(s) && !VM86(s)) {
2410        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2411        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2412        /* Abort translation because the addseg value may change or
2413           because ss32 may change.  For R_SS, translation must always
2414           stop, as special handling is needed to inhibit hardware
2415           interrupts for the next instruction. */
2416        if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2417            s->base.is_jmp = DISAS_TOO_MANY;
2418        }
2419    } else {
2420        gen_op_movl_seg_T0_vm(s, seg_reg);
2421        if (seg_reg == R_SS) {
2422            s->base.is_jmp = DISAS_TOO_MANY;
2423        }
2424    }
2425}
2426
2427static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2428{
2429    /* no SVM activated; fast case */
2430    if (likely(!GUEST(s))) {
2431        return;
2432    }
2433    gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2434}
2435
2436static inline void gen_stack_update(DisasContext *s, int addend)
2437{
2438    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2439}
2440
2441/* Generate a push. It depends on ss32, addseg and dflag.  */
2442static void gen_push_v(DisasContext *s, TCGv val)
2443{
2444    MemOp d_ot = mo_pushpop(s, s->dflag);
2445    MemOp a_ot = mo_stacksize(s);
2446    int size = 1 << d_ot;
2447    TCGv new_esp = s->A0;
2448
2449    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2450
2451    if (!CODE64(s)) {
2452        if (ADDSEG(s)) {
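            /* gen_lea_v_seg() below will add the SS base into A0, so
               keep the unbiased ESP value in tmp4 for the register
               write-back at the end. */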
2453            new_esp = s->tmp4;
2454            tcg_gen_mov_tl(new_esp, s->A0);
2455        }
2456        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2457    }
2458
2459    gen_op_st_v(s, d_ot, val, s->A0);
2460    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2461}
2462
2463/* A two-step pop is necessary for precise exceptions. */
2464static MemOp gen_pop_T0(DisasContext *s)
2465{
2466    MemOp d_ot = mo_pushpop(s, s->dflag);
2467
2468    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2469    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2470
2471    return d_ot;
2472}
2473
2474static inline void gen_pop_update(DisasContext *s, MemOp ot)
2475{
2476    gen_stack_update(s, 1 << ot);
2477}
2478
2479static inline void gen_stack_A0(DisasContext *s)
2480{
2481    gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2482}
2483
2484static void gen_pusha(DisasContext *s)
2485{
2486    MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2487    MemOp d_ot = s->dflag;
2488    int size = 1 << d_ot;
2489    int i;
2490
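    /* i == 0 writes EDI at the lowest address; the resulting stack
       image matches the architectural push order EAX, ECX, EDX, EBX,
       original ESP, EBP, ESI, EDI. */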
2491    for (i = 0; i < 8; i++) {
2492        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2493        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2494        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2495    }
2496
2497    gen_stack_update(s, -8 * size);
2498}
2499
2500static void gen_popa(DisasContext *s)
2501{
2502    MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2503    MemOp d_ot = s->dflag;
2504    int size = 1 << d_ot;
2505    int i;
2506
2507    for (i = 0; i < 8; i++) {
2508        /* ESP is not reloaded */
2509        if (7 - i == R_ESP) {
2510            continue;
2511        }
2512        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2513        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2514        gen_op_ld_v(s, d_ot, s->T0, s->A0);
2515        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2516    }
2517
2518    gen_stack_update(s, 8 * size);
2519}
2520
2521static void gen_enter(DisasContext *s, int esp_addend, int level)
2522{
2523    MemOp d_ot = mo_pushpop(s, s->dflag);
2524    MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2525    int size = 1 << d_ot;
2526
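    /* ENTER esp_addend, level: push EBP; FrameTemp = the decremented
       ESP; if level > 0, copy level - 1 saved frame pointers from the
       old frame and push FrameTemp itself; finally EBP = FrameTemp and
       ESP = FrameTemp - esp_addend - size * level. */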
2527    /* Push BP; compute FrameTemp into T1.  */
2528    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2529    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2530    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2531
2532    level &= 31;
2533    if (level != 0) {
2534        int i;
2535
2536        /* Copy level-1 pointers from the previous frame.  */
2537        for (i = 1; i < level; ++i) {
2538            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2539            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2540            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2541
2542            tcg_gen_subi_tl(s->A0, s->T1, size * i);
2543            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2544            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2545        }
2546
2547        /* Push the current FrameTemp as the last level.  */
2548        tcg_gen_subi_tl(s->A0, s->T1, size * level);
2549        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2550        gen_op_st_v(s, d_ot, s->T1, s->A0);
2551    }
2552
2553    /* Copy the FrameTemp value to EBP.  */
2554    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2555
2556    /* Compute the final value of ESP.  */
2557    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2558    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2559}
2560
2561static void gen_leave(DisasContext *s)
2562{
2563    MemOp d_ot = mo_pushpop(s, s->dflag);
2564    MemOp a_ot = mo_stacksize(s);
2565
2566    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2567    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2568
2569    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2570
2571    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2572    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2573}
2574
2575/* Similarly, except that the assumption here is that we don't decode
2576   the instruction at all -- either a missing opcode, an unimplemented
2577   feature, or just a bogus instruction stream.  */
2578static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2579{
2580    gen_illegal_opcode(s);
2581
2582    if (qemu_loglevel_mask(LOG_UNIMP)) {
2583        FILE *logfile = qemu_log_trylock();
2584        if (logfile) {
2585            target_ulong pc = s->pc_start, end = s->pc;
2586
2587            fprintf(logfile, "ILLOPC: " TARGET_FMT_lx ":", pc);
2588            for (; pc < end; ++pc) {
2589                fprintf(logfile, " %02x", cpu_ldub_code(env, pc));
2590            }
2591            fprintf(logfile, "\n");
2592            qemu_log_unlock(logfile);
2593        }
2594    }
2595}
2596
2597/* an interrupt is different from an exception because of the
2598   privilege checks */
2599static void gen_interrupt(DisasContext *s, int intno,
2600                          target_ulong cur_eip, target_ulong next_eip)
2601{
2602    gen_update_cc_op(s);
2603    gen_jmp_im(s, cur_eip);
2604    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2605                               tcg_const_i32(next_eip - cur_eip));
2606    s->base.is_jmp = DISAS_NORETURN;
2607}
2608
2609static void gen_set_hflag(DisasContext *s, uint32_t mask)
2610{
2611    if ((s->flags & mask) == 0) {
2612        TCGv_i32 t = tcg_temp_new_i32();
2613        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2614        tcg_gen_ori_i32(t, t, mask);
2615        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2616        tcg_temp_free_i32(t);
2617        s->flags |= mask;
2618    }
2619}
2620
2621static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2622{
2623    if (s->flags & mask) {
2624        TCGv_i32 t = tcg_temp_new_i32();
2625        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2626        tcg_gen_andi_i32(t, t, ~mask);
2627        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2628        tcg_temp_free_i32(t);
2629        s->flags &= ~mask;
2630    }
2631}
2632
2633/* Clear BND registers during legacy branches.  */
2634static void gen_bnd_jmp(DisasContext *s)
2635{
2636    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2637       and if the BNDREGs are known to be in use (non-zero) already.
2638       The helper itself will check BNDPRESERVE at runtime.  */
2639    if ((s->prefix & PREFIX_REPNZ) == 0
2640        && (s->flags & HF_MPX_EN_MASK) != 0
2641        && (s->flags & HF_MPX_IU_MASK) != 0) {
2642        gen_helper_bnd_jmp(cpu_env);
2643    }
2644}
2645
2646/* Generate an end of block. Trace exception is also generated if needed.
2647   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2648   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2649   S->TF.  This is used by the syscall/sysret insns.  */
2650static void
2651do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2652{
2653    gen_update_cc_op(s);
2654
2655    /* If several instructions disable interrupts, only the first does it.  */
2656    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2657        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2658    } else {
2659        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2660    }
2661
2662    if (s->base.tb->flags & HF_RF_MASK) {
2663        gen_helper_reset_rf(cpu_env);
2664    }
2665    if (recheck_tf) {
2666        gen_helper_rechecking_single_step(cpu_env);
2667        tcg_gen_exit_tb(NULL, 0);
2668    } else if (s->flags & HF_TF_MASK) {
2669        gen_helper_single_step(cpu_env);
2670    } else if (jr) {
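        /* Indirect jump: look up the destination TB at run time and
           chain to it directly when possible, instead of returning to
           the main loop. */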
2671        tcg_gen_lookup_and_goto_ptr();
2672    } else {
2673        tcg_gen_exit_tb(NULL, 0);
2674    }
2675    s->base.is_jmp = DISAS_NORETURN;
2676}
2677
2678static inline void
2679gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2680{
2681    do_gen_eob_worker(s, inhibit, recheck_tf, false);
2682}
2683
2684/* End of block.
2685   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2686static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2687{
2688    gen_eob_worker(s, inhibit, false);
2689}
2690
2691/* End of block, resetting the inhibit irq flag.  */
2692static void gen_eob(DisasContext *s)
2693{
2694    gen_eob_worker(s, false, false);
2695}
2696
2697/* Jump to register */
2698static void gen_jr(DisasContext *s, TCGv dest)
2699{
2700    do_gen_eob_worker(s, false, false, true);
2701}
2702
2703/* Generate a jump to eip.  No segment change may happen beforehand,
2704   because the next block may be reached by direct chaining. */
2705static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2706{
2707    gen_update_cc_op(s);
2708    set_cc_op(s, CC_OP_DYNAMIC);
2709    if (s->jmp_opt) {
2710        gen_goto_tb(s, tb_num, eip);
2711    } else {
2712        gen_jmp_im(s, eip);
2713        gen_eob(s);
2714    }
2715}
2716
2717static void gen_jmp(DisasContext *s, target_ulong eip)
2718{
2719    gen_jmp_tb(s, eip, 0);
2720}
2721
2722static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2723{
2724    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
2725    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2726}
2727
2728static inline void gen_stq_env_A0(DisasContext *s, int offset)
2729{
2730    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2731    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
2732}
2733
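/* Load or store a full 128-bit SSE register as two little-endian
   64-bit memory accesses. */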
2734static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2735{
2736    int mem_index = s->mem_index;
2737    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
2738    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2739    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2740    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
2741    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2742}
2743
2744static inline void gen_sto_env_A0(DisasContext *s, int offset)
2745{
2746    int mem_index = s->mem_index;
2747    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2748    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
2749    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2750    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2751    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
2752}
2753
2754static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2755{
2756    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2757    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2758    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2759    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2760}
2761
2762static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2763{
2764    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2765    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2766}
2767
2768static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2769{
2770    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2771    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2772}
2773
2774static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2775{
2776    tcg_gen_movi_i64(s->tmp1_i64, 0);
2777    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2778}
2779
2780typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2781typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2782typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2783typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2784typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2785typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2786                               TCGv_i32 val);
2787typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2788typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2789                               TCGv val);
2790
2791#define SSE_SPECIAL ((void *)1)
2792#define SSE_DUMMY ((void *)2)
2793
2794#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2795#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2796                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2797
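/* The row index is the second opcode byte; the column is selected by
   the mandatory prefix: 0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2. */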
2798static const SSEFunc_0_epp sse_op_table1[256][4] = {
2799    /* 3DNow! extensions */
2800    [0x0e] = { SSE_DUMMY }, /* femms */
2801    [0x0f] = { SSE_DUMMY }, /* pf... */
2802    /* pure SSE operations */
2803    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2804    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2805    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2806    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2807    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2808    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2809    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2810    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2811
2812    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2813    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2814    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2815    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2816    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2817    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2818    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2819    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2820    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2821    [0x51] = SSE_FOP(sqrt),
2822    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2823    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2824    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2825    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2826    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2827    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2828    [0x58] = SSE_FOP(add),
2829    [0x59] = SSE_FOP(mul),
2830    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2831               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2832    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2833    [0x5c] = SSE_FOP(sub),
2834    [0x5d] = SSE_FOP(min),
2835    [0x5e] = SSE_FOP(div),
2836    [0x5f] = SSE_FOP(max),
2837
2838    [0xc2] = SSE_FOP(cmpeq),
2839    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2840               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2841
2842    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2843    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2844    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2845
2846    /* MMX ops and their SSE extensions */
2847    [0x60] = MMX_OP2(punpcklbw),
2848    [0x61] = MMX_OP2(punpcklwd),
2849    [0x62] = MMX_OP2(punpckldq),
2850    [0x63] = MMX_OP2(packsswb),
2851    [0x64] = MMX_OP2(pcmpgtb),
2852    [0x65] = MMX_OP2(pcmpgtw),
2853    [0x66] = MMX_OP2(pcmpgtl),
2854    [0x67] = MMX_OP2(packuswb),
2855    [0x68] = MMX_OP2(punpckhbw),
2856    [0x69] = MMX_OP2(punpckhwd),
2857    [0x6a] = MMX_OP2(punpckhdq),
2858    [0x6b] = MMX_OP2(packssdw),
2859    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2860    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2861    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2862    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2863    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2864               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2865               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2866               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2867    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2868    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2869    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2870    [0x74] = MMX_OP2(pcmpeqb),
2871    [0x75] = MMX_OP2(pcmpeqw),
2872    [0x76] = MMX_OP2(pcmpeql),
2873    [0x77] = { SSE_DUMMY }, /* emms */
2874    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2875    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2876    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2877    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2878    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2879    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2880    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2881    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2882    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2883    [0xd1] = MMX_OP2(psrlw),
2884    [0xd2] = MMX_OP2(psrld),
2885    [0xd3] = MMX_OP2(psrlq),
2886    [0xd4] = MMX_OP2(paddq),
2887    [0xd5] = MMX_OP2(pmullw),
2888    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2889    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2890    [0xd8] = MMX_OP2(psubusb),
2891    [0xd9] = MMX_OP2(psubusw),
2892    [0xda] = MMX_OP2(pminub),
2893    [0xdb] = MMX_OP2(pand),
2894    [0xdc] = MMX_OP2(paddusb),
2895    [0xdd] = MMX_OP2(paddusw),
2896    [0xde] = MMX_OP2(pmaxub),
2897    [0xdf] = MMX_OP2(pandn),
2898    [0xe0] = MMX_OP2(pavgb),
2899    [0xe1] = MMX_OP2(psraw),
2900    [0xe2] = MMX_OP2(psrad),
2901    [0xe3] = MMX_OP2(pavgw),
2902    [0xe4] = MMX_OP2(pmulhuw),
2903    [0xe5] = MMX_OP2(pmulhw),
2904    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2905    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2906    [0xe8] = MMX_OP2(psubsb),
2907    [0xe9] = MMX_OP2(psubsw),
2908    [0xea] = MMX_OP2(pminsw),
2909    [0xeb] = MMX_OP2(por),
2910    [0xec] = MMX_OP2(paddsb),
2911    [0xed] = MMX_OP2(paddsw),
2912    [0xee] = MMX_OP2(pmaxsw),
2913    [0xef] = MMX_OP2(pxor),
2914    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2915    [0xf1] = MMX_OP2(psllw),
2916    [0xf2] = MMX_OP2(pslld),
2917    [0xf3] = MMX_OP2(psllq),
2918    [0xf4] = MMX_OP2(pmuludq),
2919    [0xf5] = MMX_OP2(pmaddwd),
2920    [0xf6] = MMX_OP2(psadbw),
2921    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2922               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2923    [0xf8] = MMX_OP2(psubb),
2924    [0xf9] = MMX_OP2(psubw),
2925    [0xfa] = MMX_OP2(psubl),
2926    [0xfb] = MMX_OP2(psubq),
2927    [0xfc] = MMX_OP2(paddb),
2928    [0xfd] = MMX_OP2(paddw),
2929    [0xfe] = MMX_OP2(paddl),
2930};
2931
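/* Shift-by-immediate groups (0F 71/72/73): one row of eight per
   element size (w, d, q); the column within a row is the modrm /reg
   field: /2 = psrl, /3 = psrldq, /4 = psra, /6 = psll, /7 = pslldq. */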
2932static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2933    [0 + 2] = MMX_OP2(psrlw),
2934    [0 + 4] = MMX_OP2(psraw),
2935    [0 + 6] = MMX_OP2(psllw),
2936    [8 + 2] = MMX_OP2(psrld),
2937    [8 + 4] = MMX_OP2(psrad),
2938    [8 + 6] = MMX_OP2(pslld),
2939    [16 + 2] = MMX_OP2(psrlq),
2940    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2941    [16 + 6] = MMX_OP2(psllq),
2942    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2943};
2944
2945static const SSEFunc_0_epi sse_op_table3ai[] = {
2946    gen_helper_cvtsi2ss,
2947    gen_helper_cvtsi2sd
2948};
2949
2950#ifdef TARGET_X86_64
2951static const SSEFunc_0_epl sse_op_table3aq[] = {
2952    gen_helper_cvtsq2ss,
2953    gen_helper_cvtsq2sd
2954};
2955#endif
2956
2957static const SSEFunc_i_ep sse_op_table3bi[] = {
2958    gen_helper_cvttss2si,
2959    gen_helper_cvtss2si,
2960    gen_helper_cvttsd2si,
2961    gen_helper_cvtsd2si
2962};
2963
2964#ifdef TARGET_X86_64
2965static const SSEFunc_l_ep sse_op_table3bq[] = {
2966    gen_helper_cvttss2sq,
2967    gen_helper_cvtss2sq,
2968    gen_helper_cvttsd2sq,
2969    gen_helper_cvtsd2sq
2970};
2971#endif
2972
2973static const SSEFunc_0_epp sse_op_table4[8][4] = {
2974    SSE_FOP(cmpeq),
2975    SSE_FOP(cmplt),
2976    SSE_FOP(cmple),
2977    SSE_FOP(cmpunord),
2978    SSE_FOP(cmpneq),
2979    SSE_FOP(cmpnlt),
2980    SSE_FOP(cmpnle),
2981    SSE_FOP(cmpord),
2982};
2983
2984static const SSEFunc_0_epp sse_op_table5[256] = {
2985    [0x0c] = gen_helper_pi2fw,
2986    [0x0d] = gen_helper_pi2fd,
2987    [0x1c] = gen_helper_pf2iw,
2988    [0x1d] = gen_helper_pf2id,
2989    [0x8a] = gen_helper_pfnacc,
2990    [0x8e] = gen_helper_pfpnacc,
2991    [0x90] = gen_helper_pfcmpge,
2992    [0x94] = gen_helper_pfmin,
2993    [0x96] = gen_helper_pfrcp,
2994    [0x97] = gen_helper_pfrsqrt,
2995    [0x9a] = gen_helper_pfsub,
2996    [0x9e] = gen_helper_pfadd,
2997    [0xa0] = gen_helper_pfcmpgt,
2998    [0xa4] = gen_helper_pfmax,
2999    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
3000    [0xa7] = gen_helper_movq, /* pfrsqit1 */
3001    [0xaa] = gen_helper_pfsubr,
3002    [0xae] = gen_helper_pfacc,
3003    [0xb0] = gen_helper_pfcmpeq,
3004    [0xb4] = gen_helper_pfmul,
3005    [0xb6] = gen_helper_movq, /* pfrcpit2 */
3006    [0xb7] = gen_helper_pmulhrw_mmx,
3007    [0xbb] = gen_helper_pswapd,
3008    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3009};
3010
3011struct SSEOpHelper_epp {
3012    SSEFunc_0_epp op[2];
3013    uint32_t ext_mask;
3014};
3015
3016struct SSEOpHelper_eppi {
3017    SSEFunc_0_eppi op[2];
3018    uint32_t ext_mask;
3019};
3020
3021#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
3022#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
3023#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
3024#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
3025#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
3026        CPUID_EXT_PCLMULQDQ }
3027#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3028
3029static const struct SSEOpHelper_epp sse_op_table6[256] = {
3030    [0x00] = SSSE3_OP(pshufb),
3031    [0x01] = SSSE3_OP(phaddw),
3032    [0x02] = SSSE3_OP(phaddd),
3033    [0x03] = SSSE3_OP(phaddsw),
3034    [0x04] = SSSE3_OP(pmaddubsw),
3035    [0x05] = SSSE3_OP(phsubw),
3036    [0x06] = SSSE3_OP(phsubd),
3037    [0x07] = SSSE3_OP(phsubsw),
3038    [0x08] = SSSE3_OP(psignb),
3039    [0x09] = SSSE3_OP(psignw),
3040    [0x0a] = SSSE3_OP(psignd),
3041    [0x0b] = SSSE3_OP(pmulhrsw),
3042    [0x10] = SSE41_OP(pblendvb),
3043    [0x14] = SSE41_OP(blendvps),
3044    [0x15] = SSE41_OP(blendvpd),
3045    [0x17] = SSE41_OP(ptest),
3046    [0x1c] = SSSE3_OP(pabsb),
3047    [0x1d] = SSSE3_OP(pabsw),
3048    [0x1e] = SSSE3_OP(pabsd),
3049    [0x20] = SSE41_OP(pmovsxbw),
3050    [0x21] = SSE41_OP(pmovsxbd),
3051    [0x22] = SSE41_OP(pmovsxbq),
3052    [0x23] = SSE41_OP(pmovsxwd),
3053    [0x24] = SSE41_OP(pmovsxwq),
3054    [0x25] = SSE41_OP(pmovsxdq),
3055    [0x28] = SSE41_OP(pmuldq),
3056    [0x29] = SSE41_OP(pcmpeqq),
3057    [0x2a] = SSE41_SPECIAL, /* movntdqa */
3058    [0x2b] = SSE41_OP(packusdw),
3059    [0x30] = SSE41_OP(pmovzxbw),
3060    [0x31] = SSE41_OP(pmovzxbd),
3061    [0x32] = SSE41_OP(pmovzxbq),
3062    [0x33] = SSE41_OP(pmovzxwd),
3063    [0x34] = SSE41_OP(pmovzxwq),
3064    [0x35] = SSE41_OP(pmovzxdq),
3065    [0x37] = SSE42_OP(pcmpgtq),
3066    [0x38] = SSE41_OP(pminsb),
3067    [0x39] = SSE41_OP(pminsd),
3068    [0x3a] = SSE41_OP(pminuw),
3069    [0x3b] = SSE41_OP(pminud),
3070    [0x3c] = SSE41_OP(pmaxsb),
3071    [0x3d] = SSE41_OP(pmaxsd),
3072    [0x3e] = SSE41_OP(pmaxuw),
3073    [0x3f] = SSE41_OP(pmaxud),
3074    [0x40] = SSE41_OP(pmulld),
3075    [0x41] = SSE41_OP(phminposuw),
3076    [0xdb] = AESNI_OP(aesimc),
3077    [0xdc] = AESNI_OP(aesenc),
3078    [0xdd] = AESNI_OP(aesenclast),
3079    [0xde] = AESNI_OP(aesdec),
3080    [0xdf] = AESNI_OP(aesdeclast),
3081};
3082
3083static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3084    [0x08] = SSE41_OP(roundps),
3085    [0x09] = SSE41_OP(roundpd),
3086    [0x0a] = SSE41_OP(roundss),
3087    [0x0b] = SSE41_OP(roundsd),
3088    [0x0c] = SSE41_OP(blendps),
3089    [0x0d] = SSE41_OP(blendpd),
3090    [0x0e] = SSE41_OP(pblendw),
3091    [0x0f] = SSSE3_OP(palignr),
3092    [0x14] = SSE41_SPECIAL, /* pextrb */
3093    [0x15] = SSE41_SPECIAL, /* pextrw */
3094    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3095    [0x17] = SSE41_SPECIAL, /* extractps */
3096    [0x20] = SSE41_SPECIAL, /* pinsrb */
3097    [0x21] = SSE41_SPECIAL, /* insertps */
3098    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3099    [0x40] = SSE41_OP(dpps),
3100    [0x41] = SSE41_OP(dppd),
3101    [0x42] = SSE41_OP(mpsadbw),
3102    [0x44] = PCLMULQDQ_OP(pclmulqdq),
3103    [0x60] = SSE42_OP(pcmpestrm),
3104    [0x61] = SSE42_OP(pcmpestri),
3105    [0x62] = SSE42_OP(pcmpistrm),
3106    [0x63] = SSE42_OP(pcmpistri),
3107    [0xdf] = AESNI_OP(aeskeygenassist),
3108};
3109
3110static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3111                    target_ulong pc_start)
3112{
3113    int b1, op1_offset, op2_offset, is_xmm, val;
3114    int modrm, mod, rm, reg;
3115    SSEFunc_0_epp sse_fn_epp;
3116    SSEFunc_0_eppi sse_fn_eppi;
3117    SSEFunc_0_ppi sse_fn_ppi;
3118    SSEFunc_0_eppt sse_fn_eppt;
3119    MemOp ot;
3120
3121    b &= 0xff;
3122    if (s->prefix & PREFIX_DATA)
3123        b1 = 1;
3124    else if (s->prefix & PREFIX_REPZ)
3125        b1 = 2;
3126    else if (s->prefix & PREFIX_REPNZ)
3127        b1 = 3;
3128    else
3129        b1 = 0;
3130    sse_fn_epp = sse_op_table1[b][b1];
3131    if (!sse_fn_epp) {
3132        goto unknown_op;
3133    }
3134    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3135        is_xmm = 1;
3136    } else {
3137        if (b1 == 0) {
3138            /* MMX case */
3139            is_xmm = 0;
3140        } else {
3141            is_xmm = 1;
3142        }
3143    }
3144    /* simple MMX/SSE operation */
3145    if (s->flags & HF_TS_MASK) {
3146        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3147        return;
3148    }
3149    if (s->flags & HF_EM_MASK) {
3150    illegal_op:
3151        gen_illegal_opcode(s);
3152        return;
3153    }
3154    if (is_xmm
3155        && !(s->flags & HF_OSFXSR_MASK)
3156        && (b != 0x38 && b != 0x3a)) {
3157        goto unknown_op;
3158    }
3159    if (b == 0x0e) {
3160        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3161            /* If we were fully decoding this we might use illegal_op.  */
3162            goto unknown_op;
3163        }
3164        /* femms */
3165        gen_helper_emms(cpu_env);
3166        return;
3167    }
3168    if (b == 0x77) {
3169        /* emms */
3170        gen_helper_emms(cpu_env);
3171        return;
3172    }
3173    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3174       the static cpu state) */
3175    if (!is_xmm) {
3176        gen_helper_enter_mmx(cpu_env);
3177    }
3178
3179    modrm = x86_ldub_code(env, s);
3180    reg = ((modrm >> 3) & 7);
3181    if (is_xmm) {
3182        reg |= REX_R(s);
3183    }
3184    mod = (modrm >> 6) & 3;
3185    if (sse_fn_epp == SSE_SPECIAL) {
3186        b |= (b1 << 8);
3187        switch(b) {
3188        case 0x0e7: /* movntq */
3189            if (mod == 3) {
3190                goto illegal_op;
3191            }
3192            gen_lea_modrm(env, s, modrm);
3193            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3194            break;
3195        case 0x1e7: /* movntdq */
3196        case 0x02b: /* movntps */
3197        case 0x12b: /* movntpd */
3198            if (mod == 3)
3199                goto illegal_op;
3200            gen_lea_modrm(env, s, modrm);
3201            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3202            break;
3203        case 0x3f0: /* lddqu */
3204            if (mod == 3)
3205                goto illegal_op;
3206            gen_lea_modrm(env, s, modrm);
3207            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3208            break;
3209        case 0x22b: /* movntss */
3210        case 0x32b: /* movntsd */
3211            if (mod == 3)
3212                goto illegal_op;
3213            gen_lea_modrm(env, s, modrm);
3214            if (b1 & 1) {
3215                gen_stq_env_A0(s, offsetof(CPUX86State,
3216                                           xmm_regs[reg].ZMM_Q(0)));
3217            } else {
3218                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3219                    xmm_regs[reg].ZMM_L(0)));
3220                gen_op_st_v(s, MO_32, s->T0, s->A0);
3221            }
3222            break;
3223        case 0x6e: /* movd mm, ea */
3224#ifdef TARGET_X86_64
3225            if (s->dflag == MO_64) {
3226                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3227                tcg_gen_st_tl(s->T0, cpu_env,
3228                              offsetof(CPUX86State, fpregs[reg].mmx));
3229            } else
3230#endif
3231            {
3232                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3233                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3234                                 offsetof(CPUX86State,fpregs[reg].mmx));
3235                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3236                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3237            }
3238            break;
3239        case 0x16e: /* movd xmm, ea */
3240#ifdef TARGET_X86_64
3241            if (s->dflag == MO_64) {
3242                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3243                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3244                                 offsetof(CPUX86State,xmm_regs[reg]));
3245                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3246            } else
3247#endif
3248            {
3249                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3250                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3251                                 offsetof(CPUX86State,xmm_regs[reg]));
3252                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3253                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3254            }
3255            break;
3256        case 0x6f: /* movq mm, ea */
3257            if (mod != 3) {
3258                gen_lea_modrm(env, s, modrm);
3259                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3260            } else {
3261                rm = (modrm & 7);
3262                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3263                               offsetof(CPUX86State,fpregs[rm].mmx));
3264                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3265                               offsetof(CPUX86State,fpregs[reg].mmx));
3266            }
3267            break;
3268        case 0x010: /* movups */
3269        case 0x110: /* movupd */
3270        case 0x028: /* movaps */
3271        case 0x128: /* movapd */
3272        case 0x16f: /* movdqa xmm, ea */
3273        case 0x26f: /* movdqu xmm, ea */
3274            if (mod != 3) {
3275                gen_lea_modrm(env, s, modrm);
3276                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3277            } else {
3278                rm = (modrm & 7) | REX_B(s);
3279                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3280                            offsetof(CPUX86State,xmm_regs[rm]));
3281            }
3282            break;
3283        case 0x210: /* movss xmm, ea */
3284            if (mod != 3) {
3285                gen_lea_modrm(env, s, modrm);
3286                gen_op_ld_v(s, MO_32, s->T0, s->A0);
3287                tcg_gen_st32_tl(s->T0, cpu_env,
3288                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3289                tcg_gen_movi_tl(s->T0, 0);
3290                tcg_gen_st32_tl(s->T0, cpu_env,
3291                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3292                tcg_gen_st32_tl(s->T0, cpu_env,
3293                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3294                tcg_gen_st32_tl(s->T0, cpu_env,
3295                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3296            } else {
3297                rm = (modrm & 7) | REX_B(s);
3298                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3299                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3300            }
3301            break;
3302        case 0x310: /* movsd xmm, ea */
3303            if (mod != 3) {
3304                gen_lea_modrm(env, s, modrm);
3305                gen_ldq_env_A0(s, offsetof(CPUX86State,
3306                                           xmm_regs[reg].ZMM_Q(0)));
3307                tcg_gen_movi_tl(s->T0, 0);
3308                tcg_gen_st32_tl(s->T0, cpu_env,
3309                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3310                tcg_gen_st32_tl(s->T0, cpu_env,
3311                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3312            } else {
3313                rm = (modrm & 7) | REX_B(s);
3314                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3315                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3316            }
3317            break;
3318        case 0x012: /* movlps */
3319        case 0x112: /* movlpd */
3320            if (mod != 3) {
3321                gen_lea_modrm(env, s, modrm);
3322                gen_ldq_env_A0(s, offsetof(CPUX86State,
3323                                           xmm_regs[reg].ZMM_Q(0)));
3324            } else {
3325                /* movhlps */
3326                rm = (modrm & 7) | REX_B(s);
3327                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3328                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3329            }
3330            break;
3331        case 0x212: /* movsldup */
3332            if (mod != 3) {
3333                gen_lea_modrm(env, s, modrm);
3334                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3335            } else {
3336                rm = (modrm & 7) | REX_B(s);
3337                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3338                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3339                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3340                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3341            }
3342            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3343                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3344            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3345                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3346            break;
3347        case 0x312: /* movddup */
3348            if (mod != 3) {
3349                gen_lea_modrm(env, s, modrm);
3350                gen_ldq_env_A0(s, offsetof(CPUX86State,
3351                                           xmm_regs[reg].ZMM_Q(0)));
3352            } else {
3353                rm = (modrm & 7) | REX_B(s);
3354                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3355                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3356            }
3357            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3358                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3359            break;
3360        case 0x016: /* movhps */
3361        case 0x116: /* movhpd */
3362            if (mod != 3) {
3363                gen_lea_modrm(env, s, modrm);
3364                gen_ldq_env_A0(s, offsetof(CPUX86State,
3365                                           xmm_regs[reg].ZMM_Q(1)));
3366            } else {
3367                /* movlhps */
3368                rm = (modrm & 7) | REX_B(s);
3369                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3370                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3371            }
3372            break;
3373        case 0x216: /* movshdup */
3374            if (mod != 3) {
3375                gen_lea_modrm(env, s, modrm);
3376                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3377            } else {
3378                rm = (modrm & 7) | REX_B(s);
3379                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3380                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3381                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3382                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3383            }
3384            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3385                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3386            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3387                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3388            break;
3389        case 0x178:
3390        case 0x378:
3391            {
3392                int bit_index, field_length;
3393
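                    /* AMD SSE4a EXTRQ/INSERTQ, immediate forms: a 6-bit
                       field length and a 6-bit bit index follow the modrm
                       byte.  */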
3394                if (b1 == 1 && reg != 0) {
3395                    goto illegal_op;
                    }
3396                field_length = x86_ldub_code(env, s) & 0x3F;
3397                bit_index = x86_ldub_code(env, s) & 0x3F;
3398                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3399                    offsetof(CPUX86State,xmm_regs[reg]));
3400                if (b1 == 1) {
3401                    gen_helper_extrq_i(cpu_env, s->ptr0,
3402                                       tcg_const_i32(bit_index),
3403                                       tcg_const_i32(field_length));
3404                } else {
3405                    gen_helper_insertq_i(cpu_env, s->ptr0,
3406                                         tcg_const_i32(bit_index),
3407                                         tcg_const_i32(field_length));
                    }
3408            }
3409            break;
3410        case 0x7e: /* movd ea, mm */
3411#ifdef TARGET_X86_64
3412            if (s->dflag == MO_64) {
3413                tcg_gen_ld_i64(s->T0, cpu_env,
3414                               offsetof(CPUX86State,fpregs[reg].mmx));
3415                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3416            } else
3417#endif
3418            {
3419                tcg_gen_ld32u_tl(s->T0, cpu_env,
3420                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3421                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3422            }
3423            break;
3424        case 0x17e: /* movd ea, xmm */
3425#ifdef TARGET_X86_64
3426            if (s->dflag == MO_64) {
3427                tcg_gen_ld_i64(s->T0, cpu_env,
3428                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3429                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3430            } else
3431#endif
3432            {
3433                tcg_gen_ld32u_tl(s->T0, cpu_env,
3434                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3435                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3436            }
3437            break;
3438        case 0x27e: /* movq xmm, ea */
3439            if (mod != 3) {
3440                gen_lea_modrm(env, s, modrm);
3441                gen_ldq_env_A0(s, offsetof(CPUX86State,
3442                                           xmm_regs[reg].ZMM_Q(0)));
3443            } else {
3444                rm = (modrm & 7) | REX_B(s);
3445                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3446                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3447            }
3448            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3449            break;
3450        case 0x7f: /* movq ea, mm */
3451            if (mod != 3) {
3452                gen_lea_modrm(env, s, modrm);
3453                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3454            } else {
3455                rm = (modrm & 7);
3456                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3457                            offsetof(CPUX86State,fpregs[reg].mmx));
3458            }
3459            break;
3460        case 0x011: /* movups */
3461        case 0x111: /* movupd */
3462        case 0x029: /* movaps */
3463        case 0x129: /* movapd */
3464        case 0x17f: /* movdqa ea, xmm */
3465        case 0x27f: /* movdqu ea, xmm */
3466            if (mod != 3) {
3467                gen_lea_modrm(env, s, modrm);
3468                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3469            } else {
3470                rm = (modrm & 7) | REX_B(s);
3471                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3472                            offsetof(CPUX86State,xmm_regs[reg]));
3473            }
3474            break;
3475        case 0x211: /* movss ea, xmm */
3476            if (mod != 3) {
3477                gen_lea_modrm(env, s, modrm);
3478                tcg_gen_ld32u_tl(s->T0, cpu_env,
3479                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3480                gen_op_st_v(s, MO_32, s->T0, s->A0);
3481            } else {
3482                rm = (modrm & 7) | REX_B(s);
3483                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3484                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3485            }
3486            break;
3487        case 0x311: /* movsd ea, xmm */
3488            if (mod != 3) {
3489                gen_lea_modrm(env, s, modrm);
3490                gen_stq_env_A0(s, offsetof(CPUX86State,
3491                                           xmm_regs[reg].ZMM_Q(0)));
3492            } else {
3493                rm = (modrm & 7) | REX_B(s);
3494                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3495                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3496            }
3497            break;
3498        case 0x013: /* movlps */
3499        case 0x113: /* movlpd */
3500            if (mod != 3) {
3501                gen_lea_modrm(env, s, modrm);
3502                gen_stq_env_A0(s, offsetof(CPUX86State,
3503                                           xmm_regs[reg].ZMM_Q(0)));
3504            } else {
3505                goto illegal_op;
3506            }
3507            break;
3508        case 0x017: /* movhps */
3509        case 0x117: /* movhpd */
3510            if (mod != 3) {
3511                gen_lea_modrm(env, s, modrm);
3512                gen_stq_env_A0(s, offsetof(CPUX86State,
3513                                           xmm_regs[reg].ZMM_Q(1)));
3514            } else {
3515                goto illegal_op;
3516            }
3517            break;
3518        case 0x71: /* shift mm, im */
3519        case 0x72:
3520        case 0x73:
3521        case 0x171: /* shift xmm, im */
3522        case 0x172:
3523        case 0x173:
3524            val = x86_ldub_code(env, s);
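                /* Stage the immediate shift count into the scratch
                   xmm_t0/mmx_t0 register, so that the two-operand helpers
                   from sse_op_table2 can be applied unchanged.  */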
3525            if (is_xmm) {
3526                tcg_gen_movi_tl(s->T0, val);
3527                tcg_gen_st32_tl(s->T0, cpu_env,
3528                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3529                tcg_gen_movi_tl(s->T0, 0);
3530                tcg_gen_st32_tl(s->T0, cpu_env,
3531                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3532                op1_offset = offsetof(CPUX86State,xmm_t0);
3533            } else {
3534                tcg_gen_movi_tl(s->T0, val);
3535                tcg_gen_st32_tl(s->T0, cpu_env,
3536                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3537                tcg_gen_movi_tl(s->T0, 0);
3538                tcg_gen_st32_tl(s->T0, cpu_env,
3539                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3540                op1_offset = offsetof(CPUX86State,mmx_t0);
3541            }
3542            assert(b1 < 2);
3543            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3544                                       ((modrm >> 3) & 7)][b1];
3545            if (!sse_fn_epp) {
3546                goto unknown_op;
3547            }
3548            if (is_xmm) {
3549                rm = (modrm & 7) | REX_B(s);
3550                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3551            } else {
3552                rm = (modrm & 7);
3553                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3554            }
3555            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3556            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3557            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3558            break;
3559        case 0x050: /* movmskps */
3560            rm = (modrm & 7) | REX_B(s);
3561            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3562                             offsetof(CPUX86State,xmm_regs[rm]));
3563            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3564            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3565            break;
3566        case 0x150: /* movmskpd */
3567            rm = (modrm & 7) | REX_B(s);
3568            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3569                             offsetof(CPUX86State,xmm_regs[rm]));
3570            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3571            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3572            break;
3573        case 0x02a: /* cvtpi2ps */
3574        case 0x12a: /* cvtpi2pd */
3575            gen_helper_enter_mmx(cpu_env);
3576            if (mod != 3) {
3577                gen_lea_modrm(env, s, modrm);
3578                op2_offset = offsetof(CPUX86State,mmx_t0);
3579                gen_ldq_env_A0(s, op2_offset);
3580            } else {
3581                rm = (modrm & 7);
3582                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3583            }
3584            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3585            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3586            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3587            switch(b >> 8) {
3588            case 0x0:
3589                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3590                break;
3591            default:
3592            case 0x1:
3593                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3594                break;
3595            }
3596            break;
3597        case 0x22a: /* cvtsi2ss */
3598        case 0x32a: /* cvtsi2sd */
3599            ot = mo_64_32(s->dflag);
3600            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3601            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3602            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3603            if (ot == MO_32) {
3604                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3605                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3606                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3607            } else {
3608#ifdef TARGET_X86_64
3609                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3610                sse_fn_epl(cpu_env, s->ptr0, s->T0);
3611#else
3612                goto illegal_op;
3613#endif
3614            }
3615            break;
3616        case 0x02c: /* cvttps2pi */
3617        case 0x12c: /* cvttpd2pi */
3618        case 0x02d: /* cvtps2pi */
3619        case 0x12d: /* cvtpd2pi */
3620            gen_helper_enter_mmx(cpu_env);
3621            if (mod != 3) {
3622                gen_lea_modrm(env, s, modrm);
3623                op2_offset = offsetof(CPUX86State,xmm_t0);
3624                gen_ldo_env_A0(s, op2_offset);
3625            } else {
3626                rm = (modrm & 7) | REX_B(s);
3627                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3628            }
3629            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3630            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3631            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3632            switch(b) {
3633            case 0x02c:
3634                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3635                break;
3636            case 0x12c:
3637                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3638                break;
3639            case 0x02d:
3640                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3641                break;
3642            case 0x12d:
3643                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3644                break;
3645            }
3646            break;
3647        case 0x22c: /* cvttss2si */
3648        case 0x32c: /* cvttsd2si */
3649        case 0x22d: /* cvtss2si */
3650        case 0x32d: /* cvtsd2si */
3651            ot = mo_64_32(s->dflag);
3652            if (mod != 3) {
3653                gen_lea_modrm(env, s, modrm);
3654                if ((b >> 8) & 1) {
3655                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3656                } else {
3657                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
3658                    tcg_gen_st32_tl(s->T0, cpu_env,
3659                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3660                }
3661                op2_offset = offsetof(CPUX86State,xmm_t0);
3662            } else {
3663                rm = (modrm & 7) | REX_B(s);
3664                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3665            }
3666            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3667            if (ot == MO_32) {
3668                SSEFunc_i_ep sse_fn_i_ep =
3669                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3670                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3671                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3672            } else {
3673#ifdef TARGET_X86_64
3674                SSEFunc_l_ep sse_fn_l_ep =
3675                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3676                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3677#else
3678                goto illegal_op;
3679#endif
3680            }
3681            gen_op_mov_reg_v(s, ot, reg, s->T0);
3682            break;
3683        case 0xc4: /* pinsrw */
3684        case 0x1c4:
3685            s->rip_offset = 1;
3686            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3687            val = x86_ldub_code(env, s);
3688            if (b1) {
3689                val &= 7;
3690                tcg_gen_st16_tl(s->T0, cpu_env,
3691                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3692            } else {
3693                val &= 3;
3694                tcg_gen_st16_tl(s->T0, cpu_env,
3695                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3696            }
3697            break;
3698        case 0xc5: /* pextrw */
3699        case 0x1c5:
3700            if (mod != 3) {
3701                goto illegal_op;
            }
3702            ot = mo_64_32(s->dflag);
3703            val = x86_ldub_code(env, s);
3704            if (b1) {
3705                val &= 7;
3706                rm = (modrm & 7) | REX_B(s);
3707                tcg_gen_ld16u_tl(s->T0, cpu_env,
3708                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3709            } else {
3710                val &= 3;
3711                rm = (modrm & 7);
3712                tcg_gen_ld16u_tl(s->T0, cpu_env,
3713                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3714            }
3715            reg = ((modrm >> 3) & 7) | REX_R(s);
3716            gen_op_mov_reg_v(s, ot, reg, s->T0);
3717            break;
3718        case 0x1d6: /* movq ea, xmm */
3719            if (mod != 3) {
3720                gen_lea_modrm(env, s, modrm);
3721                gen_stq_env_A0(s, offsetof(CPUX86State,
3722                                           xmm_regs[reg].ZMM_Q(0)));
3723            } else {
3724                rm = (modrm & 7) | REX_B(s);
3725                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3726                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3727                gen_op_movq_env_0(s,
3728                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3729            }
3730            break;
3731        case 0x2d6: /* movq2dq */
3732            gen_helper_enter_mmx(cpu_env);
3733            rm = (modrm & 7);
3734            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3735                        offsetof(CPUX86State,fpregs[rm].mmx));
3736            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3737            break;
3738        case 0x3d6: /* movdq2q */
3739            gen_helper_enter_mmx(cpu_env);
3740            rm = (modrm & 7) | REX_B(s);
3741            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3742                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3743            break;
3744        case 0xd7: /* pmovmskb */
3745        case 0x1d7:
3746            if (mod != 3) {
3747                goto illegal_op;
            }
3748            if (b1) {
3749                rm = (modrm & 7) | REX_B(s);
3750                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3751                                 offsetof(CPUX86State, xmm_regs[rm]));
3752                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3753            } else {
3754                rm = (modrm & 7);
3755                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3756                                 offsetof(CPUX86State, fpregs[rm].mmx));
3757                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3758            }
3759            reg = ((modrm >> 3) & 7) | REX_R(s);
3760            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3761            break;
3762
3763        case 0x138:
3764        case 0x038:
3765            b = modrm;
3766            if ((b & 0xf0) == 0xf0) {
3767                goto do_0f_38_fx;
3768            }
3769            modrm = x86_ldub_code(env, s);
3770            rm = modrm & 7;
3771            reg = ((modrm >> 3) & 7) | REX_R(s);
3772            mod = (modrm >> 6) & 3;
3773
3774            assert(b1 < 2);
3775            sse_fn_epp = sse_op_table6[b].op[b1];
3776            if (!sse_fn_epp) {
3777                goto unknown_op;
3778            }
3779            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) {
3780                goto illegal_op;
            }
3781
3782            if (b1) {
3783                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3784                if (mod == 3) {
3785                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3786                } else {
3787                    op2_offset = offsetof(CPUX86State,xmm_t0);
3788                    gen_lea_modrm(env, s, modrm);
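                        /* Load only as much of the operand as the
                           extension consumes: 64 bits for the b->w, w->d
                           and d->q widenings, 32 or 16 bits for the
                           narrower ones, and a full 128 bits otherwise.  */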
3789                    switch (b) {
3790                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3791                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3792                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3793                        gen_ldq_env_A0(s, op2_offset +
3794                                        offsetof(ZMMReg, ZMM_Q(0)));
3795                        break;
3796                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3797                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3798                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3799                                            s->mem_index, MO_LEUL);
3800                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3801                                        offsetof(ZMMReg, ZMM_L(0)));
3802                        break;
3803                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3804                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3805                                           s->mem_index, MO_LEUW);
3806                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3807                                        offsetof(ZMMReg, ZMM_W(0)));
3808                        break;
3809                    case 0x2a:            /* movntdqa */
3810                        gen_ldo_env_A0(s, op1_offset);
3811                        return;
3812                    default:
3813                        gen_ldo_env_A0(s, op2_offset);
3814                    }
3815                }
3816            } else {
3817                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3818                if (mod == 3) {
3819                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3820                } else {
3821                    op2_offset = offsetof(CPUX86State,mmx_t0);
3822                    gen_lea_modrm(env, s, modrm);
3823                    gen_ldq_env_A0(s, op2_offset);
3824                }
3825            }
3826            if (sse_fn_epp == SSE_SPECIAL) {
3827                goto unknown_op;
3828            }
3829
3830            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3831            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3832            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3833
3834            if (b == 0x17) {
3835                set_cc_op(s, CC_OP_EFLAGS);
3836            }
3837            break;
3838
3839        case 0x238:
3840        case 0x338:
3841        do_0f_38_fx:
3842            /* Various integer extensions at 0f 38 f[0-f].  */
3843            b = modrm | (b1 << 8);
3844            modrm = x86_ldub_code(env, s);
3845            reg = ((modrm >> 3) & 7) | REX_R(s);
3846
3847            switch (b) {
3848            case 0x3f0: /* crc32 Gd,Eb */
3849            case 0x3f1: /* crc32 Gd,Ey */
3850            do_crc32:
3851                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3852                    goto illegal_op;
3853                }
3854                if ((b & 0xff) == 0xf0) {
3855                    ot = MO_8;
3856                } else if (s->dflag != MO_64) {
3857                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3858                } else {
3859                    ot = MO_64;
3860                }
3861
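                    /* The crc32 helper implements the CRC-32C (Castagnoli)
                       polynomial used by the SSE4.2 instruction; 8 << ot
                       passes the source operand width in bits.  */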
3862                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3863                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3864                gen_helper_crc32(s->T0, s->tmp2_i32,
3865                                 s->T0, tcg_const_i32(8 << ot));
3866
3867                ot = mo_64_32(s->dflag);
3868                gen_op_mov_reg_v(s, ot, reg, s->T0);
3869                break;
3870
3871            case 0x1f0: /* crc32 or movbe */
3872            case 0x1f1:
3873                /* For these insns the f2 (repnz) prefix is supposed to
3874                   take priority over the 66 prefix, but that is not how
3875                   b1 was computed above; re-check PREFIX_REPNZ here.  */
3876                if (s->prefix & PREFIX_REPNZ) {
3877                    goto do_crc32;
3878                }
3879                /* FALLTHRU */
3880            case 0x0f0: /* movbe Gy,My */
3881            case 0x0f1: /* movbe My,Gy */
3882                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3883                    goto illegal_op;
3884                }
3885                if (s->dflag != MO_64) {
3886                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3887                } else {
3888                    ot = MO_64;
3889                }
3890
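                    /* MOVBE is a byte-swapping load/store: rather than an
                       explicit bswap, the swap is folded into the memory
                       access itself via the MO_BE flag.  */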
3891                gen_lea_modrm(env, s, modrm);
3892                if ((b & 1) == 0) {
3893                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
3894                                       s->mem_index, ot | MO_BE);
3895                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3896                } else {
3897                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3898                                       s->mem_index, ot | MO_BE);
3899                }
3900                break;
3901
3902            case 0x0f2: /* andn Gy, By, Ey */
3903                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3904                    || !(s->prefix & PREFIX_VEX)
3905                    || s->vex_l != 0) {
3906                    goto illegal_op;
3907                }
3908                ot = mo_64_32(s->dflag);
3909                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3910                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3911                gen_op_mov_reg_v(s, ot, reg, s->T0);
3912                gen_op_update1_cc(s);
3913                set_cc_op(s, CC_OP_LOGICB + ot);
3914                break;
3915
3916            case 0x0f7: /* bextr Gy, Ey, By */
3917                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3918                    || !(s->prefix & PREFIX_VEX)
3919                    || s->vex_l != 0) {
3920                    goto illegal_op;
3921                }
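                    /* BEXTR extracts a bit field: the low byte of the vvvv
                       register gives the start position, the next byte the
                       length, i.e. roughly
                           dst = (src >> start) & ((1 << len) - 1);  */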
3922                ot = mo_64_32(s->dflag);
3923                {
3924                    TCGv bound, zero;
3925
3926                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3927                    /* Extract START, and shift the operand.
3928                       Shifts larger than operand size get zeros.  */
3929                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3930                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3931
3932                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3933                    zero = tcg_const_tl(0);
3934                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3935                                       s->T0, zero);
3936                    tcg_temp_free(zero);
3937
3938                    /* Extract the LEN into a mask.  Lengths larger than
3939                       operand size get all ones.  */
3940                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3941                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3942                                       s->A0, bound);
3943                    tcg_temp_free(bound);
3944                    tcg_gen_movi_tl(s->T1, 1);
3945                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3946                    tcg_gen_subi_tl(s->T1, s->T1, 1);
3947                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
3948
3949                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3950                    gen_op_update1_cc(s);
3951                    set_cc_op(s, CC_OP_LOGICB + ot);
3952                }
3953                break;
3954
3955            case 0x0f5: /* bzhi Gy, Ey, By */
3956                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3957                    || !(s->prefix & PREFIX_VEX)
3958                    || s->vex_l != 0) {
3959                    goto illegal_op;
3960                }
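                /* BZHI zeroes the source from bit position n upward, where
                   n is the low byte of the vvvv register: roughly
                       dst = src & ~(-1 << n),
                   with n saturated at the operand size.  */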
3961                ot = mo_64_32(s->dflag);
3962                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3963                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3964                {
3965                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3966                    /* Note that since we're using BMILG (in order to get O
3967                       cleared) we need to store the inverse into C.  */
3968                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3969                                       s->T1, bound);
3970                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3971                                       bound, bound, s->T1);
3972                    tcg_temp_free(bound);
3973                }
3974                tcg_gen_movi_tl(s->A0, -1);
3975                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3976                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3977                gen_op_mov_reg_v(s, ot, reg, s->T0);
3978                gen_op_update1_cc(s);
3979                set_cc_op(s, CC_OP_BMILGB + ot);
3980                break;
3981
3982            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3983                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3984                    || !(s->prefix & PREFIX_VEX)
3985                    || s->vex_l != 0) {
3986                    goto illegal_op;
3987                }
3988                ot = mo_64_32(s->dflag);
3989                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
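                /* MULX multiplies rDX by the source operand, writing the
                   low half of the product to the vvvv register and the
                   high half to the destination, and leaves all flags
                   untouched.  */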
3990                switch (ot) {
3991                default:
3992                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3993                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3994                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3995                                      s->tmp2_i32, s->tmp3_i32);
3996                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3997                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3998                    break;
3999#ifdef TARGET_X86_64
4000                case MO_64:
4001                    tcg_gen_mulu2_i64(s->T0, s->T1,
4002                                      s->T0, cpu_regs[R_EDX]);
4003                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4004                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4005                    break;
4006#endif
4007                }
4008                break;
4009
4010            case 0x3f5: /* pdep Gy, By, Ey */
4011                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4012                    || !(s->prefix & PREFIX_VEX)
4013                    || s->vex_l != 0) {
4014                    goto illegal_op;
4015                }
4016                ot = mo_64_32(s->dflag);
4017                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4018                /* Note that by zero-extending the source operand, we
4019                   automatically handle zero-extending the result.  */
4020                if (ot == MO_64) {
4021                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4022                } else {
4023                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4024                }
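                /* PDEP deposits the low-order bits of the vvvv register
                   into the positions selected by the set bits of the mask
                   operand, e.g. (illustrative) pdep(0b101, 0b11010) ==
                   0b10010.  */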
4025                gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4026                break;
4027
4028            case 0x2f5: /* pext Gy, By, Ey */
4029                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4030                    || !(s->prefix & PREFIX_VEX)
4031                    || s->vex_l != 0) {
4032                    goto illegal_op;
4033                }
4034                ot = mo_64_32(s->dflag);
4035                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4036                /* Note that by zero-extending the source operand, we
4037                   automatically handle zero-extending the result.  */
4038                if (ot == MO_64) {
4039                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4040                } else {
4041                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4042                }
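                /* PEXT is the inverse gather: the bits of the vvvv register
                   selected by the set bits of the mask are packed into the
                   low-order bits of the destination, e.g. (illustrative)
                   pext(0b10010, 0b11010) == 0b101.  */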
4043                gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4044                break;
4045
4046            case 0x1f6: /* adcx Gy, Ey */
4047            case 0x2f6: /* adox Gy, Ey */
4048                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4049                    goto illegal_op;
4050                } else {
4051                    TCGv carry_in, carry_out, zero;
4052                    int end_op;
4053
4054                    ot = mo_64_32(s->dflag);
4055                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4056
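                        /* ADCX computes reg = reg + src + CF updating only
                           CF; ADOX is the same computation carried through
                           OF.  The CC_OP_ADCX/ADOX/ADCOX states let chains
                           of these insns pass the carry via cpu_cc_dst and
                           cpu_cc_src2 without materializing EFLAGS.  */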
4057                    /* Re-use the carry-out from a previous round.  */
4058                    carry_in = NULL;
4059                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
4060                    switch (s->cc_op) {
4061                    case CC_OP_ADCX:
4062                        if (b == 0x1f6) {
4063                            carry_in = cpu_cc_dst;
4064                            end_op = CC_OP_ADCX;
4065                        } else {
4066                            end_op = CC_OP_ADCOX;
4067                        }
4068                        break;
4069                    case CC_OP_ADOX:
4070                        if (b == 0x1f6) {
4071                            end_op = CC_OP_ADCOX;
4072                        } else {
4073                            carry_in = cpu_cc_src2;
4074                            end_op = CC_OP_ADOX;
4075                        }
4076                        break;
4077                    case CC_OP_ADCOX:
4078                        end_op = CC_OP_ADCOX;
4079                        carry_in = carry_out;
4080                        break;
4081                    default:
4082                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4083                        break;
4084                    }
4085                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
4086                    if (!carry_in) {
4087                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4088                            gen_compute_eflags(s);
4089                        }
4090                        carry_in = s->tmp0;
4091                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
4092                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4093                    }
4094
4095                    switch (ot) {
4096#ifdef TARGET_X86_64
4097                    case MO_32:
4098                        /* If we know TL is 64-bit, and we want a 32-bit
4099                           result, just do everything in 64-bit arithmetic.  */
4100                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4101                        tcg_gen_ext32u_i64(s->T0, s->T0);
4102                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4103                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
4104                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4105                        tcg_gen_shri_i64(carry_out, s->T0, 32);
4106                        break;
4107#endif
4108                    default:
4109                        /* Otherwise compute the carry-out in two steps.  */
4110                        zero = tcg_const_tl(0);
4111                        tcg_gen_add2_tl(s->T0, carry_out,
4112                                        s->T0, zero,
4113                                        carry_in, zero);
4114                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4115                                        cpu_regs[reg], carry_out,
4116                                        s->T0, zero);
4117                        tcg_temp_free(zero);
4118                        break;
4119                    }
4120                    set_cc_op(s, end_op);
4121                }
4122                break;
4123
4124            case 0x1f7: /* shlx Gy, Ey, By */
4125            case 0x2f7: /* sarx Gy, Ey, By */
4126            case 0x3f7: /* shrx Gy, Ey, By */
4127                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4128                    || !(s->prefix & PREFIX_VEX)
4129                    || s->vex_l != 0) {
4130                    goto illegal_op;
4131                }
4132                ot = mo_64_32(s->dflag);
4133                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
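                /* SHLX/SARX/SHRX take the shift count from the vvvv
                   register, masked to the operand width just as the legacy
                   shifts mask CL, and leave all flags untouched.  */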
4134                if (ot == MO_64) {
4135                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4136                } else {
4137                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4138                }
4139                if (b == 0x1f7) {
4140                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4141                } else if (b == 0x2f7) {
4142                    if (ot != MO_64) {
4143                        tcg_gen_ext32s_tl(s->T0, s->T0);
4144                    }
4145                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4146                } else {
4147                    if (ot != MO_64) {
4148                        tcg_gen_ext32u_tl(s->T0, s->T0);
4149                    }
4150                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4151                }
4152                gen_op_mov_reg_v(s, ot, reg, s->T0);
4153                break;
4154
4155            case 0x0f3:
4156            case 0x1f3:
4157            case 0x2f3:
4158            case 0x3f3: /* Group 17 */
4159                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4160                    || !(s->prefix & PREFIX_VEX)
4161                    || s->vex_l != 0) {
4162                    goto illegal_op;
4163                }
4164                ot = mo_64_32(s->dflag);
4165                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4166
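                /* The BMI1 identities implemented below (illustrative):
                       blsr:   x & (x - 1)    clear lowest set bit
                       blsmsk: x ^ (x - 1)    mask up to lowest set bit
                       blsi:   x & -x         isolate lowest set bit  */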
4167                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4168                switch (reg & 7) {
4169                case 1: /* blsr By,Ey */
4170                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4171                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4172                    break;
4173                case 2: /* blsmsk By,Ey */
4174                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4175                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4176                    break;
4177                case 3: /* blsi By, Ey */
4178                    tcg_gen_neg_tl(s->T1, s->T0);
4179                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4180                    break;
4181                default:
4182                    goto unknown_op;
4183                }
4184                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4185                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4186                set_cc_op(s, CC_OP_BMILGB + ot);
4187                break;
4188
4189            default:
4190                goto unknown_op;
4191            }
4192            break;
4193
4194        case 0x03a:
4195        case 0x13a:
4196            b = modrm;
4197            modrm = x86_ldub_code(env, s);
4198            rm = modrm & 7;
4199            reg = ((modrm >> 3) & 7) | REX_R(s);
4200            mod = (modrm >> 6) & 3;
4201
4202            assert(b1 < 2);
4203            sse_fn_eppi = sse_op_table7[b].op[b1];
4204            if (!sse_fn_eppi) {
4205                goto unknown_op;
4206            }
4207            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) {
4208                goto illegal_op;
            }
4209
4210            s->rip_offset = 1;
4211
4212            if (sse_fn_eppi == SSE_SPECIAL) {
4213                ot = mo_64_32(s->dflag);
4214                rm = (modrm & 7) | REX_B(s);
4215                if (mod != 3) {
4216                    gen_lea_modrm(env, s, modrm);
                }
4217                reg = ((modrm >> 3) & 7) | REX_R(s);
4218                val = x86_ldub_code(env, s);
4219                switch (b) {
4220                case 0x14: /* pextrb */
4221                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4222                                            xmm_regs[reg].ZMM_B(val & 15)));
4223                    if (mod == 3) {
4224                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4225                    } else {
4226                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4227                                           s->mem_index, MO_UB);
4228                    }
4229                    break;
4230                case 0x15: /* pextrw */
4231                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4232                                            xmm_regs[reg].ZMM_W(val & 7)));
4233                    if (mod == 3) {
4234                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4235                    } else {
4236                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4237                                           s->mem_index, MO_LEUW);
4238                    }
4239                    break;
4240                case 0x16:
4241                    if (ot == MO_32) { /* pextrd */
4242                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4243                                        offsetof(CPUX86State,
4244                                                xmm_regs[reg].ZMM_L(val & 3)));
4245                        if (mod == 3) {
4246                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4247                        } else {
4248                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4249                                                s->mem_index, MO_LEUL);
4250                        }
4251                    } else { /* pextrq */
4252#ifdef TARGET_X86_64
4253                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4254                                        offsetof(CPUX86State,
4255                                                xmm_regs[reg].ZMM_Q(val & 1)));
4256                        if (mod == 3) {
4257                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4258                        } else {
4259                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4260                                                s->mem_index, MO_LEUQ);
4261                        }
4262#else
4263                        goto illegal_op;
4264#endif
4265                    }
4266                    break;
4267                case 0x17: /* extractps */
4268                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4269                                            xmm_regs[reg].ZMM_L(val & 3)));
4270                    if (mod == 3) {
4271                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4272                    } else {
4273                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4274                                           s->mem_index, MO_LEUL);
4275                    }
4276                    break;
4277                case 0x20: /* pinsrb */
4278                    if (mod == 3) {
4279                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4280                    } else {
4281                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
4282                                           s->mem_index, MO_UB);
4283                    }
4284                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4285                                            xmm_regs[reg].ZMM_B(val & 15)));
4286                    break;
4287                case 0x21: /* insertps */
4288                    if (mod == 3) {
4289                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4290                                        offsetof(CPUX86State,xmm_regs[rm]
4291                                                .ZMM_L((val >> 6) & 3)));
4292                    } else {
4293                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4294                                            s->mem_index, MO_LEUL);
4295                    }
4296                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4297                                    offsetof(CPUX86State,xmm_regs[reg]
4298                                            .ZMM_L((val >> 4) & 3)));
4299                    if ((val >> 0) & 1) {
4300                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4301                                        cpu_env, offsetof(CPUX86State,
4302                                                xmm_regs[reg].ZMM_L(0)));
                        }
4303                    if ((val >> 1) & 1) {
4304                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4305                                        cpu_env, offsetof(CPUX86State,
4306                                                xmm_regs[reg].ZMM_L(1)));
                        }
4307                    if ((val >> 2) & 1) {
4308                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4309                                        cpu_env, offsetof(CPUX86State,
4310                                                xmm_regs[reg].ZMM_L(2)));
                        }
4311                    if ((val >> 3) & 1) {
4312                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4313                                        cpu_env, offsetof(CPUX86State,
4314                                                xmm_regs[reg].ZMM_L(3)));
                        }
4315                    break;
4316                case 0x22:
4317                    if (ot == MO_32) { /* pinsrd */
4318                        if (mod == 3) {
4319                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4320                        } else {
4321                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4322                                                s->mem_index, MO_LEUL);
4323                        }
4324                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4325                                        offsetof(CPUX86State,
4326                                                xmm_regs[reg].ZMM_L(val & 3)));
4327                    } else { /* pinsrq */
4328#ifdef TARGET_X86_64
4329                        if (mod == 3) {
4330                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4331                        } else {
4332                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4333                                                s->mem_index, MO_LEUQ);
4334                        }
4335                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4336                                        offsetof(CPUX86State,
4337                                                xmm_regs[reg].ZMM_Q(val & 1)));
4338#else
4339                        goto illegal_op;
4340#endif
4341                    }
4342                    break;
4343                }
4344                return;
4345            }
4346
4347            if (b1) {
4348                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4349                if (mod == 3) {
4350                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4351                } else {
4352                    op2_offset = offsetof(CPUX86State,xmm_t0);
4353                    gen_lea_modrm(env, s, modrm);
4354                    gen_ldo_env_A0(s, op2_offset);
4355                }
4356            } else {
4357                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4358                if (mod == 3) {
4359                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4360                } else {
4361                    op2_offset = offsetof(CPUX86State,mmx_t0);
4362                    gen_lea_modrm(env, s, modrm);
4363                    gen_ldq_env_A0(s, op2_offset);
4364                }
4365            }
4366            val = x86_ldub_code(env, s);
4367
4368            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4369                set_cc_op(s, CC_OP_EFLAGS);
4370
4371                if (s->dflag == MO_64) {
4372                    /* The helper must use entire 64-bit gp registers */
4373                    val |= 1 << 8;
4374                }
4375            }
4376
4377            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4378            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4379            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4380            break;
4381
4382        case 0x33a:
4383            /* Various integer extensions at 0f 3a f[0-f].  */
4384            b = modrm | (b1 << 8);
4385            modrm = x86_ldub_code(env, s);
4386            reg = ((modrm >> 3) & 7) | REX_R(s);
4387
4388            switch (b) {
4389            case 0x3f0: /* rorx Gy,Ey, Ib */
4390                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4391                    || !(s->prefix & PREFIX_VEX)
4392                    || s->vex_l != 0) {
4393                    goto illegal_op;
4394                }
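                    /* RORX rotates right by an immediate count without
                       touching the flags; the 32-bit form rotates the low
                       32 bits and zero-extends the result.  */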
4395                ot = mo_64_32(s->dflag);
4396                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4397                b = x86_ldub_code(env, s);
4398                if (ot == MO_64) {
4399                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4400                } else {
4401                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4402                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4403                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4404                }
4405                gen_op_mov_reg_v(s, ot, reg, s->T0);
4406                break;
4407
4408            default:
4409                goto unknown_op;
4410            }
4411            break;
4412
4413        default:
4414        unknown_op:
4415            gen_unknown_opcode(env, s);
4416            return;
4417        }
4418    } else {
4419        /* generic MMX or SSE operation */
4420        switch(b) {
4421        case 0x70: /* pshufx insn */
4422        case 0xc6: /* shufp[sd] */
4423        case 0xc2: /* compare insns */
4424            s->rip_offset = 1;
4425            break;
4426        default:
4427            break;
4428        }
4429        if (is_xmm) {
4430            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4431            if (mod != 3) {
4432                int sz = 4;
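                    /* sz is a MemOp-style size for the load below: 2 for
                       a 32-bit scalar, 3 for a 64-bit scalar, anything
                       else (the default 4) for a full 128-bit access.  */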
4433
4434                gen_lea_modrm(env, s, modrm);
4435                op2_offset = offsetof(CPUX86State,xmm_t0);
4436
4437                switch (b) {
4438                case 0x50 ... 0x5a:
4439                case 0x5c ... 0x5f:
4440                case 0xc2:
4441                    /* Most sse scalar operations.  */
4442                    if (b1 == 2) {
4443                        sz = 2;
4444                    } else if (b1 == 3) {
4445                        sz = 3;
4446                    }
4447                    break;
4448
4449                case 0x2e:  /* ucomis[sd] */
4450                case 0x2f:  /* comis[sd] */
4451                    if (b1 == 0) {
4452                        sz = 2;
4453                    } else {
4454                        sz = 3;
4455                    }
4456                    break;
4457                }
4458
4459                switch (sz) {
4460                case 2:
4461                    /* 32 bit access */
4462                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
4463                    tcg_gen_st32_tl(s->T0, cpu_env,
4464                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4465                    break;
4466                case 3:
4467                    /* 64 bit access */
4468                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4469                    break;
4470                default:
4471                    /* 128 bit access */
4472                    gen_ldo_env_A0(s, op2_offset);
4473                    break;
4474                }
4475            } else {
4476                rm = (modrm & 7) | REX_B(s);
4477                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4478            }
4479        } else {
4480            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4481            if (mod != 3) {
4482                gen_lea_modrm(env, s, modrm);
4483                op2_offset = offsetof(CPUX86State,mmx_t0);
4484                gen_ldq_env_A0(s, op2_offset);
4485            } else {
4486                rm = (modrm & 7);
4487                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4488            }
4489        }
4490        switch(b) {
4491        case 0x0f: /* 3DNow! data insns */
4492            val = x86_ldub_code(env, s);
4493            sse_fn_epp = sse_op_table5[val];
4494            if (!sse_fn_epp) {
4495                goto unknown_op;
4496            }
4497            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4498                goto illegal_op;
4499            }
4500            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4501            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4502            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4503            break;
4504        case 0x70: /* pshufx insn */
4505        case 0xc6: /* shufp[sd] */
4506            val = x86_ldub_code(env, s);
4507            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4508            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4509            /* XXX: introduce a new table? */
4510            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4511            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4512            break;
4513        case 0xc2:
4514            /* compare insns, bits 7:3 (7:5 for AVX) are ignored */
4515            val = x86_ldub_code(env, s) & 7;
4516            sse_fn_epp = sse_op_table4[val][b1];
4517
4518            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4519            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4520            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4521            break;
4522        case 0xf7:
4523            /* maskmov: we must prepare A0 (the implicit DS:rDI store address) */
4524            if (mod != 3) {
4525                goto illegal_op;
            }
4526            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4527            gen_extu(s->aflag, s->A0);
4528            gen_add_A0_ds_seg(s);
4529
4530            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4531            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4532            /* XXX: introduce a new table? */
4533            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4534            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4535            break;
4536        default:
4537            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4538            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4539            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4540            break;
4541        }
4542        if (b == 0x2e || b == 0x2f) {
4543            set_cc_op(s, CC_OP_EFLAGS);
4544        }
4545    }
4546}
4547
4548/* Convert one instruction.  s->base.is_jmp is set if the translation must
4549   be stopped.  Returns the next pc value.  */
4550static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4551{
4552    CPUX86State *env = cpu->env_ptr;
4553    int b, prefixes;
4554    int shift;
4555    MemOp ot, aflag, dflag;
4556    int modrm, reg, rm, mod, op, opreg, val;
4557    target_ulong next_eip, tval;
4558    target_ulong pc_start = s->base.pc_next;
4559
4560    s->pc_start = s->pc = pc_start;
4561    s->override = -1;
4562#ifdef TARGET_X86_64
4563    s->rex_w = false;
4564    s->rex_r = 0;
4565    s->rex_x = 0;
4566    s->rex_b = 0;
4567#endif
4568    s->rip_offset = 0; /* for relative ip address */
4569    s->vex_l = 0;
4570    s->vex_v = 0;
4571    if (sigsetjmp(s->jmpbuf, 0) != 0) {
4572        gen_exception_gpf(s);
4573        return s->pc;
4574    }
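        /*
         * x86_ldub_code() and friends siglongjmp() back to the buffer armed
         * above if decoding runs past the architectural 15-byte instruction
         * length limit, in which case the #GP generated here matches what
         * real hardware raises for an over-long instruction.
         */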
4575
4576    prefixes = 0;
4577
4578 next_byte:
4579    b = x86_ldub_code(env, s);
4580    /* Collect prefixes.  */
4581    switch (b) {
4582    case 0xf3:
4583        prefixes |= PREFIX_REPZ;
4584        goto next_byte;
4585    case 0xf2:
4586        prefixes |= PREFIX_REPNZ;
4587        goto next_byte;
4588    case 0xf0:
4589        prefixes |= PREFIX_LOCK;
4590        goto next_byte;
4591    case 0x2e:
4592        s->override = R_CS;
4593        goto next_byte;
4594    case 0x36:
4595        s->override = R_SS;
4596        goto next_byte;
4597    case 0x3e:
4598        s->override = R_DS;
4599        goto next_byte;
4600    case 0x26:
4601        s->override = R_ES;
4602        goto next_byte;
4603    case 0x64:
4604        s->override = R_FS;
4605        goto next_byte;
4606    case 0x65:
4607        s->override = R_GS;
4608        goto next_byte;
4609    case 0x66:
4610        prefixes |= PREFIX_DATA;
4611        goto next_byte;
4612    case 0x67:
4613        prefixes |= PREFIX_ADR;
4614        goto next_byte;
4615#ifdef TARGET_X86_64
4616    case 0x40 ... 0x4f:
4617        if (CODE64(s)) {
4618            /* REX prefix */
4619            prefixes |= PREFIX_REX;
4620            s->rex_w = (b >> 3) & 1;
4621            s->rex_r = (b & 0x4) << 1;
4622            s->rex_x = (b & 0x2) << 2;
4623            s->rex_b = (b & 0x1) << 3;
4624            goto next_byte;
4625        }
4626        break;
4627#endif
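    /*
     * A REX prefix byte has the form 0100WRXB.  R, X and B are stored
     * pre-shifted into bit position 3 above, so that they can later be
     * OR'ed directly into the 3-bit ModRM register fields, e.g.:
     *
     *     reg = ((modrm >> 3) & 7) | REX_R(s);
     *     rm  = (modrm & 7) | REX_B(s);
     */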
4628    case 0xc5: /* 2-byte VEX */
4629    case 0xc4: /* 3-byte VEX */
4630        /* The 0xc4/0xc5 bytes act as VEX prefixes only in 32/64-bit
4631           protected mode; otherwise the instruction is LES or LDS.  */
4632        if (CODE32(s) && !VM86(s)) {
4633            static const int pp_prefix[4] = {
4634                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4635            };
4636            int vex3, vex2 = x86_ldub_code(env, s);
4637
4638            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4639                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4640                   otherwise the instruction is LES or LDS.  */
4641                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4642                break;
4643            }
4644
4645            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4646            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4647                            | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4648                goto illegal_op;
4649            }
4650#ifdef TARGET_X86_64
4651            s->rex_r = (~vex2 >> 4) & 8;
4652#endif
4653            if (b == 0xc5) {
4654                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4655                vex3 = vex2;
4656                b = x86_ldub_code(env, s) | 0x100;
4657            } else {
4658                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4659                vex3 = x86_ldub_code(env, s);
4660#ifdef TARGET_X86_64
4661                s->rex_x = (~vex2 >> 3) & 8;
4662                s->rex_b = (~vex2 >> 2) & 8;
4663                s->rex_w = (vex3 >> 7) & 1;
4664#endif
4665                switch (vex2 & 0x1f) {
4666                case 0x01: /* Implied 0f leading opcode bytes.  */
4667                    b = x86_ldub_code(env, s) | 0x100;
4668                    break;
4669                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4670                    b = 0x138;
4671                    break;
4672                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4673                    b = 0x13a;
4674                    break;
4675                default:   /* Reserved for future use.  */
4676                    goto unknown_op;
4677                }
4678            }
4679            s->vex_v = (~vex3 >> 3) & 0xf;
4680            s->vex_l = (vex3 >> 2) & 1;
4681            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4682        }
4683        break;
4684    }
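    /*
     * Layout of the VEX prefixes decoded above (~ marks inverted fields):
     *
     *   2-byte (0xc5):  ~R ~vvvv L pp
     *   3-byte (0xc4):  ~R ~X ~B m-mmmm  +  W ~vvvv L pp
     *
     * m-mmmm selects the implied leading opcode bytes (1: 0f, 2: 0f 38,
     * 3: 0f 3a), pp a mandatory 66/f3/f2 prefix folded into 'prefixes'
     * through pp_prefix[], and vvvv an additional source register.
     */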
4685
4686    /* Post-process prefixes.  */
4687    if (CODE64(s)) {
4688        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4689           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4690           over 0x66 if both are present.  */
4691        dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4692        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4693        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4694    } else {
4695        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4696        if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4697            dflag = MO_32;
4698        } else {
4699            dflag = MO_16;
4700        }
4701        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4702        if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4703            aflag = MO_32;
4704        } else {
4705            aflag = MO_16;
4706        }
4707    }
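    /*
     * Resulting operand size (dflag) for the common combinations:
     *
     *              no prefix   0x66    REX.W
     *   CODE16       MO_16     MO_32     -
     *   CODE32       MO_32     MO_16     -
     *   CODE64       MO_32     MO_16   MO_64   (REX.W wins over 0x66)
     */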
4708
4709    s->prefix = prefixes;
4710    s->aflag = aflag;
4711    s->dflag = dflag;
4712
4713    /* now check op code */
4714 reswitch:
4715    switch(b) {
4716    case 0x0f:
4717        /**************************/
4718        /* extended op code */
4719        b = x86_ldub_code(env, s) | 0x100;
4720        goto reswitch;
4721
4722        /**************************/
4723        /* arith & logic */
4724    case 0x00 ... 0x05:
4725    case 0x08 ... 0x0d:
4726    case 0x10 ... 0x15:
4727    case 0x18 ... 0x1d:
4728    case 0x20 ... 0x25:
4729    case 0x28 ... 0x2d:
4730    case 0x30 ... 0x35:
4731    case 0x38 ... 0x3d:
4732        {
4733            int op, f, val;
4734            op = (b >> 3) & 7;
4735            f = (b >> 1) & 3;
4736
4737            ot = mo_b_d(b, dflag);
4738
4739            switch(f) {
4740            case 0: /* OP Ev, Gv */
4741                modrm = x86_ldub_code(env, s);
4742                reg = ((modrm >> 3) & 7) | REX_R(s);
4743                mod = (modrm >> 6) & 3;
4744                rm = (modrm & 7) | REX_B(s);
4745                if (mod != 3) {
4746                    gen_lea_modrm(env, s, modrm);
4747                    opreg = OR_TMP0;
4748                } else if (op == OP_XORL && rm == reg) {
4749                xor_zero:
4750                    /* xor reg, reg optimisation */
4751                    set_cc_op(s, CC_OP_CLR);
4752                    tcg_gen_movi_tl(s->T0, 0);
4753                    gen_op_mov_reg_v(s, ot, reg, s->T0);
4754                    break;
4755                } else {
4756                    opreg = rm;
4757                }
4758                gen_op_mov_v_reg(s, ot, s->T1, reg);
4759                gen_op(s, op, ot, opreg);
4760                break;
4761            case 1: /* OP Gv, Ev */
4762                modrm = x86_ldub_code(env, s);
4763                mod = (modrm >> 6) & 3;
4764                reg = ((modrm >> 3) & 7) | REX_R(s);
4765                rm = (modrm & 7) | REX_B(s);
4766                if (mod != 3) {
4767                    gen_lea_modrm(env, s, modrm);
4768                    gen_op_ld_v(s, ot, s->T1, s->A0);
4769                } else if (op == OP_XORL && rm == reg) {
4770                    goto xor_zero;
4771                } else {
4772                    gen_op_mov_v_reg(s, ot, s->T1, rm);
4773                }
4774                gen_op(s, op, ot, reg);
4775                break;
4776            case 2: /* OP A, Iv */
4777                val = insn_get(env, s, ot);
4778                tcg_gen_movi_tl(s->T1, val);
4779                gen_op(s, op, ot, OR_EAX);
4780                break;
4781            }
4782        }
4783        break;
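        /*
         * The xor_zero path implements the "xor reg,reg" zeroing idiom
         * without reading the old register value; the flags state is
         * recorded as CC_OP_CLR (ZF and PF set, all others clear) and only
         * materialized if a later instruction actually consumes EFLAGS.
         */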
4784
4785    case 0x82:
4786        if (CODE64(s))
4787            goto illegal_op;
4788        /* fall through */
4789    case 0x80: /* GRP1 */
4790    case 0x81:
4791    case 0x83:
4792        {
4793            int val;
4794
4795            ot = mo_b_d(b, dflag);
4796
4797            modrm = x86_ldub_code(env, s);
4798            mod = (modrm >> 6) & 3;
4799            rm = (modrm & 7) | REX_B(s);
4800            op = (modrm >> 3) & 7;
4801
4802            if (mod != 3) {
4803                if (b == 0x83)
4804                    s->rip_offset = 1;
4805                else
4806                    s->rip_offset = insn_const_size(ot);
4807                gen_lea_modrm(env, s, modrm);
4808                opreg = OR_TMP0;
4809            } else {
4810                opreg = rm;
4811            }
4812
4813            switch(b) {
4814            default:
4815            case 0x80:
4816            case 0x81:
4817            case 0x82:
4818                val = insn_get(env, s, ot);
4819                break;
4820            case 0x83:
4821                val = (int8_t)insn_get(env, s, MO_8);
4822                break;
4823            }
4824            tcg_gen_movi_tl(s->T1, val);
4825            gen_op(s, op, ot, opreg);
4826        }
4827        break;
4828
4829        /**************************/
4830        /* inc, dec, and other misc arith */
4831    case 0x40 ... 0x47: /* inc Gv */
4832        ot = dflag;
4833        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4834        break;
4835    case 0x48 ... 0x4f: /* dec Gv */
4836        ot = dflag;
4837        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4838        break;
4839    case 0xf6: /* GRP3 */
4840    case 0xf7:
4841        ot = mo_b_d(b, dflag);
4842
4843        modrm = x86_ldub_code(env, s);
4844        mod = (modrm >> 6) & 3;
4845        rm = (modrm & 7) | REX_B(s);
4846        op = (modrm >> 3) & 7;
4847        if (mod != 3) {
4848            if (op == 0) {
4849                s->rip_offset = insn_const_size(ot);
4850            }
4851            gen_lea_modrm(env, s, modrm);
4852            /* For those below that handle locked memory, don't load here.  */
4853            if (!(s->prefix & PREFIX_LOCK)
4854                || op != 2) {
4855                gen_op_ld_v(s, ot, s->T0, s->A0);
4856            }
4857        } else {
4858            gen_op_mov_v_reg(s, ot, s->T0, rm);
4859        }
4860
4861        switch(op) {
4862        case 0: /* test */
4863            val = insn_get(env, s, ot);
4864            tcg_gen_movi_tl(s->T1, val);
4865            gen_op_testl_T0_T1_cc(s);
4866            set_cc_op(s, CC_OP_LOGICB + ot);
4867            break;
4868        case 2: /* not */
4869            if (s->prefix & PREFIX_LOCK) {
4870                if (mod == 3) {
4871                    goto illegal_op;
4872                }
4873                tcg_gen_movi_tl(s->T0, ~0);
4874                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4875                                            s->mem_index, ot | MO_LE);
4876            } else {
4877                tcg_gen_not_tl(s->T0, s->T0);
4878                if (mod != 3) {
4879                    gen_op_st_v(s, ot, s->T0, s->A0);
4880                } else {
4881                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4882                }
4883            }
4884            break;
4885        case 3: /* neg */
4886            if (s->prefix & PREFIX_LOCK) {
4887                TCGLabel *label1;
4888                TCGv a0, t0, t1, t2;
4889
4890                if (mod == 3) {
4891                    goto illegal_op;
4892                }
4893                a0 = tcg_temp_local_new();
4894                t0 = tcg_temp_local_new();
4895                label1 = gen_new_label();
4896
4897                tcg_gen_mov_tl(a0, s->A0);
4898                tcg_gen_mov_tl(t0, s->T0);
4899
4900                gen_set_label(label1);
4901                t1 = tcg_temp_new();
4902                t2 = tcg_temp_new();
4903                tcg_gen_mov_tl(t2, t0);
4904                tcg_gen_neg_tl(t1, t0);
4905                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4906                                          s->mem_index, ot | MO_LE);
4907                tcg_temp_free(t1);
4908                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4909
4910                tcg_temp_free(t2);
4911                tcg_temp_free(a0);
4912                tcg_gen_mov_tl(s->T0, t0);
4913                tcg_temp_free(t0);
4914            } else {
4915                tcg_gen_neg_tl(s->T0, s->T0);
4916                if (mod != 3) {
4917                    gen_op_st_v(s, ot, s->T0, s->A0);
4918                } else {
4919                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4920                }
4921            }
4922            gen_op_update_neg_cc(s);
4923            set_cc_op(s, CC_OP_SUBB + ot);
4924            break;
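            /*
             * The locked NEG above has no single atomic TCG primitive, so
             * it is emitted as a compare-and-swap retry loop, roughly:
             *
             *     do {
             *         old = *addr;
             *     } while (atomic_cmpxchg(addr, old, -old) != old);
             *
             * a0 and t0 must be local temps because their values have to
             * survive the conditional branch back to label1.
             */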
4925        case 4: /* mul */
4926            switch(ot) {
4927            case MO_8:
4928                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4929                tcg_gen_ext8u_tl(s->T0, s->T0);
4930                tcg_gen_ext8u_tl(s->T1, s->T1);
4931                /* XXX: use 32 bit mul which could be faster */
4932                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4933                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4934                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4935                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4936                set_cc_op(s, CC_OP_MULB);
4937                break;
4938            case MO_16:
4939                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4940                tcg_gen_ext16u_tl(s->T0, s->T0);
4941                tcg_gen_ext16u_tl(s->T1, s->T1);
4942                /* XXX: use 32 bit mul which could be faster */
4943                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4944                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4945                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4946                tcg_gen_shri_tl(s->T0, s->T0, 16);
4947                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4948                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4949                set_cc_op(s, CC_OP_MULW);
4950                break;
4951            default:
4952            case MO_32:
4953                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4954                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4955                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4956                                  s->tmp2_i32, s->tmp3_i32);
4957                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4958                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4959                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4960                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4961                set_cc_op(s, CC_OP_MULL);
4962                break;
4963#ifdef TARGET_X86_64
4964            case MO_64:
4965                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4966                                  s->T0, cpu_regs[R_EAX]);
4967                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4968                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4969                set_cc_op(s, CC_OP_MULQ);
4970                break;
4971#endif
4972            }
4973            break;
4974        case 5: /* imul */
4975            switch(ot) {
4976            case MO_8:
4977                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4978                tcg_gen_ext8s_tl(s->T0, s->T0);
4979                tcg_gen_ext8s_tl(s->T1, s->T1);
4980                /* XXX: use 32 bit mul which could be faster */
4981                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4982                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4983                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4984                tcg_gen_ext8s_tl(s->tmp0, s->T0);
4985                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4986                set_cc_op(s, CC_OP_MULB);
4987                break;
4988            case MO_16:
4989                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4990                tcg_gen_ext16s_tl(s->T0, s->T0);
4991                tcg_gen_ext16s_tl(s->T1, s->T1);
4992                /* XXX: use 32 bit mul which could be faster */
4993                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4994                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4995                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4996                tcg_gen_ext16s_tl(s->tmp0, s->T0);
4997                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4998                tcg_gen_shri_tl(s->T0, s->T0, 16);
4999                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5000                set_cc_op(s, CC_OP_MULW);
5001                break;
5002            default:
5003            case MO_32:
5004                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5005                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5006                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5007                                  s->tmp2_i32, s->tmp3_i32);
5008                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5009                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5010                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5011                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5012                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5013                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5014                set_cc_op(s, CC_OP_MULL);
5015                break;
5016#ifdef TARGET_X86_64
5017            case MO_64:
5018                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5019                                  s->T0, cpu_regs[R_EAX]);
5020                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5021                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5022                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5023                set_cc_op(s, CC_OP_MULQ);
5024                break;
5025#endif
5026            }
5027            break;
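            /*
             * For the CC_OP_MUL* states, cc_dst holds the low result and
             * cc_src the value that decides CF/OF: the high half of the
             * product for MUL, and for IMUL the difference between the
             * high half and the sign extension of the low half, which is
             * non-zero exactly when the result does not fit.
             */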
5028        case 6: /* div */
5029            switch(ot) {
5030            case MO_8:
5031                gen_helper_divb_AL(cpu_env, s->T0);
5032                break;
5033            case MO_16:
5034                gen_helper_divw_AX(cpu_env, s->T0);
5035                break;
5036            default:
5037            case MO_32:
5038                gen_helper_divl_EAX(cpu_env, s->T0);
5039                break;
5040#ifdef TARGET_X86_64
5041            case MO_64:
5042                gen_helper_divq_EAX(cpu_env, s->T0);
5043                break;
5044#endif
5045            }
5046            break;
5047        case 7: /* idiv */
5048            switch(ot) {
5049            case MO_8:
5050                gen_helper_idivb_AL(cpu_env, s->T0);
5051                break;
5052            case MO_16:
5053                gen_helper_idivw_AX(cpu_env, s->T0);
5054                break;
5055            default:
5056            case MO_32:
5057                gen_helper_idivl_EAX(cpu_env, s->T0);
5058                break;
5059#ifdef TARGET_X86_64
5060            case MO_64:
5061                gen_helper_idivq_EAX(cpu_env, s->T0);
5062                break;
5063#endif
5064            }
5065            break;
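            /*
             * Division goes through helpers because it can raise #DE
             * (divide by zero or quotient overflow), which requires taking
             * an exception with complete CPU state rather than inline TCG
             * arithmetic.
             */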
5066        default:
5067            goto unknown_op;
5068        }
5069        break;
5070
5071    case 0xfe: /* GRP4 */
5072    case 0xff: /* GRP5 */
5073        ot = mo_b_d(b, dflag);
5074
5075        modrm = x86_ldub_code(env, s);
5076        mod = (modrm >> 6) & 3;
5077        rm = (modrm & 7) | REX_B(s);
5078        op = (modrm >> 3) & 7;
5079        if (op >= 2 && b == 0xfe) {
5080            goto unknown_op;
5081        }
5082        if (CODE64(s)) {
5083            if (op == 2 || op == 4) {
5084                /* operand size for jumps is 64 bit */
5085                ot = MO_64;
5086            } else if (op == 3 || op == 5) {
5087                ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5088            } else if (op == 6) {
5089                /* default push size is 64 bit */
5090                ot = mo_pushpop(s, dflag);
5091            }
5092        }
5093        if (mod != 3) {
5094            gen_lea_modrm(env, s, modrm);
5095            if (op >= 2 && op != 3 && op != 5)
5096                gen_op_ld_v(s, ot, s->T0, s->A0);
5097        } else {
5098            gen_op_mov_v_reg(s, ot, s->T0, rm);
5099        }
5100
5101        switch(op) {
5102        case 0: /* inc Ev */
5103            if (mod != 3)
5104                opreg = OR_TMP0;
5105            else
5106                opreg = rm;
5107            gen_inc(s, ot, opreg, 1);
5108            break;
5109        case 1: /* dec Ev */
5110            if (mod != 3)
5111                opreg = OR_TMP0;
5112            else
5113                opreg = rm;
5114            gen_inc(s, ot, opreg, -1);
5115            break;
5116        case 2: /* call Ev */
5117            /* XXX: optimize if memory (no 'and' is necessary) */
5118            if (dflag == MO_16) {
5119                tcg_gen_ext16u_tl(s->T0, s->T0);
5120            }
5121            next_eip = s->pc - s->cs_base;
5122            tcg_gen_movi_tl(s->T1, next_eip);
5123            gen_push_v(s, s->T1);
5124            gen_op_jmp_v(s->T0);
5125            gen_bnd_jmp(s);
5126            gen_jr(s, s->T0);
5127            break;
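            /*
             * next_eip above is the EIP of the instruction following the
             * call (s->pc already points past it); it is pushed as the
             * return address before jumping through T0, and gen_jr() ends
             * the TB with a dynamic jump.
             */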
5128        case 3: /* lcall Ev */
5129            if (mod == 3) {
5130                goto illegal_op;
5131            }
5132            gen_op_ld_v(s, ot, s->T1, s->A0);
5133            gen_add_A0_im(s, 1 << ot);
5134            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5135        do_lcall:
5136            if (PE(s) && !VM86(s)) {
5137                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5138                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5139                                           tcg_const_i32(dflag - 1),
5140                                           tcg_const_tl(s->pc - s->cs_base));
5141            } else {
5142                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5143                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5144                                      tcg_const_i32(dflag - 1),
5145                                      tcg_const_i32(s->pc - s->cs_base));
5146            }
5147            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5148            gen_jr(s, s->tmp4);
5149            break;
5150        case 4: /* jmp Ev */
5151            if (dflag == MO_16) {
5152                tcg_gen_ext16u_tl(s->T0, s->T0);
5153            }
5154            gen_op_jmp_v(s->T0);
5155            gen_bnd_jmp(s);
5156            gen_jr(s, s->T0);
5157            break;
5158        case 5: /* ljmp Ev */
5159            if (mod == 3) {
5160                goto illegal_op;
5161            }
5162            gen_op_ld_v(s, ot, s->T1, s->A0);
5163            gen_add_A0_im(s, 1 << ot);
5164            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5165        do_ljmp:
5166            if (PE(s) && !VM86(s)) {
5167                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5168                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5169                                          tcg_const_tl(s->pc - s->cs_base));
5170            } else {
5171                gen_op_movl_seg_T0_vm(s, R_CS);
5172                gen_op_jmp_v(s->T1);
5173            }
5174            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5175            gen_jr(s, s->tmp4);
5176            break;
5177        case 6: /* push Ev */
5178            gen_push_v(s, s->T0);
5179            break;
5180        default:
5181            goto unknown_op;
5182        }
5183        break;
5184
5185    case 0x84: /* test Ev, Gv */
5186    case 0x85:
5187        ot = mo_b_d(b, dflag);
5188
5189        modrm = x86_ldub_code(env, s);
5190        reg = ((modrm >> 3) & 7) | REX_R(s);
5191
5192        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5193        gen_op_mov_v_reg(s, ot, s->T1, reg);
5194        gen_op_testl_T0_T1_cc(s);
5195        set_cc_op(s, CC_OP_LOGICB + ot);
5196        break;
5197
5198    case 0xa8: /* test eAX, Iv */
5199    case 0xa9:
5200        ot = mo_b_d(b, dflag);
5201        val = insn_get(env, s, ot);
5202
5203        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5204        tcg_gen_movi_tl(s->T1, val);
5205        gen_op_testl_T0_T1_cc(s);
5206        set_cc_op(s, CC_OP_LOGICB + ot);
5207        break;
5208
5209    case 0x98: /* CWDE/CBW */
5210        switch (dflag) {
5211#ifdef TARGET_X86_64
5212        case MO_64:
5213            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5214            tcg_gen_ext32s_tl(s->T0, s->T0);
5215            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5216            break;
5217#endif
5218        case MO_32:
5219            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5220            tcg_gen_ext16s_tl(s->T0, s->T0);
5221            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5222            break;
5223        case MO_16:
5224            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5225            tcg_gen_ext8s_tl(s->T0, s->T0);
5226            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5227            break;
5228        default:
5229            tcg_abort();
5230        }
5231        break;
5232    case 0x99: /* CDQ/CWD */
5233        switch (dflag) {
5234#ifdef TARGET_X86_64
5235        case MO_64:
5236            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5237            tcg_gen_sari_tl(s->T0, s->T0, 63);
5238            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5239            break;
5240#endif
5241        case MO_32:
5242            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5243            tcg_gen_ext32s_tl(s->T0, s->T0);
5244            tcg_gen_sari_tl(s->T0, s->T0, 31);
5245            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5246            break;
5247        case MO_16:
5248            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5249            tcg_gen_ext16s_tl(s->T0, s->T0);
5250            tcg_gen_sari_tl(s->T0, s->T0, 15);
5251            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5252            break;
5253        default:
5254            tcg_abort();
5255        }
5256        break;
5257    case 0x1af: /* imul Gv, Ev */
5258    case 0x69: /* imul Gv, Ev, I */
5259    case 0x6b:
5260        ot = dflag;
5261        modrm = x86_ldub_code(env, s);
5262        reg = ((modrm >> 3) & 7) | REX_R(s);
5263        if (b == 0x69)
5264            s->rip_offset = insn_const_size(ot);
5265        else if (b == 0x6b)
5266            s->rip_offset = 1;
5267        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5268        if (b == 0x69) {
5269            val = insn_get(env, s, ot);
5270            tcg_gen_movi_tl(s->T1, val);
5271        } else if (b == 0x6b) {
5272            val = (int8_t)insn_get(env, s, MO_8);
5273            tcg_gen_movi_tl(s->T1, val);
5274        } else {
5275            gen_op_mov_v_reg(s, ot, s->T1, reg);
5276        }
5277        switch (ot) {
5278#ifdef TARGET_X86_64
5279        case MO_64:
5280            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5281            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5282            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5283            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5284            break;
5285#endif
5286        case MO_32:
5287            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5288            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5289            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5290                              s->tmp2_i32, s->tmp3_i32);
5291            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5292            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5293            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5294            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5295            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5296            break;
5297        default:
5298            tcg_gen_ext16s_tl(s->T0, s->T0);
5299            tcg_gen_ext16s_tl(s->T1, s->T1);
5300            /* XXX: use 32 bit mul which could be faster */
5301            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5302            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5303            tcg_gen_ext16s_tl(s->tmp0, s->T0);
5304            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5305            gen_op_mov_reg_v(s, ot, reg, s->T0);
5306            break;
5307        }
5308        set_cc_op(s, CC_OP_MULB + ot);
5309        break;
5310    case 0x1c0:
5311    case 0x1c1: /* xadd Ev, Gv */
5312        ot = mo_b_d(b, dflag);
5313        modrm = x86_ldub_code(env, s);
5314        reg = ((modrm >> 3) & 7) | REX_R(s);
5315        mod = (modrm >> 6) & 3;
5316        gen_op_mov_v_reg(s, ot, s->T0, reg);
5317        if (mod == 3) {
5318            rm = (modrm & 7) | REX_B(s);
5319            gen_op_mov_v_reg(s, ot, s->T1, rm);
5320            tcg_gen_add_tl(s->T0, s->T0, s->T1);
5321            gen_op_mov_reg_v(s, ot, reg, s->T1);
5322            gen_op_mov_reg_v(s, ot, rm, s->T0);
5323        } else {
5324            gen_lea_modrm(env, s, modrm);
5325            if (s->prefix & PREFIX_LOCK) {
5326                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5327                                            s->mem_index, ot | MO_LE);
5328                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5329            } else {
5330                gen_op_ld_v(s, ot, s->T1, s->A0);
5331                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5332                gen_op_st_v(s, ot, s->T0, s->A0);
5333            }
5334            gen_op_mov_reg_v(s, ot, reg, s->T1);
5335        }
5336        gen_op_update2_cc(s);
5337        set_cc_op(s, CC_OP_ADDB + ot);
5338        break;
5339    case 0x1b0:
5340    case 0x1b1: /* cmpxchg Ev, Gv */
5341        {
5342            TCGv oldv, newv, cmpv;
5343
5344            ot = mo_b_d(b, dflag);
5345            modrm = x86_ldub_code(env, s);
5346            reg = ((modrm >> 3) & 7) | REX_R(s);
5347            mod = (modrm >> 6) & 3;
5348            oldv = tcg_temp_new();
5349            newv = tcg_temp_new();
5350            cmpv = tcg_temp_new();
5351            gen_op_mov_v_reg(s, ot, newv, reg);
5352            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5353
5354            if (s->prefix & PREFIX_LOCK) {
5355                if (mod == 3) {
5356                    goto illegal_op;
5357                }
5358                gen_lea_modrm(env, s, modrm);
5359                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5360                                          s->mem_index, ot | MO_LE);
5361                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5362            } else {
5363                if (mod == 3) {
5364                    rm = (modrm & 7) | REX_B(s);
5365                    gen_op_mov_v_reg(s, ot, oldv, rm);
5366                } else {
5367                    gen_lea_modrm(env, s, modrm);
5368                    gen_op_ld_v(s, ot, oldv, s->A0);
5369                    rm = 0; /* avoid warning */
5370                }
5371                gen_extu(ot, oldv);
5372                gen_extu(ot, cmpv);
5373                /* store value = (old == cmp ? new : old);  */
5374                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5375                if (mod == 3) {
5376                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5377                    gen_op_mov_reg_v(s, ot, rm, newv);
5378                } else {
5379                    /* Perform an unconditional store cycle like physical cpu;
5380                       must be before changing accumulator to ensure
5381                       idempotency if the store faults and the instruction
5382                       is restarted */
5383                    gen_op_st_v(s, ot, newv, s->A0);
5384                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5385                }
5386            }
5387            tcg_gen_mov_tl(cpu_cc_src, oldv);
5388            tcg_gen_mov_tl(s->cc_srcT, cmpv);
5389            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5390            set_cc_op(s, CC_OP_SUBB + ot);
5391            tcg_temp_free(oldv);
5392            tcg_temp_free(newv);
5393            tcg_temp_free(cmpv);
5394        }
5395        break;
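        /*
         * The non-LOCK path implements CMPXCHG with a movcond, roughly:
         *
         *     old = load(ea);
         *     store(ea, old == accum ? new : old);  // store happens anyway
         *     accum = old;
         *
         * The unconditional store mirrors hardware, which always drives a
         * write cycle, and keeps the instruction restartable if that store
         * faults.
         */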
5396    case 0x1c7: /* cmpxchg8b */
5397        modrm = x86_ldub_code(env, s);
5398        mod = (modrm >> 6) & 3;
5399        switch ((modrm >> 3) & 7) {
5400        case 1: /* CMPXCHG8, CMPXCHG16 */
5401            if (mod == 3) {
5402                goto illegal_op;
5403            }
5404#ifdef TARGET_X86_64
5405            if (dflag == MO_64) {
5406                if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5407                    goto illegal_op;
5408                }
5409                gen_lea_modrm(env, s, modrm);
5410                if ((s->prefix & PREFIX_LOCK) &&
5411                    (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5412                    gen_helper_cmpxchg16b(cpu_env, s->A0);
5413                } else {
5414                    gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5415                }
5416                set_cc_op(s, CC_OP_EFLAGS);
5417                break;
5418            }
5419#endif
5420            if (!(s->cpuid_features & CPUID_CX8)) {
5421                goto illegal_op;
5422            }
5423            gen_lea_modrm(env, s, modrm);
5424            if ((s->prefix & PREFIX_LOCK) &&
5425                (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5426                gen_helper_cmpxchg8b(cpu_env, s->A0);
5427            } else {
5428                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5429            }
5430            set_cc_op(s, CC_OP_EFLAGS);
5431            break;
5432
5433        case 7: /* RDSEED */
5434        case 6: /* RDRAND */
5435            if (mod != 3 ||
5436                (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5437                !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5438                goto illegal_op;
5439            }
5440            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5441                gen_io_start();
5442            }
5443            gen_helper_rdrand(s->T0, cpu_env);
5444            rm = (modrm & 7) | REX_B(s);
5445            gen_op_mov_reg_v(s, dflag, rm, s->T0);
5446            set_cc_op(s, CC_OP_EFLAGS);
5447            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5448                gen_jmp(s, s->pc - s->cs_base);
5449            }
5450            break;
5451
5452        default:
5453            goto illegal_op;
5454        }
5455        break;
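        /*
         * For CMPXCHG8B/16B the truly atomic helper is needed only when
         * the instruction is LOCK-prefixed and the TB may run in parallel
         * with other vCPUs (CF_PARALLEL); otherwise the cheaper unlocked
         * variant produces the same result.
         */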
5456
5457        /**************************/
5458        /* push/pop */
5459    case 0x50 ... 0x57: /* push */
5460        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5461        gen_push_v(s, s->T0);
5462        break;
5463    case 0x58 ... 0x5f: /* pop */
5464        ot = gen_pop_T0(s);
5465        /* NOTE: order is important for pop %sp */
5466        gen_pop_update(s, ot);
5467        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5468        break;
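        /*
         * gen_pop_update() runs before the register write so that
         * "pop %esp" leaves ESP holding the popped value: the stack
         * pointer increment is applied first and then overwritten by the
         * loaded result, as on hardware.
         */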
5469    case 0x60: /* pusha */
5470        if (CODE64(s))
5471            goto illegal_op;
5472        gen_pusha(s);
5473        break;
5474    case 0x61: /* popa */
5475        if (CODE64(s))
5476            goto illegal_op;
5477        gen_popa(s);
5478        break;
5479    case 0x68: /* push Iv */
5480    case 0x6a:
5481        ot = mo_pushpop(s, dflag);
5482        if (b == 0x68)
5483            val = insn_get(env, s, ot);
5484        else
5485            val = (int8_t)insn_get(env, s, MO_8);
5486        tcg_gen_movi_tl(s->T0, val);
5487        gen_push_v(s, s->T0);
5488        break;
5489    case 0x8f: /* pop Ev */
5490        modrm = x86_ldub_code(env, s);
5491        mod = (modrm >> 6) & 3;
5492        ot = gen_pop_T0(s);
5493        if (mod == 3) {
5494            /* NOTE: order is important for pop %sp */
5495            gen_pop_update(s, ot);
5496            rm = (modrm & 7) | REX_B(s);
5497            gen_op_mov_reg_v(s, ot, rm, s->T0);
5498        } else {
5499            /* NOTE: order is important too for MMU exceptions */
5500            s->popl_esp_hack = 1 << ot;
5501            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5502            s->popl_esp_hack = 0;
5503            gen_pop_update(s, ot);
5504        }
5505        break;
5506    case 0xc8: /* enter */
5507        {
5508            int level;
5509            val = x86_lduw_code(env, s);
5510            level = x86_ldub_code(env, s);
5511            gen_enter(s, val, level);
5512        }
5513        break;
5514    case 0xc9: /* leave */
5515        gen_leave(s);
5516        break;
5517    case 0x06: /* push es */
5518    case 0x0e: /* push cs */
5519    case 0x16: /* push ss */
5520    case 0x1e: /* push ds */
5521        if (CODE64(s))
5522            goto illegal_op;
5523        gen_op_movl_T0_seg(s, b >> 3);
5524        gen_push_v(s, s->T0);
5525        break;
5526    case 0x1a0: /* push fs */
5527    case 0x1a8: /* push gs */
5528        gen_op_movl_T0_seg(s, (b >> 3) & 7);
5529        gen_push_v(s, s->T0);
5530        break;
5531    case 0x07: /* pop es */
5532    case 0x17: /* pop ss */
5533    case 0x1f: /* pop ds */
5534        if (CODE64(s))
5535            goto illegal_op;
5536        reg = b >> 3;
5537        ot = gen_pop_T0(s);
5538        gen_movl_seg_T0(s, reg);
5539        gen_pop_update(s, ot);
5540        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5541        if (s->base.is_jmp) {
5542            gen_jmp_im(s, s->pc - s->cs_base);
5543            if (reg == R_SS) {
5544                s->flags &= ~HF_TF_MASK;
5545                gen_eob_inhibit_irq(s, true);
5546            } else {
5547                gen_eob(s);
5548            }
5549        }
5550        break;
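        /*
         * Loading SS inhibits interrupts and traps until after the next
         * instruction completes, so that an SS:(E)SP pair can be switched
         * without an interrupt arriving in between; hence
         * gen_eob_inhibit_irq() rather than a plain gen_eob().
         */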
5551    case 0x1a1: /* pop fs */
5552    case 0x1a9: /* pop gs */
5553        ot = gen_pop_T0(s);
5554        gen_movl_seg_T0(s, (b >> 3) & 7);
5555        gen_pop_update(s, ot);
5556        if (s->base.is_jmp) {
5557            gen_jmp_im(s, s->pc - s->cs_base);
5558            gen_eob(s);
5559        }
5560        break;
5561
5562        /**************************/
5563        /* mov */
5564    case 0x88:
5565    case 0x89: /* mov Gv, Ev */
5566        ot = mo_b_d(b, dflag);
5567        modrm = x86_ldub_code(env, s);
5568        reg = ((modrm >> 3) & 7) | REX_R(s);
5569
5570        /* generate a generic store */
5571        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5572        break;
5573    case 0xc6:
5574    case 0xc7: /* mov Ev, Iv */
5575        ot = mo_b_d(b, dflag);
5576        modrm = x86_ldub_code(env, s);
5577        mod = (modrm >> 6) & 3;
5578        if (mod != 3) {
5579            s->rip_offset = insn_const_size(ot);
5580            gen_lea_modrm(env, s, modrm);
5581        }
5582        val = insn_get(env, s, ot);
5583        tcg_gen_movi_tl(s->T0, val);
5584        if (mod != 3) {
5585            gen_op_st_v(s, ot, s->T0, s->A0);
5586        } else {
5587            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5588        }
5589        break;
5590    case 0x8a:
5591    case 0x8b: /* mov Ev, Gv */
5592        ot = mo_b_d(b, dflag);
5593        modrm = x86_ldub_code(env, s);
5594        reg = ((modrm >> 3) & 7) | REX_R(s);
5595
5596        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5597        gen_op_mov_reg_v(s, ot, reg, s->T0);
5598        break;
5599    case 0x8e: /* mov seg, Gv */
5600        modrm = x86_ldub_code(env, s);
5601        reg = (modrm >> 3) & 7;
5602        if (reg >= 6 || reg == R_CS)
5603            goto illegal_op;
5604        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5605        gen_movl_seg_T0(s, reg);
5606        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5607        if (s->base.is_jmp) {
5608            gen_jmp_im(s, s->pc - s->cs_base);
5609            if (reg == R_SS) {
5610                s->flags &= ~HF_TF_MASK;
5611                gen_eob_inhibit_irq(s, true);
5612            } else {
5613                gen_eob(s);
5614            }
5615        }
5616        break;
5617    case 0x8c: /* mov Gv, seg */
5618        modrm = x86_ldub_code(env, s);
5619        reg = (modrm >> 3) & 7;
5620        mod = (modrm >> 6) & 3;
5621        if (reg >= 6)
5622            goto illegal_op;
5623        gen_op_movl_T0_seg(s, reg);
5624        ot = mod == 3 ? dflag : MO_16;
5625        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5626        break;
5627
5628    case 0x1b6: /* movzbS Gv, Eb */
5629    case 0x1b7: /* movzwS Gv, Eb */
5630    case 0x1be: /* movsbS Gv, Eb */
5631    case 0x1bf: /* movswS Gv, Eb */
5632        {
5633            MemOp d_ot;
5634            MemOp s_ot;
5635
5636            /* d_ot is the size of destination */
5637            d_ot = dflag;
5638            /* ot is the size of source */
5639            ot = (b & 1) + MO_8;
5640            /* s_ot is the sign+size of source */
5641            s_ot = b & 8 ? MO_SIGN | ot : ot;
5642
5643            modrm = x86_ldub_code(env, s);
5644            reg = ((modrm >> 3) & 7) | REX_R(s);
5645            mod = (modrm >> 6) & 3;
5646            rm = (modrm & 7) | REX_B(s);
5647
5648            if (mod == 3) {
5649                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5650                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5651                } else {
5652                    gen_op_mov_v_reg(s, ot, s->T0, rm);
5653                    switch (s_ot) {
5654                    case MO_UB:
5655                        tcg_gen_ext8u_tl(s->T0, s->T0);
5656                        break;
5657                    case MO_SB:
5658                        tcg_gen_ext8s_tl(s->T0, s->T0);
5659                        break;
5660                    case MO_UW:
5661                        tcg_gen_ext16u_tl(s->T0, s->T0);
5662                        break;
5663                    default:
5664                    case MO_SW:
5665                        tcg_gen_ext16s_tl(s->T0, s->T0);
5666                        break;
5667                    }
5668                }
5669                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5670            } else {
5671                gen_lea_modrm(env, s, modrm);
5672                gen_op_ld_v(s, s_ot, s->T0, s->A0);
5673                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5674            }
5675        }
5676        break;
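        /*
         * Bit 3 of the opcode distinguishes movsx (0x1be/0x1bf) from movzx
         * (0x1b6/0x1b7), so folding it into the MemOp as
         *
         *     s_ot = b & 8 ? MO_SIGN | ot : ot;
         *
         * lets the memory path load with the correct extension directly.
         * byte_reg_is_xH() covers reads of AH/CH/DH/BH, which live in bits
         * 15:8 of the corresponding word register.
         */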
5677
5678    case 0x8d: /* lea */
5679        modrm = x86_ldub_code(env, s);
5680        mod = (modrm >> 6) & 3;
5681        if (mod == 3)
5682            goto illegal_op;
5683        reg = ((modrm >> 3) & 7) | REX_R(s);
5684        {
5685            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5686            TCGv ea = gen_lea_modrm_1(s, a);
5687            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5688            gen_op_mov_reg_v(s, dflag, reg, s->A0);
5689        }
5690        break;
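        /*
         * LEA performs only the address arithmetic: gen_lea_v_seg() is
         * called with both segment arguments set to -1, so no segment base
         * is added and no memory access is generated.
         */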
5691
5692    case 0xa0: /* mov EAX, Ov */
5693    case 0xa1:
5694    case 0xa2: /* mov Ov, EAX */
5695    case 0xa3:
5696        {
5697            target_ulong offset_addr;
5698
5699            ot = mo_b_d(b, dflag);
5700            switch (s->aflag) {
5701#ifdef TARGET_X86_64
5702            case MO_64:
5703                offset_addr = x86_ldq_code(env, s);
5704                break;
5705#endif
5706            default:
5707                offset_addr = insn_get(env, s, s->aflag);
5708                break;
5709            }
5710            tcg_gen_movi_tl(s->A0, offset_addr);
5711            gen_add_A0_ds_seg(s);
5712            if ((b & 2) == 0) {
5713                gen_op_ld_v(s, ot, s->T0, s->A0);
5714                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5715            } else {
5716                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5717                gen_op_st_v(s, ot, s->T0, s->A0);
5718            }
5719        }
5720        break;
5721    case 0xd7: /* xlat */
5722        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5723        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5724        tcg_gen_add_tl(s->A0, s->A0, s->T0);
5725        gen_extu(s->aflag, s->A0);
5726        gen_add_A0_ds_seg(s);
5727        gen_op_ld_v(s, MO_8, s->T0, s->A0);
5728        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5729        break;
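        /*
         * XLAT loads AL from DS:[(E)BX + zero-extended AL]; gen_extu()
         * truncates the computed sum to the current address size before
         * the segment base is applied.
         */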
5730    case 0xb0 ... 0xb7: /* mov R, Ib */
5731        val = insn_get(env, s, MO_8);
5732        tcg_gen_movi_tl(s->T0, val);
5733        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5734        break;
5735    case 0xb8 ... 0xbf: /* mov R, Iv */
5736#ifdef TARGET_X86_64
5737        if (dflag == MO_64) {
5738            uint64_t tmp;
5739            /* 64 bit case */
5740            tmp = x86_ldq_code(env, s);
5741            reg = (b & 7) | REX_B(s);
5742            tcg_gen_movi_tl(s->T0, tmp);
5743            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5744        } else
5745#endif
5746        {
5747            ot = dflag;
5748            val = insn_get(env, s, ot);
5749            reg = (b & 7) | REX_B(s);
5750            tcg_gen_movi_tl(s->T0, val);
5751            gen_op_mov_reg_v(s, ot, reg, s->T0);
5752        }
5753        break;
5754
5755    case 0x91 ... 0x97: /* xchg R, EAX */
5756    do_xchg_reg_eax:
5757        ot = dflag;
5758        reg = (b & 7) | REX_B(s);
5759        rm = R_EAX;
5760        goto do_xchg_reg;
5761    case 0x86:
5762    case 0x87: /* xchg Ev, Gv */
5763        ot = mo_b_d(b, dflag);
5764        modrm = x86_ldub_code(env, s);
5765        reg = ((modrm >> 3) & 7) | REX_R(s);
5766        mod = (modrm >> 6) & 3;
5767        if (mod == 3) {
5768            rm = (modrm & 7) | REX_B(s);
5769        do_xchg_reg:
5770            gen_op_mov_v_reg(s, ot, s->T0, reg);
5771            gen_op_mov_v_reg(s, ot, s->T1, rm);
5772            gen_op_mov_reg_v(s, ot, rm, s->T0);
5773            gen_op_mov_reg_v(s, ot, reg, s->T1);
5774        } else {
5775            gen_lea_modrm(env, s, modrm);
5776            gen_op_mov_v_reg(s, ot, s->T0, reg);
5777            /* for xchg, lock is implicit */
5778            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5779                                   s->mem_index, ot | MO_LE);
5780            gen_op_mov_reg_v(s, ot, reg, s->T1);
5781        }
5782        break;
5783    case 0xc4: /* les Gv */
5784        /* In CODE64 this is VEX3; see above.  */
5785        op = R_ES;
5786        goto do_lxx;
5787    case 0xc5: /* lds Gv */
5788        /* In CODE64 this is VEX2; see above.  */
5789        op = R_DS;
5790        goto do_lxx;
5791    case 0x1b2: /* lss Gv */
5792        op = R_SS;
5793        goto do_lxx;
5794    case 0x1b4: /* lfs Gv */
5795        op = R_FS;
5796        goto do_lxx;
5797    case 0x1b5: /* lgs Gv */
5798        op = R_GS;
5799    do_lxx:
5800        ot = dflag != MO_16 ? MO_32 : MO_16;
5801        modrm = x86_ldub_code(env, s);
5802        reg = ((modrm >> 3) & 7) | REX_R(s);
5803        mod = (modrm >> 6) & 3;
5804        if (mod == 3)
5805            goto illegal_op;
5806        gen_lea_modrm(env, s, modrm);
5807        gen_op_ld_v(s, ot, s->T1, s->A0);
5808        gen_add_A0_im(s, 1 << ot);
5809        /* load the segment first to handle exceptions properly */
5810        gen_op_ld_v(s, MO_16, s->T0, s->A0);
5811        gen_movl_seg_T0(s, op);
5812        /* then put the data */
5813        gen_op_mov_reg_v(s, ot, reg, s->T1);
5814        if (s->base.is_jmp) {
5815            gen_jmp_im(s, s->pc - s->cs_base);
5816            gen_eob(s);
5817        }
5818        break;
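        /*
         * The segment selector is loaded into the segment register before
         * the offset is written to the destination GPR: the segment load
         * can fault (e.g. on an invalid selector in protected mode), and
         * doing it first leaves the GPR unmodified so the instruction can
         * be restarted cleanly.
         */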
5819
5820        /************************/
5821        /* shifts */
5822    case 0xc0:
5823    case 0xc1:
5824        /* shift Ev,Ib */
5825        shift = 2;
5826    grp2:
5827        {
5828            ot = mo_b_d(b, dflag);
5829            modrm = x86_ldub_code(env, s);
5830            mod = (modrm >> 6) & 3;
5831            op = (modrm >> 3) & 7;
5832
5833            if (mod != 3) {
5834                if (shift == 2) {
5835                    s->rip_offset = 1;
5836                }
5837                gen_lea_modrm(env, s, modrm);
5838                opreg = OR_TMP0;
5839            } else {
5840                opreg = (modrm & 7) | REX_B(s);
5841            }
5842
5843            /* simpler op */
5844            if (shift == 0) {
5845                gen_shift(s, op, ot, opreg, OR_ECX);
5846            } else {
5847                if (shift == 2) {
5848                    shift = x86_ldub_code(env, s);
5849                }
5850                gen_shifti(s, op, ot, opreg, shift);
5851            }
5852        }
5853        break;
5854    case 0xd0:
5855    case 0xd1:
5856        /* shift Ev,1 */
5857        shift = 1;
5858        goto grp2;
5859    case 0xd2:
5860    case 0xd3:
5861        /* shift Ev,cl */
5862        shift = 0;
5863        goto grp2;
5864
5865    case 0x1a4: /* shld imm */
5866        op = 0;
5867        shift = 1;
5868        goto do_shiftd;
5869    case 0x1a5: /* shld cl */
5870        op = 0;
5871        shift = 0;
5872        goto do_shiftd;
5873    case 0x1ac: /* shrd imm */
5874        op = 1;
5875        shift = 1;
5876        goto do_shiftd;
5877    case 0x1ad: /* shrd cl */
5878        op = 1;
5879        shift = 0;
5880    do_shiftd:
5881        ot = dflag;
5882        modrm = x86_ldub_code(env, s);
5883        mod = (modrm >> 6) & 3;
5884        rm = (modrm & 7) | REX_B(s);
5885        reg = ((modrm >> 3) & 7) | REX_R(s);
5886        if (mod != 3) {
5887            gen_lea_modrm(env, s, modrm);
5888            opreg = OR_TMP0;
5889        } else {
5890            opreg = rm;
5891        }
5892        gen_op_mov_v_reg(s, ot, s->T1, reg);
5893
5894        if (shift) {
5895            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5896            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5897            tcg_temp_free(imm);
5898        } else {
5899            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5900        }
5901        break;
5902
5903        /************************/
5904        /* floats */
5905    case 0xd8 ... 0xdf:
5906        {
5907            bool update_fip = true;
5908
5909            if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5910                /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5911                /* XXX: what to do if illegal op? */
5912                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5913                break;
5914            }
5915            modrm = x86_ldub_code(env, s);
5916            mod = (modrm >> 6) & 3;
5917            rm = modrm & 7;
5918            op = ((b & 7) << 3) | ((modrm >> 3) & 7);
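                /*
                 * The low three opcode bits (d8..df) and the ModRM reg
                 * field are folded into one 6-bit index: for the memory
                 * forms below, op >> 4 selects the operand format
                 * (0: float32, 1: int32, 2: float64, 3: int16) and the low
                 * bits select the operation.
                 */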
5919            if (mod != 3) {
5920                /* memory op */
5921                AddressParts a = gen_lea_modrm_0(env, s, modrm);
5922                TCGv ea = gen_lea_modrm_1(s, a);
5923                TCGv last_addr = tcg_temp_new();
5924                bool update_fdp = true;
5925
5926                tcg_gen_mov_tl(last_addr, ea);
5927                gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
5928
5929                switch (op) {
5930                case 0x00 ... 0x07: /* fxxxs */
5931                case 0x10 ... 0x17: /* fixxxl */
5932                case 0x20 ... 0x27: /* fxxxl */
5933                case 0x30 ... 0x37: /* fixxx */
5934                    {
5935                        int op1;
5936                        op1 = op & 7;
5937
5938                        switch (op >> 4) {
5939                        case 0:
5940                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5941                                                s->mem_index, MO_LEUL);
5942                            gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5943                            break;
5944                        case 1:
5945                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5946                                                s->mem_index, MO_LEUL);
5947                            gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5948                            break;
5949                        case 2:
5950                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5951                                                s->mem_index, MO_LEUQ);
5952                            gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5953                            break;
5954                        case 3:
5955                        default:
5956                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5957                                                s->mem_index, MO_LESW);
5958                            gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5959                            break;
5960                        }
5961
5962                        gen_helper_fp_arith_ST0_FT0(op1);
5963                        if (op1 == 3) {
5964                            /* fcomp needs pop */
5965                            gen_helper_fpop(cpu_env);
5966                        }
5967                    }
5968                    break;
5969                case 0x08: /* flds */
5970                case 0x0a: /* fsts */
5971                case 0x0b: /* fstps */
5972                case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5973                case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5974                case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5975                    switch (op & 7) {
5976                    case 0:
5977                        switch (op >> 4) {
5978                        case 0:
5979                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5980                                                s->mem_index, MO_LEUL);
5981                            gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5982                            break;
5983                        case 1:
5984                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5985                                                s->mem_index, MO_LEUL);
5986                            gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5987                            break;
5988                        case 2:
5989                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5990                                                s->mem_index, MO_LEUQ);
5991                            gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5992                            break;
5993                        case 3:
5994                        default:
5995                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5996                                                s->mem_index, MO_LESW);
5997                            gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5998                            break;
5999                        }
6000                        break;
6001                    case 1:
6002                        /* XXX: the corresponding CPUID bit must be tested! */
6003                        switch (op >> 4) {
6004                        case 1:
6005                            gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6006                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6007                                                s->mem_index, MO_LEUL);
6008                            break;
6009                        case 2:
6010                            gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6011                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6012                                                s->mem_index, MO_LEUQ);
6013                            break;
6014                        case 3:
6015                        default:
6016                            gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6017                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6018                                                s->mem_index, MO_LEUW);
6019                            break;
6020                        }
6021                        gen_helper_fpop(cpu_env);
6022                        break;
6023                    default:
6024                        switch (op >> 4) {
6025                        case 0:
6026                            gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6027                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6028                                                s->mem_index, MO_LEUL);
6029                            break;
6030                        case 1:
6031                            gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6032                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6033                                                s->mem_index, MO_LEUL);
6034                            break;
6035                        case 2:
6036                            gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6037                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6038                                                s->mem_index, MO_LEUQ);
6039                            break;
6040                        case 3:
6041                        default:
6042                            gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6043                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6044                                                s->mem_index, MO_LEUW);
6045                            break;
6046                        }
6047                        if ((op & 7) == 3) {
6048                            gen_helper_fpop(cpu_env);
6049                        }
6050                        break;
6051                    }
6052                    break;
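                    /*
                     * A note on the tcg_const_i32(dflag - 1) arguments
                     * below: the env/state helpers take 0 for a 16-bit
                     * and non-zero for a 32/64-bit operand size, which
                     * selects the memory image layout.
                     */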
6053                case 0x0c: /* fldenv mem */
6054                    gen_helper_fldenv(cpu_env, s->A0,
6055                                      tcg_const_i32(dflag - 1));
6056                    update_fip = update_fdp = false;
6057                    break;
6058                case 0x0d: /* fldcw mem */
6059                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6060                                        s->mem_index, MO_LEUW);
6061                    gen_helper_fldcw(cpu_env, s->tmp2_i32);
6062                    update_fip = update_fdp = false;
6063                    break;
6064                case 0x0e: /* fnstenv mem */
6065                    gen_helper_fstenv(cpu_env, s->A0,
6066                                      tcg_const_i32(dflag - 1));
6067                    update_fip = update_fdp = false;
6068                    break;
6069                case 0x0f: /* fnstcw mem */
6070                    gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6071                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6072                                        s->mem_index, MO_LEUW);
6073                    update_fip = update_fdp = false;
6074                    break;
6075                case 0x1d: /* fldt mem */
6076                    gen_helper_fldt_ST0(cpu_env, s->A0);
6077                    break;
6078                case 0x1f: /* fstpt mem */
6079                    gen_helper_fstt_ST0(cpu_env, s->A0);
6080                    gen_helper_fpop(cpu_env);
6081                    break;
6082                case 0x2c: /* frstor mem */
6083                    gen_helper_frstor(cpu_env, s->A0,
6084                                      tcg_const_i32(dflag - 1));
6085                    update_fip = update_fdp = false;
6086                    break;
6087                case 0x2e: /* fnsave mem */
6088                    gen_helper_fsave(cpu_env, s->A0,
6089                                     tcg_const_i32(dflag - 1));
6090                    update_fip = update_fdp = false;
6091                    break;
6092                case 0x2f: /* fnstsw mem */
6093                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6094                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6095                                        s->mem_index, MO_LEUW);
6096                    update_fip = update_fdp = false;
6097                    break;
6098                case 0x3c: /* fbld */
6099                    gen_helper_fbld_ST0(cpu_env, s->A0);
6100                    break;
6101                case 0x3e: /* fbstp */
6102                    gen_helper_fbst_ST0(cpu_env, s->A0);
6103                    gen_helper_fpop(cpu_env);
6104                    break;
6105                case 0x3d: /* fildll */
6106                    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6107                                        s->mem_index, MO_LEUQ);
6108                    gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6109                    break;
6110                case 0x3f: /* fistpll */
6111                    gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6112                    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6113                                        s->mem_index, MO_LEUQ);
6114                    gen_helper_fpop(cpu_env);
6115                    break;
6116                default:
6117                    goto unknown_op;
6118                }
6119
6120                if (update_fdp) {
6121                    int last_seg = s->override >= 0 ? s->override : a.def_seg;
6122
6123                    tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6124                                   offsetof(CPUX86State,
6125                                            segs[last_seg].selector));
6126                    tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6127                                     offsetof(CPUX86State, fpds));
6128                    tcg_gen_st_tl(last_addr, cpu_env,
6129                                  offsetof(CPUX86State, fpdp));
6130                }
6131                tcg_temp_free(last_addr);
6132            } else {
6133                /* register float ops */
6134                opreg = rm;
6135
6136                switch (op) {
6137                case 0x08: /* fld sti */
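                    /*
                     * fpush decrements TOP first, so the register that
                     * was ST(i) before the push is ST(i+1) afterwards;
                     * hence the (opreg + 1) & 7 below.
                     */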
6138                    gen_helper_fpush(cpu_env);
6139                    gen_helper_fmov_ST0_STN(cpu_env,
6140                                            tcg_const_i32((opreg + 1) & 7));
6141                    break;
6142                case 0x09: /* fxchg sti */
6143                case 0x29: /* fxchg4 sti, undocumented op */
6144                case 0x39: /* fxchg7 sti, undocumented op */
6145                    gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6146                    break;
6147                case 0x0a: /* grp d9/2 */
6148                    switch (rm) {
6149                    case 0: /* fnop */
6150                        /* check exceptions (FreeBSD FPU probe) */
6151                        gen_helper_fwait(cpu_env);
6152                        update_fip = false;
6153                        break;
6154                    default:
6155                        goto unknown_op;
6156                    }
6157                    break;
6158                case 0x0c: /* grp d9/4 */
6159                    switch (rm) {
6160                    case 0: /* fchs */
6161                        gen_helper_fchs_ST0(cpu_env);
6162                        break;
6163                    case 1: /* fabs */
6164                        gen_helper_fabs_ST0(cpu_env);
6165                        break;
6166                    case 4: /* ftst */
6167                        gen_helper_fldz_FT0(cpu_env);
6168                        gen_helper_fcom_ST0_FT0(cpu_env);
6169                        break;
6170                    case 5: /* fxam */
6171                        gen_helper_fxam_ST0(cpu_env);
6172                        break;
6173                    default:
6174                        goto unknown_op;
6175                    }
6176                    break;
6177                case 0x0d: /* grp d9/5 */
6178                    {
6179                        switch (rm) {
6180                        case 0:
6181                            gen_helper_fpush(cpu_env);
6182                            gen_helper_fld1_ST0(cpu_env);
6183                            break;
6184                        case 1:
6185                            gen_helper_fpush(cpu_env);
6186                            gen_helper_fldl2t_ST0(cpu_env);
6187                            break;
6188                        case 2:
6189                            gen_helper_fpush(cpu_env);
6190                            gen_helper_fldl2e_ST0(cpu_env);
6191                            break;
6192                        case 3:
6193                            gen_helper_fpush(cpu_env);
6194                            gen_helper_fldpi_ST0(cpu_env);
6195                            break;
6196                        case 4:
6197                            gen_helper_fpush(cpu_env);
6198                            gen_helper_fldlg2_ST0(cpu_env);
6199                            break;
6200                        case 5:
6201                            gen_helper_fpush(cpu_env);
6202                            gen_helper_fldln2_ST0(cpu_env);
6203                            break;
6204                        case 6:
6205                            gen_helper_fpush(cpu_env);
6206                            gen_helper_fldz_ST0(cpu_env);
6207                            break;
6208                        default:
6209                            goto unknown_op;
6210                        }
6211                    }
6212                    break;
6213                case 0x0e: /* grp d9/6 */
6214                    switch (rm) {
6215                    case 0: /* f2xm1 */
6216                        gen_helper_f2xm1(cpu_env);
6217                        break;
6218                    case 1: /* fyl2x */
6219                        gen_helper_fyl2x(cpu_env);
6220                        break;
6221                    case 2: /* fptan */
6222                        gen_helper_fptan(cpu_env);
6223                        break;
6224                    case 3: /* fpatan */
6225                        gen_helper_fpatan(cpu_env);
6226                        break;
6227                    case 4: /* fxtract */
6228                        gen_helper_fxtract(cpu_env);
6229                        break;
6230                    case 5: /* fprem1 */
6231                        gen_helper_fprem1(cpu_env);
6232                        break;
6233                    case 6: /* fdecstp */
6234                        gen_helper_fdecstp(cpu_env);
6235                        break;
6236                    default:
6237                    case 7: /* fincstp */
6238                        gen_helper_fincstp(cpu_env);
6239                        break;
6240                    }
6241                    break;
6242                case 0x0f: /* grp d9/7 */
6243                    switch (rm) {
6244                    case 0: /* fprem */
6245                        gen_helper_fprem(cpu_env);
6246                        break;
6247                    case 1: /* fyl2xp1 */
6248                        gen_helper_fyl2xp1(cpu_env);
6249                        break;
6250                    case 2: /* fsqrt */
6251                        gen_helper_fsqrt(cpu_env);
6252                        break;
6253                    case 3: /* fsincos */
6254                        gen_helper_fsincos(cpu_env);
6255                        break;
6256                    case 5: /* fscale */
6257                        gen_helper_fscale(cpu_env);
6258                        break;
6259                    case 4: /* frndint */
6260                        gen_helper_frndint(cpu_env);
6261                        break;
6262                    case 6: /* fsin */
6263                        gen_helper_fsin(cpu_env);
6264                        break;
6265                    default:
6266                    case 7: /* fcos */
6267                        gen_helper_fcos(cpu_env);
6268                        break;
6269                    }
6270                    break;
6271                case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6272                case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6273                case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6274                    {
6275                        int op1;
6276
6277                        op1 = op & 7;
6278                        if (op >= 0x20) {
6279                            gen_helper_fp_arith_STN_ST0(op1, opreg);
6280                            if (op >= 0x30) {
6281                                gen_helper_fpop(cpu_env);
6282                            }
6283                        } else {
6284                            gen_helper_fmov_FT0_STN(cpu_env,
6285                                                    tcg_const_i32(opreg));
6286                            gen_helper_fp_arith_ST0_FT0(op1);
6287                        }
6288                    }
6289                    break;
6290                case 0x02: /* fcom */
6291                case 0x22: /* fcom2, undocumented op */
6292                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6293                    gen_helper_fcom_ST0_FT0(cpu_env);
6294                    break;
6295                case 0x03: /* fcomp */
6296                case 0x23: /* fcomp3, undocumented op */
6297                case 0x32: /* fcomp5, undocumented op */
6298                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6299                    gen_helper_fcom_ST0_FT0(cpu_env);
6300                    gen_helper_fpop(cpu_env);
6301                    break;
6302                case 0x15: /* da/5 */
6303                    switch (rm) {
6304                    case 1: /* fucompp */
6305                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6306                        gen_helper_fucom_ST0_FT0(cpu_env);
6307                        gen_helper_fpop(cpu_env);
6308                        gen_helper_fpop(cpu_env);
6309                        break;
6310                    default:
6311                        goto unknown_op;
6312                    }
6313                    break;
6314                case 0x1c:
6315                    switch (rm) {
6316                    case 0: /* feni (287 only, just do nop here) */
6317                        break;
6318                    case 1: /* fdisi (287 only, just do nop here) */
6319                        break;
6320                    case 2: /* fclex */
6321                        gen_helper_fclex(cpu_env);
6322                        update_fip = false;
6323                        break;
6324                    case 3: /* fninit */
6325                        gen_helper_fninit(cpu_env);
6326                        update_fip = false;
6327                        break;
6328                    case 4: /* fsetpm (287 only, just do nop here) */
6329                        break;
6330                    default:
6331                        goto unknown_op;
6332                    }
6333                    break;
6334                case 0x1d: /* fucomi */
6335                    if (!(s->cpuid_features & CPUID_CMOV)) {
6336                        goto illegal_op;
6337                    }
6338                    gen_update_cc_op(s);
6339                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6340                    gen_helper_fucomi_ST0_FT0(cpu_env);
6341                    set_cc_op(s, CC_OP_EFLAGS);
6342                    break;
6343                case 0x1e: /* fcomi */
6344                    if (!(s->cpuid_features & CPUID_CMOV)) {
6345                        goto illegal_op;
6346                    }
6347                    gen_update_cc_op(s);
6348                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6349                    gen_helper_fcomi_ST0_FT0(cpu_env);
6350                    set_cc_op(s, CC_OP_EFLAGS);
6351                    break;
6352                case 0x28: /* ffree sti */
6353                    gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6354                    break;
6355                case 0x2a: /* fst sti */
6356                    gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6357                    break;
6358                case 0x2b: /* fstp sti */
6359                case 0x0b: /* fstp1 sti, undocumented op */
6360                case 0x3a: /* fstp8 sti, undocumented op */
6361                case 0x3b: /* fstp9 sti, undocumented op */
6362                    gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6363                    gen_helper_fpop(cpu_env);
6364                    break;
6365                case 0x2c: /* fucom st(i) */
6366                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6367                    gen_helper_fucom_ST0_FT0(cpu_env);
6368                    break;
6369                case 0x2d: /* fucomp st(i) */
6370                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6371                    gen_helper_fucom_ST0_FT0(cpu_env);
6372                    gen_helper_fpop(cpu_env);
6373                    break;
6374                case 0x33: /* de/3 */
6375                    switch (rm) {
6376                    case 1: /* fcompp */
6377                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6378                        gen_helper_fcom_ST0_FT0(cpu_env);
6379                        gen_helper_fpop(cpu_env);
6380                        gen_helper_fpop(cpu_env);
6381                        break;
6382                    default:
6383                        goto unknown_op;
6384                    }
6385                    break;
6386                case 0x38: /* ffreep sti, undocumented op */
6387                    gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6388                    gen_helper_fpop(cpu_env);
6389                    break;
6390                case 0x3c: /* df/4 */
6391                    switch (rm) {
6392                    case 0:
6393                        gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6394                        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6395                        gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6396                        break;
6397                    default:
6398                        goto unknown_op;
6399                    }
6400                    break;
6401                case 0x3d: /* fucomip */
6402                    if (!(s->cpuid_features & CPUID_CMOV)) {
6403                        goto illegal_op;
6404                    }
6405                    gen_update_cc_op(s);
6406                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6407                    gen_helper_fucomi_ST0_FT0(cpu_env);
6408                    gen_helper_fpop(cpu_env);
6409                    set_cc_op(s, CC_OP_EFLAGS);
6410                    break;
6411                case 0x3e: /* fcomip */
6412                    if (!(s->cpuid_features & CPUID_CMOV)) {
6413                        goto illegal_op;
6414                    }
6415                    gen_update_cc_op(s);
6416                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6417                    gen_helper_fcomi_ST0_FT0(cpu_env);
6418                    gen_helper_fpop(cpu_env);
6419                    set_cc_op(s, CC_OP_EFLAGS);
6420                    break;
6421                case 0x10 ... 0x13: /* fcmovxx */
6422                case 0x18 ... 0x1b:
6423                    {
6424                        int op1;
6425                        TCGLabel *l1;
6426                        static const uint8_t fcmov_cc[8] = {
6427                            (JCC_B << 1),
6428                            (JCC_Z << 1),
6429                            (JCC_BE << 1),
6430                            (JCC_P << 1),
6431                        };
6432
6433                        if (!(s->cpuid_features & CPUID_CMOV)) {
6434                            goto illegal_op;
6435                        }
6436                        op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
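                        /*
                         * The branch below *skips* the fmov, so the low
                         * bit of op1 holds the inverted sense of the
                         * opcode's condition: e.g. FCMOVB (op 0x10) gets
                         * (JCC_B << 1) | 1, i.e. "jump if not below".
                         */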
6437                        l1 = gen_new_label();
6438                        gen_jcc1_noeob(s, op1, l1);
6439                        gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6440                        gen_set_label(l1);
6441                    }
6442                    break;
6443                default:
6444                    goto unknown_op;
6445                }
6446            }
6447
6448            if (update_fip) {
6449                tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6450                               offsetof(CPUX86State, segs[R_CS].selector));
6451                tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6452                                 offsetof(CPUX86State, fpcs));
6453                tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base),
6454                              cpu_env, offsetof(CPUX86State, fpip));
6455            }
6456        }
6457        break;
6458        /************************/
6459        /* string ops */
6460
6461    case 0xa4: /* movsS */
6462    case 0xa5:
6463        ot = mo_b_d(b, dflag);
6464        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6465            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6466        } else {
6467            gen_movs(s, ot);
6468        }
6469        break;
6470
6471    case 0xaa: /* stosS */
6472    case 0xab:
6473        ot = mo_b_d(b, dflag);
6474        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6475            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6476        } else {
6477            gen_stos(s, ot);
6478        }
6479        break;
6480    case 0xac: /* lodsS */
6481    case 0xad:
6482        ot = mo_b_d(b, dflag);
6483        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6484            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6485        } else {
6486            gen_lods(s, ot);
6487        }
6488        break;
6489    case 0xae: /* scasS */
6490    case 0xaf:
6491        ot = mo_b_d(b, dflag);
6492        if (prefixes & PREFIX_REPNZ) {
6493            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6494        } else if (prefixes & PREFIX_REPZ) {
6495            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6496        } else {
6497            gen_scas(s, ot);
6498        }
6499        break;
6500
6501    case 0xa6: /* cmpsS */
6502    case 0xa7:
6503        ot = mo_b_d(b, dflag);
6504        if (prefixes & PREFIX_REPNZ) {
6505            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6506        } else if (prefixes & PREFIX_REPZ) {
6507            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6508        } else {
6509            gen_cmps(s, ot);
6510        }
6511        break;
6512    case 0x6c: /* insS */
6513    case 0x6d:
6514        ot = mo_b_d32(b, dflag);
6515        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6516        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6517        if (!gen_check_io(s, ot, s->tmp2_i32,
6518                          SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6519            break;
6520        }
6521        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6522            gen_io_start();
6523        }
6524        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6525            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6526            /* jump generated by gen_repz_ins */
6527        } else {
6528            gen_ins(s, ot);
6529            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6530                gen_jmp(s, s->pc - s->cs_base);
6531            }
6532        }
6533        break;
6534    case 0x6e: /* outsS */
6535    case 0x6f:
6536        ot = mo_b_d32(b, dflag);
6537        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6538        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6539        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6540            break;
6541        }
6542        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6543            gen_io_start();
6544        }
6545        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6546            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6547            /* jump generated by gen_repz_outs */
6548        } else {
6549            gen_outs(s, ot);
6550            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6551                gen_jmp(s, s->pc - s->cs_base);
6552            }
6553        }
6554        break;
6555
6556        /************************/
6557        /* port I/O */
6558
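    /*
     * When icount is enabled, an insn that performs I/O must be the
     * last one in its TB: hence gen_io_start() before the access and
     * gen_jmp() to the next insn afterwards.
     */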
6559    case 0xe4:
6560    case 0xe5:
6561        ot = mo_b_d32(b, dflag);
6562        val = x86_ldub_code(env, s);
6563        tcg_gen_movi_i32(s->tmp2_i32, val);
6564        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6565            break;
6566        }
6567        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6568            gen_io_start();
6569        }
6570        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6571        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6572        gen_bpt_io(s, s->tmp2_i32, ot);
6573        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6574            gen_jmp(s, s->pc - s->cs_base);
6575        }
6576        break;
6577    case 0xe6:
6578    case 0xe7:
6579        ot = mo_b_d32(b, dflag);
6580        val = x86_ldub_code(env, s);
6581        tcg_gen_movi_i32(s->tmp2_i32, val);
6582        if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6583            break;
6584        }
6585        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6586            gen_io_start();
6587        }
6588        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6589        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6590        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6591        gen_bpt_io(s, s->tmp2_i32, ot);
6592        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6593            gen_jmp(s, s->pc - s->cs_base);
6594        }
6595        break;
6596    case 0xec:
6597    case 0xed:
6598        ot = mo_b_d32(b, dflag);
6599        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6600        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6601        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6602            break;
6603        }
6604        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6605            gen_io_start();
6606        }
6607        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6608        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6609        gen_bpt_io(s, s->tmp2_i32, ot);
6610        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6611            gen_jmp(s, s->pc - s->cs_base);
6612        }
6613        break;
6614    case 0xee:
6615    case 0xef:
6616        ot = mo_b_d32(b, dflag);
6617        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6618        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6619        if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6620            break;
6621        }
6622        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6623            gen_io_start();
6624        }
6625        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6626        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6627        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6628        gen_bpt_io(s, s->tmp2_i32, ot);
6629        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6630            gen_jmp(s, s->pc - s->cs_base);
6631        }
6632        break;
6633
6634        /************************/
6635        /* control */
6636    case 0xc2: /* ret im */
6637        val = x86_ldsw_code(env, s);
6638        ot = gen_pop_T0(s);
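        /*
         * The return address occupies 1 << ot bytes; the immediate on
         * top of that releases the callee's stack arguments.
         */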
6639        gen_stack_update(s, val + (1 << ot));
6640        /* Note that gen_pop_T0 uses a zero-extending load.  */
6641        gen_op_jmp_v(s->T0);
6642        gen_bnd_jmp(s);
6643        gen_jr(s, s->T0);
6644        break;
6645    case 0xc3: /* ret */
6646        ot = gen_pop_T0(s);
6647        gen_pop_update(s, ot);
6648        /* Note that gen_pop_T0 uses a zero-extending load.  */
6649        gen_op_jmp_v(s->T0);
6650        gen_bnd_jmp(s);
6651        gen_jr(s, s->T0);
6652        break;
6653    case 0xca: /* lret im */
6654        val = x86_ldsw_code(env, s);
6655    do_lret:
6656        if (PE(s) && !VM86(s)) {
6657            gen_update_cc_op(s);
6658            gen_jmp_im(s, pc_start - s->cs_base);
6659            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6660                                      tcg_const_i32(val));
6661        } else {
6662            gen_stack_A0(s);
6663            /* pop offset */
6664            gen_op_ld_v(s, dflag, s->T0, s->A0);
6665            /* NOTE: keeping EIP updated here is safe even if an
6666               exception occurs */
6667            gen_op_jmp_v(s->T0);
6668            /* pop selector */
6669            gen_add_A0_im(s, 1 << dflag);
6670            gen_op_ld_v(s, dflag, s->T0, s->A0);
6671            gen_op_movl_seg_T0_vm(s, R_CS);
6672            /* pop the CS:EIP slots (2 << dflag bytes) and the immediate */
6673            gen_stack_update(s, val + (2 << dflag));
6674        }
6675        gen_eob(s);
6676        break;
6677    case 0xcb: /* lret */
6678        val = 0;
6679        goto do_lret;
6680    case 0xcf: /* iret */
6681        gen_svm_check_intercept(s, SVM_EXIT_IRET);
6682        if (!PE(s) || VM86(s)) {
6683            /* real mode or vm86 mode */
6684            if (!check_vm86_iopl(s)) {
6685                break;
6686            }
6687            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6688        } else {
6689            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6690                                      tcg_const_i32(s->pc - s->cs_base));
6691        }
6692        set_cc_op(s, CC_OP_EFLAGS);
6693        gen_eob(s);
6694        break;
6695    case 0xe8: /* call im */
6696        {
6697            if (dflag != MO_16) {
6698                tval = (int32_t)insn_get(env, s, MO_32);
6699            } else {
6700                tval = (int16_t)insn_get(env, s, MO_16);
6701            }
6702            next_eip = s->pc - s->cs_base;
6703            tval += next_eip;
6704            if (dflag == MO_16) {
6705                tval &= 0xffff;
6706            } else if (!CODE64(s)) {
6707                tval &= 0xffffffff;
6708            }
6709            tcg_gen_movi_tl(s->T0, next_eip);
6710            gen_push_v(s, s->T0);
6711            gen_bnd_jmp(s);
6712            gen_jmp(s, tval);
6713        }
6714        break;
6715    case 0x9a: /* lcall im */
6716        {
6717            unsigned int selector, offset;
6718
6719            if (CODE64(s))
6720                goto illegal_op;
6721            ot = dflag;
6722            offset = insn_get(env, s, ot);
6723            selector = insn_get(env, s, MO_16);
6724
6725            tcg_gen_movi_tl(s->T0, selector);
6726            tcg_gen_movi_tl(s->T1, offset);
6727        }
6728        goto do_lcall;
6729    case 0xe9: /* jmp im */
6730        if (dflag != MO_16) {
6731            tval = (int32_t)insn_get(env, s, MO_32);
6732        } else {
6733            tval = (int16_t)insn_get(env, s, MO_16);
6734        }
6735        tval += s->pc - s->cs_base;
6736        if (dflag == MO_16) {
6737            tval &= 0xffff;
6738        } else if (!CODE64(s)) {
6739            tval &= 0xffffffff;
6740        }
6741        gen_bnd_jmp(s);
6742        gen_jmp(s, tval);
6743        break;
6744    case 0xea: /* ljmp im */
6745        {
6746            unsigned int selector, offset;
6747
6748            if (CODE64(s))
6749                goto illegal_op;
6750            ot = dflag;
6751            offset = insn_get(env, s, ot);
6752            selector = insn_get(env, s, MO_16);
6753
6754            tcg_gen_movi_tl(s->T0, selector);
6755            tcg_gen_movi_tl(s->T1, offset);
6756        }
6757        goto do_ljmp;
6758    case 0xeb: /* jmp Jb */
6759        tval = (int8_t)insn_get(env, s, MO_8);
6760        tval += s->pc - s->cs_base;
6761        if (dflag == MO_16) {
6762            tval &= 0xffff;
6763        }
6764        gen_jmp(s, tval);
6765        break;
6766    case 0x70 ... 0x7f: /* jcc Jb */
6767        tval = (int8_t)insn_get(env, s, MO_8);
6768        goto do_jcc;
6769    case 0x180 ... 0x18f: /* jcc Jv */
6770        if (dflag != MO_16) {
6771            tval = (int32_t)insn_get(env, s, MO_32);
6772        } else {
6773            tval = (int16_t)insn_get(env, s, MO_16);
6774        }
6775    do_jcc:
6776        next_eip = s->pc - s->cs_base;
6777        tval += next_eip;
6778        if (dflag == MO_16) {
6779            tval &= 0xffff;
6780        }
6781        gen_bnd_jmp(s);
6782        gen_jcc(s, b, tval, next_eip);
6783        break;
6784
6785    case 0x190 ... 0x19f: /* setcc Gv */
6786        modrm = x86_ldub_code(env, s);
6787        gen_setcc1(s, b, s->T0);
6788        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6789        break;
6790    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6791        if (!(s->cpuid_features & CPUID_CMOV)) {
6792            goto illegal_op;
6793        }
6794        ot = dflag;
6795        modrm = x86_ldub_code(env, s);
6796        reg = ((modrm >> 3) & 7) | REX_R(s);
6797        gen_cmovcc1(env, s, ot, b, modrm, reg);
6798        break;
6799
6800        /************************/
6801        /* flags */
6802    case 0x9c: /* pushf */
6803        gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6804        if (check_vm86_iopl(s)) {
6805            gen_update_cc_op(s);
6806            gen_helper_read_eflags(s->T0, cpu_env);
6807            gen_push_v(s, s->T0);
6808        }
6809        break;
6810    case 0x9d: /* popf */
6811        gen_svm_check_intercept(s, SVM_EXIT_POPF);
6812        if (check_vm86_iopl(s)) {
6813            ot = gen_pop_T0(s);
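            /*
             * Which EFLAGS bits POPF may change depends on privilege:
             * CPL 0 may change IF and IOPL; CPL <= IOPL may change IF
             * but not IOPL; otherwise neither changes.  TF, AC, ID and
             * NT are always writable here.
             */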
6814            if (CPL(s) == 0) {
6815                if (dflag != MO_16) {
6816                    gen_helper_write_eflags(cpu_env, s->T0,
6817                                            tcg_const_i32((TF_MASK | AC_MASK |
6818                                                           ID_MASK | NT_MASK |
6819                                                           IF_MASK |
6820                                                           IOPL_MASK)));
6821                } else {
6822                    gen_helper_write_eflags(cpu_env, s->T0,
6823                                            tcg_const_i32((TF_MASK | AC_MASK |
6824                                                           ID_MASK | NT_MASK |
6825                                                           IF_MASK | IOPL_MASK)
6826                                                          & 0xffff));
6827                }
6828            } else {
6829                if (CPL(s) <= IOPL(s)) {
6830                    if (dflag != MO_16) {
6831                        gen_helper_write_eflags(cpu_env, s->T0,
6832                                                tcg_const_i32((TF_MASK |
6833                                                               AC_MASK |
6834                                                               ID_MASK |
6835                                                               NT_MASK |
6836                                                               IF_MASK)));
6837                    } else {
6838                        gen_helper_write_eflags(cpu_env, s->T0,
6839                                                tcg_const_i32((TF_MASK |
6840                                                               AC_MASK |
6841                                                               ID_MASK |
6842                                                               NT_MASK |
6843                                                               IF_MASK)
6844                                                              & 0xffff));
6845                    }
6846                } else {
6847                    if (dflag != MO_16) {
6848                        gen_helper_write_eflags(cpu_env, s->T0,
6849                                           tcg_const_i32((TF_MASK | AC_MASK |
6850                                                          ID_MASK | NT_MASK)));
6851                    } else {
6852                        gen_helper_write_eflags(cpu_env, s->T0,
6853                                           tcg_const_i32((TF_MASK | AC_MASK |
6854                                                          ID_MASK | NT_MASK)
6855                                                         & 0xffff));
6856                    }
6857                }
6858            }
6859            gen_pop_update(s, ot);
6860            set_cc_op(s, CC_OP_EFLAGS);
6861            /* abort translation because TF/AC flag may change */
6862            gen_jmp_im(s, s->pc - s->cs_base);
6863            gen_eob(s);
6864        }
6865        break;
6866    case 0x9e: /* sahf */
6867        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6868            goto illegal_op;
6869        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6870        gen_compute_eflags(s);
6871        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6872        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6873        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6874        break;
6875    case 0x9f: /* lahf */
6876        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6877            goto illegal_op;
6878        gen_compute_eflags(s);
6879        /* Note: gen_compute_eflags() only gives the condition codes */
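        /* EFLAGS bit 1 is reserved and always reads as 1. */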
6880        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6881        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6882        break;
6883    case 0xf5: /* cmc */
6884        gen_compute_eflags(s);
6885        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6886        break;
6887    case 0xf8: /* clc */
6888        gen_compute_eflags(s);
6889        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6890        break;
6891    case 0xf9: /* stc */
6892        gen_compute_eflags(s);
6893        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6894        break;
6895    case 0xfc: /* cld */
6896        tcg_gen_movi_i32(s->tmp2_i32, 1);
6897        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6898        break;
6899    case 0xfd: /* std */
6900        tcg_gen_movi_i32(s->tmp2_i32, -1);
6901        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6902        break;
6903
6904        /************************/
6905        /* bit operations */
6906    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6907        ot = dflag;
6908        modrm = x86_ldub_code(env, s);
6909        op = (modrm >> 3) & 7;
6910        mod = (modrm >> 6) & 3;
6911        rm = (modrm & 7) | REX_B(s);
6912        if (mod != 3) {
6913            s->rip_offset = 1;
6914            gen_lea_modrm(env, s, modrm);
6915            if (!(s->prefix & PREFIX_LOCK)) {
6916                gen_op_ld_v(s, ot, s->T0, s->A0);
6917            }
6918        } else {
6919            gen_op_mov_v_reg(s, ot, s->T0, rm);
6920        }
6921        /* load shift */
6922        val = x86_ldub_code(env, s);
6923        tcg_gen_movi_tl(s->T1, val);
6924        if (op < 4)
6925            goto unknown_op;
6926        op -= 4;
6927        goto bt_op;
6928    case 0x1a3: /* bt Gv, Ev */
6929        op = 0;
6930        goto do_btx;
6931    case 0x1ab: /* bts */
6932        op = 1;
6933        goto do_btx;
6934    case 0x1b3: /* btr */
6935        op = 2;
6936        goto do_btx;
6937    case 0x1bb: /* btc */
6938        op = 3;
6939    do_btx:
6940        ot = dflag;
6941        modrm = x86_ldub_code(env, s);
6942        reg = ((modrm >> 3) & 7) | REX_R(s);
6943        mod = (modrm >> 6) & 3;
6944        rm = (modrm & 7) | REX_B(s);
6945        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6946        if (mod != 3) {
6947            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6948            /*
                 * The bit offset in T1 is signed and may exceed the
                 * operand width: add the byte offset of the containing
                 * element, (T1 >> (3 + ot)) << ot, to the address.
                 */
6949            gen_exts(ot, s->T1);
6950            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6951            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6952            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6953            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6954            if (!(s->prefix & PREFIX_LOCK)) {
6955                gen_op_ld_v(s, ot, s->T0, s->A0);
6956            }
6957        } else {
6958            gen_op_mov_v_reg(s, ot, s->T0, rm);
6959        }
6960    bt_op:
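        /*
         * Reduce the bit index modulo the operand width (3 + ot is
         * log2 of the width in bits); for the memory forms any excess
         * was already folded into the address above.
         */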
6961        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6962        tcg_gen_movi_tl(s->tmp0, 1);
6963        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6964        if (s->prefix & PREFIX_LOCK) {
6965            switch (op) {
6966            case 0: /* bt */
6967                /* Needs no atomic ops; we suppressed the normal
6968                   memory load for LOCK above, so do it now.  */
6969                gen_op_ld_v(s, ot, s->T0, s->A0);
6970                break;
6971            case 1: /* bts */
6972                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6973                                           s->mem_index, ot | MO_LE);
6974                break;
6975            case 2: /* btr */
6976                tcg_gen_not_tl(s->tmp0, s->tmp0);
6977                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6978                                            s->mem_index, ot | MO_LE);
6979                break;
6980            default:
6981            case 3: /* btc */
6982                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6983                                            s->mem_index, ot | MO_LE);
6984                break;
6985            }
6986            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6987        } else {
6988            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6989            switch (op) {
6990            case 0: /* bt */
6991                /* Data already loaded; nothing to do.  */
6992                break;
6993            case 1: /* bts */
6994                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6995                break;
6996            case 2: /* btr */
6997                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6998                break;
6999            default:
7000            case 3: /* btc */
7001                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
7002                break;
7003            }
7004            if (op != 0) {
7005                if (mod != 3) {
7006                    gen_op_st_v(s, ot, s->T0, s->A0);
7007                } else {
7008                    gen_op_mov_reg_v(s, ot, rm, s->T0);
7009                }
7010            }
7011        }
7012
7013        /* Delay all CC updates until after the store above.  Note that
7014           C is the result of the test, Z is unchanged, and the others
7015           are all undefined.  */
7016        switch (s->cc_op) {
7017        case CC_OP_MULB ... CC_OP_MULQ:
7018        case CC_OP_ADDB ... CC_OP_ADDQ:
7019        case CC_OP_ADCB ... CC_OP_ADCQ:
7020        case CC_OP_SUBB ... CC_OP_SUBQ:
7021        case CC_OP_SBBB ... CC_OP_SBBQ:
7022        case CC_OP_LOGICB ... CC_OP_LOGICQ:
7023        case CC_OP_INCB ... CC_OP_INCQ:
7024        case CC_OP_DECB ... CC_OP_DECQ:
7025        case CC_OP_SHLB ... CC_OP_SHLQ:
7026        case CC_OP_SARB ... CC_OP_SARQ:
7027        case CC_OP_BMILGB ... CC_OP_BMILGQ:
7028            /* Z was going to be computed from the non-zero status of CC_DST.
7029               We can get that same Z value (and the new C value) by leaving
7030               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7031               same width.  */
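            /*
             * E.g. CC_OP_ADDL becomes CC_OP_SARL: each CC_OP group
             * lists the four operand sizes B, W, L, Q consecutively,
             * so (x - CC_OP_MULB) & 3 extracts the width.
             */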
7032            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7033            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7034            break;
7035        default:
7036            /* Otherwise, generate EFLAGS and replace the C bit.  */
7037            gen_compute_eflags(s);
7038            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7039                               ctz32(CC_C), 1);
7040            break;
7041        }
7042        break;
7043    case 0x1bc: /* bsf / tzcnt */
7044    case 0x1bd: /* bsr / lzcnt */
7045        ot = dflag;
7046        modrm = x86_ldub_code(env, s);
7047        reg = ((modrm >> 3) & 7) | REX_R(s);
7048        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7049        gen_extu(ot, s->T0);
7050
7051        /* Note that lzcnt and tzcnt are in different extensions.  */
7052        if ((prefixes & PREFIX_REPZ)
7053            && (b & 1
7054                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7055                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7056            int size = 8 << ot;
7057            /* For lzcnt/tzcnt, the C bit is defined in terms of the input. */
7058            tcg_gen_mov_tl(cpu_cc_src, s->T0);
7059            if (b & 1) {
7060                /* For lzcnt, reduce the target_ulong result by the
7061                   number of zeros that we expect to find at the top.  */
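                /*
                 * E.g. a 32-bit lzcnt on a 64-bit target: clz of the
                 * zero-extended value counts 32 extra leading zeros,
                 * which the subtraction removes.
                 */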
7062                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7063                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7064            } else {
7065                /* For tzcnt, a zero input must return the operand size.  */
7066                tcg_gen_ctzi_tl(s->T0, s->T0, size);
7067            }
7068            /* For lzcnt/tzcnt, the Z bit is defined in terms of the result.  */
7069            gen_op_update1_cc(s);
7070            set_cc_op(s, CC_OP_BMILGB + ot);
7071        } else {
7072            /* For bsr/bsf, only the Z bit is defined, and it reflects
7073               the input rather than the result.  */
7074            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7075            set_cc_op(s, CC_OP_LOGICB + ot);
7076
7077            /* ??? The manual says that the output is undefined when the
7078               input is zero, but real hardware leaves it unchanged, and
7079               real programs appear to depend on that.  Accomplish this
7080               by passing the output as the value to return upon zero.  */
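            /*
             * In the bsr path below, x ^ (TARGET_LONG_BITS - 1) equals
             * TARGET_LONG_BITS - 1 - x, so the first xori prepares a
             * fallback that the second xori maps back to the unchanged
             * register when the input is zero, and otherwise converts
             * the leading-zero count into a bit index.
             */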
7081            if (b & 1) {
7082                /* For bsr, return the bit index of the first 1 bit,
7083                   not the count of leading zeros.  */
7084                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7085                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7086                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7087            } else {
7088                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7089            }
7090        }
7091        gen_op_mov_reg_v(s, ot, reg, s->T0);
7092        break;
7093        /************************/
7094        /* bcd */
7095    case 0x27: /* daa */
7096        if (CODE64(s))
7097            goto illegal_op;
7098        gen_update_cc_op(s);
7099        gen_helper_daa(cpu_env);
7100        set_cc_op(s, CC_OP_EFLAGS);
7101        break;
7102    case 0x2f: /* das */
7103        if (CODE64(s))
7104            goto illegal_op;
7105        gen_update_cc_op(s);
7106        gen_helper_das(cpu_env);
7107        set_cc_op(s, CC_OP_EFLAGS);
7108        break;
7109    case 0x37: /* aaa */
7110        if (CODE64(s))
7111            goto illegal_op;
7112        gen_update_cc_op(s);
7113        gen_helper_aaa(cpu_env);
7114        set_cc_op(s, CC_OP_EFLAGS);
7115        break;
7116    case 0x3f: /* aas */
7117        if (CODE64(s))
7118            goto illegal_op;
7119        gen_update_cc_op(s);
7120        gen_helper_aas(cpu_env);
7121        set_cc_op(s, CC_OP_EFLAGS);
7122        break;
7123    case 0xd4: /* aam */
7124        if (CODE64(s))
7125            goto illegal_op;
7126        val = x86_ldub_code(env, s);
7127        if (val == 0) {
7128            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7129        } else {
7130            gen_helper_aam(cpu_env, tcg_const_i32(val));
7131            set_cc_op(s, CC_OP_LOGICB);
7132        }
7133        break;
7134    case 0xd5: /* aad */
7135        if (CODE64(s))
7136            goto illegal_op;
7137        val = x86_ldub_code(env, s);
7138        gen_helper_aad(cpu_env, tcg_const_i32(val));
7139        set_cc_op(s, CC_OP_LOGICB);
7140        break;
7141        /************************/
7142        /* misc */
7143    case 0x90: /* nop */
7144        /* XXX: the LOCK-prefix check should be applied to all insns */
7145        if (prefixes & PREFIX_LOCK) {
7146            goto illegal_op;
7147        }
7148        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7149        if (REX_B(s)) {
7150            goto do_xchg_reg_eax;
7151        }
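        /* With a REP prefix, 0x90 is PAUSE rather than NOP. */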
7152        if (prefixes & PREFIX_REPZ) {
7153            gen_update_cc_op(s);
7154            gen_jmp_im(s, pc_start - s->cs_base);
7155            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7156            s->base.is_jmp = DISAS_NORETURN;
7157        }
7158        break;
7159    case 0x9b: /* fwait */
7160        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7161            (HF_MP_MASK | HF_TS_MASK)) {
7162            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7163        } else {
7164            gen_helper_fwait(cpu_env);
7165        }
7166        break;
7167    case 0xcc: /* int3 */
7168        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7169        break;
7170    case 0xcd: /* int N */
7171        val = x86_ldub_code(env, s);
7172        if (check_vm86_iopl(s)) {
7173            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7174        }
7175        break;
7176    case 0xce: /* into */
7177        if (CODE64(s))
7178            goto illegal_op;
7179        gen_update_cc_op(s);
7180        gen_jmp_im(s, pc_start - s->cs_base);
7181        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7182        break;
7183#ifdef WANT_ICEBP
7184    case 0xf1: /* icebp (undocumented, exits to external debugger) */
7185        gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7186        gen_debug(s);
7187        break;
7188#endif
7189    case 0xfa: /* cli */
7190        if (check_iopl(s)) {
7191            gen_helper_cli(cpu_env);
7192        }
7193        break;
7194    case 0xfb: /* sti */
7195        if (check_iopl(s)) {
7196            gen_helper_sti(cpu_env);
7197            /* interrupts are inhibited until after the insn following sti */
7198            gen_jmp_im(s, s->pc - s->cs_base);
7199            gen_eob_inhibit_irq(s, true);
7200        }
7201        break;
7202    case 0x62: /* bound */
7203        if (CODE64(s))
7204            goto illegal_op;
7205        ot = dflag;
7206        modrm = x86_ldub_code(env, s);
7207        reg = (modrm >> 3) & 7;
7208        mod = (modrm >> 6) & 3;
7209        if (mod == 3)
7210            goto illegal_op;
7211        gen_op_mov_v_reg(s, ot, s->T0, reg);
7212        gen_lea_modrm(env, s, modrm);
7213        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7214        if (ot == MO_16) {
7215            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7216        } else {
7217            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7218        }
7219        break;
7220    case 0x1c8 ... 0x1cf: /* bswap reg */
7221        reg = (b & 7) | REX_B(s);
7222#ifdef TARGET_X86_64
7223        if (dflag == MO_64) {
7224            tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7225            break;
7226        }
7227#endif
7228        tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7229        break;
7230    case 0xd6: /* salc */
7231        if (CODE64(s))
7232            goto illegal_op;
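        /* salc: set AL to 0xff if CF is set, to 0x00 otherwise. */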
7233        gen_compute_eflags_c(s, s->T0);
7234        tcg_gen_neg_tl(s->T0, s->T0);
7235        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7236        break;
7237    case 0xe0: /* loopnz */
7238    case 0xe1: /* loopz */
7239    case 0xe2: /* loop */
7240    case 0xe3: /* jecxz */
7241        {
7242            TCGLabel *l1, *l2, *l3;
7243
7244            tval = (int8_t)insn_get(env, s, MO_8);
7245            next_eip = s->pc - s->cs_base;
7246            tval += next_eip;
7247            if (dflag == MO_16) {
7248                tval &= 0xffff;
7249            }
7250
7251            l1 = gen_new_label();
7252            l2 = gen_new_label();
7253            l3 = gen_new_label();
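            /*
             * l1: branch taken, continue at tval.
             * l3: loop condition failed, continue at next_eip.
             * l2: common exit.
             */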
7254            gen_update_cc_op(s);
7255            b &= 3;
7256            switch(b) {
7257            case 0: /* loopnz */
7258            case 1: /* loopz */
7259                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7260                gen_op_jz_ecx(s, s->aflag, l3);
7261                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7262                break;
7263            case 2: /* loop */
7264                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7265                gen_op_jnz_ecx(s, s->aflag, l1);
7266                break;
7267            default:
7268            case 3: /* jcxz */
7269                gen_op_jz_ecx(s, s->aflag, l1);
7270                break;
7271            }
7272
7273            gen_set_label(l3);
7274            gen_jmp_im(s, next_eip);
7275            tcg_gen_br(l2);
7276
7277            gen_set_label(l1);
7278            gen_jmp_im(s, tval);
7279            gen_set_label(l2);
7280            gen_eob(s);
7281        }
7282        break;
7283    case 0x130: /* wrmsr */
7284    case 0x132: /* rdmsr */
7285        if (check_cpl0(s)) {
7286            gen_update_cc_op(s);
7287            gen_jmp_im(s, pc_start - s->cs_base);
7288            if (b & 2) {
7289                gen_helper_rdmsr(cpu_env);
7290            } else {
7291                gen_helper_wrmsr(cpu_env);
7292                gen_jmp_im(s, s->pc - s->cs_base);
7293                gen_eob(s);
7294            }
7295        }
7296        break;
7297    case 0x131: /* rdtsc */
7298        gen_update_cc_op(s);
7299        gen_jmp_im(s, pc_start - s->cs_base);
7300        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7301            gen_io_start();
7302        }
7303        gen_helper_rdtsc(cpu_env);
7304        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7305            gen_jmp(s, s->pc - s->cs_base);
7306        }
7307        break;
7308    case 0x133: /* rdpmc */
7309        gen_update_cc_op(s);
7310        gen_jmp_im(s, pc_start - s->cs_base);
7311        gen_helper_rdpmc(cpu_env);
7312        s->base.is_jmp = DISAS_NORETURN;
7313        break;
7314    case 0x134: /* sysenter */
7315        /* On Intel CPUs, SYSENTER is also valid in 64-bit mode */
7316        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7317            goto illegal_op;
7318        if (!PE(s)) {
7319            gen_exception_gpf(s);
7320        } else {
7321            gen_helper_sysenter(cpu_env);
7322            gen_eob(s);
7323        }
7324        break;
7325    case 0x135: /* sysexit */
7326        /* On Intel CPUs, SYSEXIT is also valid in 64-bit mode */
7327        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7328            goto illegal_op;
7329        if (!PE(s)) {
7330            gen_exception_gpf(s);
7331        } else {
7332            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7333            gen_eob(s);
7334        }
7335        break;
7336#ifdef TARGET_X86_64
7337    case 0x105: /* syscall */
7338        /* XXX: is it usable in real mode? */
7339        gen_update_cc_op(s);
7340        gen_jmp_im(s, pc_start - s->cs_base);
7341        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7342        /* TF handling for the syscall insn is different: the TF bit is
7343           checked after the syscall insn completes.  This allows #DB not
7344           to be raised after entering CPL0 when TF is set in FMASK.  */
7345        gen_eob_worker(s, false, true);
7346        break;
7347    case 0x107: /* sysret */
7348        if (!PE(s)) {
7349            gen_exception_gpf(s);
7350        } else {
7351            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7352            /* condition codes are modified only in long mode */
7353            if (LMA(s)) {
7354                set_cc_op(s, CC_OP_EFLAGS);
7355            }
7356            /* TF handling for the sysret insn is different. The TF bit is
7357               checked after the sysret insn completes. This allows #DB to be
7358               generated "as if" the syscall insn in userspace has just
7359               completed.  */
7360            gen_eob_worker(s, false, true);
7361        }
7362        break;
7363#endif
7364    case 0x1a2: /* cpuid */
7365        gen_update_cc_op(s);
7366        gen_jmp_im(s, pc_start - s->cs_base);
7367        gen_helper_cpuid(cpu_env);
7368        break;
7369    case 0xf4: /* hlt */
7370        if (check_cpl0(s)) {
7371            gen_update_cc_op(s);
7372            gen_jmp_im(s, pc_start - s->cs_base);
7373            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7374            s->base.is_jmp = DISAS_NORETURN;
7375        }
7376        break;
7377    case 0x100:
7378        modrm = x86_ldub_code(env, s);
7379        mod = (modrm >> 6) & 3;
7380        op = (modrm >> 3) & 7;
7381        switch(op) {
7382        case 0: /* sldt */
7383            if (!PE(s) || VM86(s))
7384                goto illegal_op;
7385            if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7386                break;
7387            }
7388            gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7389            tcg_gen_ld32u_tl(s->T0, cpu_env,
7390                             offsetof(CPUX86State, ldt.selector));
7391            ot = mod == 3 ? dflag : MO_16;
7392            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7393            break;
7394        case 2: /* lldt */
7395            if (!PE(s) || VM86(s))
7396                goto illegal_op;
7397            if (check_cpl0(s)) {
7398                gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7399                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7400                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7401                gen_helper_lldt(cpu_env, s->tmp2_i32);
7402            }
7403            break;
7404        case 1: /* str */
            if (!PE(s) || VM86(s)) {
                goto illegal_op;
            }
7407            if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7408                break;
7409            }
7410            gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7411            tcg_gen_ld32u_tl(s->T0, cpu_env,
7412                             offsetof(CPUX86State, tr.selector));
7413            ot = mod == 3 ? dflag : MO_16;
7414            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7415            break;
7416        case 3: /* ltr */
            if (!PE(s) || VM86(s)) {
                goto illegal_op;
            }
7419            if (check_cpl0(s)) {
7420                gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7421                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7422                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7423                gen_helper_ltr(cpu_env, s->tmp2_i32);
7424            }
7425            break;
7426        case 4: /* verr */
7427        case 5: /* verw */
            if (!PE(s) || VM86(s)) {
                goto illegal_op;
            }
7430            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7431            gen_update_cc_op(s);
7432            if (op == 4) {
7433                gen_helper_verr(cpu_env, s->T0);
7434            } else {
7435                gen_helper_verw(cpu_env, s->T0);
7436            }
7437            set_cc_op(s, CC_OP_EFLAGS);
7438            break;
7439        default:
7440            goto unknown_op;
7441        }
7442        break;
7443
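    /*
     * Group 7 (0F 01): sgdt/sidt, lgdt/lidt, smsw, lmsw and invlpg in
     * the memory forms, plus the mod=11 extensions (monitor/mwait,
     * xgetbv/xsetbv, the SVM instructions, swapgs, rdtscp, ...).
     */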
7444    case 0x101:
7445        modrm = x86_ldub_code(env, s);
7446        switch (modrm) {
7447        CASE_MODRM_MEM_OP(0): /* sgdt */
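            /*
             * The descriptor-table image in memory is a 2-byte limit
             * followed by the base (4 bytes, or 8 in 64-bit mode); a
             * 16-bit operand size truncates the base to 24 bits.
             */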
7448            if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7449                break;
7450            }
7451            gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7452            gen_lea_modrm(env, s, modrm);
7453            tcg_gen_ld32u_tl(s->T0,
7454                             cpu_env, offsetof(CPUX86State, gdt.limit));
7455            gen_op_st_v(s, MO_16, s->T0, s->A0);
7456            gen_add_A0_im(s, 2);
7457            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7458            if (dflag == MO_16) {
7459                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7460            }
7461            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7462            break;
7463
7464        case 0xc8: /* monitor */
7465            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7466                goto illegal_op;
7467            }
7468            gen_update_cc_op(s);
7469            gen_jmp_im(s, pc_start - s->cs_base);
7470            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7471            gen_extu(s->aflag, s->A0);
7472            gen_add_A0_ds_seg(s);
7473            gen_helper_monitor(cpu_env, s->A0);
7474            break;
7475
7476        case 0xc9: /* mwait */
7477            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7478                goto illegal_op;
7479            }
7480            gen_update_cc_op(s);
7481            gen_jmp_im(s, pc_start - s->cs_base);
7482            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7483            s->base.is_jmp = DISAS_NORETURN;
7484            break;
7485
7486        case 0xca: /* clac */
7487            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7488                || CPL(s) != 0) {
7489                goto illegal_op;
7490            }
7491            gen_helper_clac(cpu_env);
7492            gen_jmp_im(s, s->pc - s->cs_base);
7493            gen_eob(s);
7494            break;
7495
7496        case 0xcb: /* stac */
7497            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7498                || CPL(s) != 0) {
7499                goto illegal_op;
7500            }
7501            gen_helper_stac(cpu_env);
7502            gen_jmp_im(s, s->pc - s->cs_base);
7503            gen_eob(s);
7504            break;
7505
7506        CASE_MODRM_MEM_OP(1): /* sidt */
7507            if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7508                break;
7509            }
7510            gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7511            gen_lea_modrm(env, s, modrm);
7512            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7513            gen_op_st_v(s, MO_16, s->T0, s->A0);
7514            gen_add_A0_im(s, 2);
7515            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7516            if (dflag == MO_16) {
7517                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7518            }
7519            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7520            break;
7521
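        /*
         * xgetbv/xsetbv access the extended control register selected
         * by ECX, with the 64-bit value passed in EDX:EAX.
         */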
7522        case 0xd0: /* xgetbv */
7523            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7524                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7525                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7526                goto illegal_op;
7527            }
7528            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7529            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7530            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7531            break;
7532
7533        case 0xd1: /* xsetbv */
7534            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7535                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7536                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7537                goto illegal_op;
7538            }
7539            if (!check_cpl0(s)) {
7540                break;
7541            }
7542            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7543                                  cpu_regs[R_EDX]);
7544            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7545            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7546            /* End TB because translation flags may change.  */
7547            gen_jmp_im(s, s->pc - s->cs_base);
7548            gen_eob(s);
7549            break;
7550
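        /*
         * SVM instruction group.  Most require EFER.SVME, protected
         * mode and CPL 0; VMMCALL needs only SVME, and STGI/SKINIT are
         * also allowed with the SKINIT CPUID bit.
         */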
7551        case 0xd8: /* VMRUN */
7552            if (!SVME(s) || !PE(s)) {
7553                goto illegal_op;
7554            }
7555            if (!check_cpl0(s)) {
7556                break;
7557            }
7558            gen_update_cc_op(s);
7559            gen_jmp_im(s, pc_start - s->cs_base);
7560            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7561                             tcg_const_i32(s->pc - pc_start));
7562            tcg_gen_exit_tb(NULL, 0);
7563            s->base.is_jmp = DISAS_NORETURN;
7564            break;
7565
7566        case 0xd9: /* VMMCALL */
7567            if (!SVME(s)) {
7568                goto illegal_op;
7569            }
7570            gen_update_cc_op(s);
7571            gen_jmp_im(s, pc_start - s->cs_base);
7572            gen_helper_vmmcall(cpu_env);
7573            break;
7574
7575        case 0xda: /* VMLOAD */
7576            if (!SVME(s) || !PE(s)) {
7577                goto illegal_op;
7578            }
7579            if (!check_cpl0(s)) {
7580                break;
7581            }
7582            gen_update_cc_op(s);
7583            gen_jmp_im(s, pc_start - s->cs_base);
7584            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7585            break;
7586
7587        case 0xdb: /* VMSAVE */
7588            if (!SVME(s) || !PE(s)) {
7589                goto illegal_op;
7590            }
7591            if (!check_cpl0(s)) {
7592                break;
7593            }
7594            gen_update_cc_op(s);
7595            gen_jmp_im(s, pc_start - s->cs_base);
7596            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7597            break;
7598
7599        case 0xdc: /* STGI */
7600            if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7601                || !PE(s)) {
7602                goto illegal_op;
7603            }
7604            if (!check_cpl0(s)) {
7605                break;
7606            }
7607            gen_update_cc_op(s);
7608            gen_helper_stgi(cpu_env);
7609            gen_jmp_im(s, s->pc - s->cs_base);
7610            gen_eob(s);
7611            break;
7612
7613        case 0xdd: /* CLGI */
7614            if (!SVME(s) || !PE(s)) {
7615                goto illegal_op;
7616            }
7617            if (!check_cpl0(s)) {
7618                break;
7619            }
7620            gen_update_cc_op(s);
7621            gen_jmp_im(s, pc_start - s->cs_base);
7622            gen_helper_clgi(cpu_env);
7623            break;
7624
7625        case 0xde: /* SKINIT */
7626            if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7627                || !PE(s)) {
7628                goto illegal_op;
7629            }
7630            gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7631            /* If not intercepted, not implemented -- raise #UD. */
7632            goto illegal_op;
7633
7634        case 0xdf: /* INVLPGA */
7635            if (!SVME(s) || !PE(s)) {
7636                goto illegal_op;
7637            }
7638            if (!check_cpl0(s)) {
7639                break;
7640            }
7641            gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7642            if (s->aflag == MO_64) {
7643                tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7644            } else {
7645                tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7646            }
7647            gen_helper_flush_page(cpu_env, s->A0);
7648            gen_jmp_im(s, s->pc - s->cs_base);
7649            gen_eob(s);
7650            break;
7651
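        /*
         * lgdt/lidt mirror sgdt/sidt above: load the 16-bit limit,
         * then the base, truncated to 24 bits for 16-bit operand size.
         */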
7652        CASE_MODRM_MEM_OP(2): /* lgdt */
7653            if (!check_cpl0(s)) {
7654                break;
7655            }
7656            gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7657            gen_lea_modrm(env, s, modrm);
7658            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7659            gen_add_A0_im(s, 2);
7660            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7661            if (dflag == MO_16) {
7662                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7663            }
7664            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7665            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7666            break;
7667
7668        CASE_MODRM_MEM_OP(3): /* lidt */
7669            if (!check_cpl0(s)) {
7670                break;
7671            }
7672            gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7673            gen_lea_modrm(env, s, modrm);
7674            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7675            gen_add_A0_im(s, 2);
7676            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7677            if (dflag == MO_16) {
7678                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7679            }
7680            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7681            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7682            break;
7683
7684        CASE_MODRM_OP(4): /* smsw */
7685            if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7686                break;
7687            }
7688            gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7689            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7690            /*
7691             * In 32-bit mode, the higher 16 bits of the destination
7692             * register are undefined.  In practice CR0[31:0] is stored
7693             * just like in 64-bit mode.
7694             */
7695            mod = (modrm >> 6) & 3;
7696            ot = (mod != 3 ? MO_16 : s->dflag);
7697            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7698            break;
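        /*
         * rdpkru/wrpkru move PKRU to/from EDX:EAX, with the register
         * selector in ECX; the helpers perform the remaining checks.
         */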
7699        case 0xee: /* rdpkru */
7700            if (prefixes & PREFIX_LOCK) {
7701                goto illegal_op;
7702            }
7703            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7704            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7705            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7706            break;
7707        case 0xef: /* wrpkru */
7708            if (prefixes & PREFIX_LOCK) {
7709                goto illegal_op;
7710            }
7711            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7712                                  cpu_regs[R_EDX]);
7713            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7714            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7715            break;
7716
7717        CASE_MODRM_OP(6): /* lmsw */
7718            if (!check_cpl0(s)) {
7719                break;
7720            }
7721            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7722            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7723            /*
7724             * Only the 4 lower bits of CR0 are modified.
7725             * PE cannot be set to zero if already set to one.
7726             */
7727            tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7728            tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7729            tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7730            tcg_gen_or_tl(s->T0, s->T0, s->T1);
7731            gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7732            gen_jmp_im(s, s->pc - s->cs_base);
7733            gen_eob(s);
7734            break;
7735
7736        CASE_MODRM_MEM_OP(7): /* invlpg */
7737            if (!check_cpl0(s)) {
7738                break;
7739            }
7740            gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7741            gen_lea_modrm(env, s, modrm);
7742            gen_helper_flush_page(cpu_env, s->A0);
7743            gen_jmp_im(s, s->pc - s->cs_base);
7744            gen_eob(s);
7745            break;
7746
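        /*
         * swapgs exchanges GS.base with MSR_KERNEL_GS_BASE; it exists
         * only in 64-bit mode and only at CPL 0.
         */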
7747        case 0xf8: /* swapgs */
7748#ifdef TARGET_X86_64
7749            if (CODE64(s)) {
7750                if (check_cpl0(s)) {
7751                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7752                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7753                                  offsetof(CPUX86State, kernelgsbase));
7754                    tcg_gen_st_tl(s->T0, cpu_env,
7755                                  offsetof(CPUX86State, kernelgsbase));
7756                }
7757                break;
7758            }
7759#endif
7760            goto illegal_op;
7761
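        /*
         * Reading the TSC counts as an I/O operation under icount:
         * bracket the helper with gen_io_start() and end the TB so
         * the instruction count stays exact.
         */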
7762        case 0xf9: /* rdtscp */
7763            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7764                goto illegal_op;
7765            }
7766            gen_update_cc_op(s);
7767            gen_jmp_im(s, pc_start - s->cs_base);
7768            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7769                gen_io_start();
7770            }
7771            gen_helper_rdtscp(cpu_env);
7772            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7773                gen_jmp(s, s->pc - s->cs_base);
7774            }
7775            break;
7776
7777        default:
7778            goto unknown_op;
7779        }
7780        break;
7781
7782    case 0x108: /* invd */
7783    case 0x109: /* wbinvd */
7784        if (check_cpl0(s)) {
7785            gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7786            /* nothing to do */
7787        }
7788        break;
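    /*
     * Opcode 0x63 is ARPL outside 64-bit mode, but MOVSXD (sign-extend
     * a 32-bit source into a 64-bit register) in 64-bit mode.
     */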
    case 0x63: /* arpl, or movsxd (x86_64) */
7790#ifdef TARGET_X86_64
7791        if (CODE64(s)) {
7792            int d_ot;
7793            /* d_ot is the size of destination */
7794            d_ot = dflag;
7795
7796            modrm = x86_ldub_code(env, s);
7797            reg = ((modrm >> 3) & 7) | REX_R(s);
7798            mod = (modrm >> 6) & 3;
7799            rm = (modrm & 7) | REX_B(s);
7800
7801            if (mod == 3) {
7802                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7803                /* sign extend */
7804                if (d_ot == MO_64) {
7805                    tcg_gen_ext32s_tl(s->T0, s->T0);
7806                }
7807                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7808            } else {
7809                gen_lea_modrm(env, s, modrm);
7810                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7811                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7812            }
7813        } else
7814#endif
7815        {
7816            TCGLabel *label1;
7817            TCGv t0, t1, t2, a0;
7818
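            /*
             * ARPL: if the destination selector's RPL (bits 1:0) is
             * below the source's, raise it to match and set ZF;
             * otherwise leave it alone and clear ZF.
             */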
            if (!PE(s) || VM86(s)) {
                goto illegal_op;
            }
7821            t0 = tcg_temp_local_new();
7822            t1 = tcg_temp_local_new();
7823            t2 = tcg_temp_local_new();
7824            ot = MO_16;
7825            modrm = x86_ldub_code(env, s);
7826            reg = (modrm >> 3) & 7;
7827            mod = (modrm >> 6) & 3;
7828            rm = modrm & 7;
7829            if (mod != 3) {
7830                gen_lea_modrm(env, s, modrm);
7831                gen_op_ld_v(s, ot, t0, s->A0);
7832                a0 = tcg_temp_local_new();
7833                tcg_gen_mov_tl(a0, s->A0);
7834            } else {
7835                gen_op_mov_v_reg(s, ot, t0, rm);
7836                a0 = NULL;
7837            }
7838            gen_op_mov_v_reg(s, ot, t1, reg);
7839            tcg_gen_andi_tl(s->tmp0, t0, 3);
7840            tcg_gen_andi_tl(t1, t1, 3);
7841            tcg_gen_movi_tl(t2, 0);
7842            label1 = gen_new_label();
7843            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7844            tcg_gen_andi_tl(t0, t0, ~3);
7845            tcg_gen_or_tl(t0, t0, t1);
7846            tcg_gen_movi_tl(t2, CC_Z);
7847            gen_set_label(label1);
7848            if (mod != 3) {
7849                gen_op_st_v(s, ot, t0, a0);
7850                tcg_temp_free(a0);
            } else {
7852                gen_op_mov_reg_v(s, ot, rm, t0);
7853            }
7854            gen_compute_eflags(s);
7855            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7856            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7857            tcg_temp_free(t0);
7858            tcg_temp_free(t1);
7859            tcg_temp_free(t2);
7860        }
7861        break;
7862    case 0x102: /* lar */
7863    case 0x103: /* lsl */
7864        {
7865            TCGLabel *label1;
7866            TCGv t0;
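            /*
             * lar/lsl set ZF on success; the destination register is
             * written only when ZF ends up set, as on hardware.
             */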
            if (!PE(s) || VM86(s)) {
                goto illegal_op;
            }
7869            ot = dflag != MO_16 ? MO_32 : MO_16;
7870            modrm = x86_ldub_code(env, s);
7871            reg = ((modrm >> 3) & 7) | REX_R(s);
7872            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7873            t0 = tcg_temp_local_new();
7874            gen_update_cc_op(s);
7875            if (b == 0x102) {
7876                gen_helper_lar(t0, cpu_env, s->T0);
7877            } else {
7878                gen_helper_lsl(t0, cpu_env, s->T0);
7879            }
7880            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7881            label1 = gen_new_label();
7882            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7883            gen_op_mov_reg_v(s, ot, reg, t0);
7884            gen_set_label(label1);
7885            set_cc_op(s, CC_OP_EFLAGS);
7886            tcg_temp_free(t0);
7887        }
7888        break;
7889    case 0x118:
7890        modrm = x86_ldub_code(env, s);
7891        mod = (modrm >> 6) & 3;
7892        op = (modrm >> 3) & 7;
        switch (op) {
        case 0: /* prefetchnta */
        case 1: /* prefetcht0 */
        case 2: /* prefetcht1 */
        case 3: /* prefetcht2 */
            if (mod == 3) {
                goto illegal_op;
            }
7900            gen_nop_modrm(env, s, modrm);
7901            /* nothing more to do */
7902            break;
7903        default: /* nop (multi byte) */
7904            gen_nop_modrm(env, s, modrm);
7905            break;
7906        }
7907        break;
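    /*
     * 0F 1A and 0F 1B form the MPX group (bndmk, bndcl/bndcu/bndcn,
     * bndmov, bndldx/bndstx), distinguished by the mandatory prefix;
     * when MPX is not enabled they execute as multi-byte nops.
     */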
7908    case 0x11a:
7909        modrm = x86_ldub_code(env, s);
7910        if (s->flags & HF_MPX_EN_MASK) {
7911            mod = (modrm >> 6) & 3;
7912            reg = ((modrm >> 3) & 7) | REX_R(s);
7913            if (prefixes & PREFIX_REPZ) {
7914                /* bndcl */
7915                if (reg >= 4
7916                    || (prefixes & PREFIX_LOCK)
7917                    || s->aflag == MO_16) {
7918                    goto illegal_op;
7919                }
7920                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7921            } else if (prefixes & PREFIX_REPNZ) {
7922                /* bndcu */
7923                if (reg >= 4
7924                    || (prefixes & PREFIX_LOCK)
7925                    || s->aflag == MO_16) {
7926                    goto illegal_op;
7927                }
7928                TCGv_i64 notu = tcg_temp_new_i64();
7929                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7930                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7931                tcg_temp_free_i64(notu);
7932            } else if (prefixes & PREFIX_DATA) {
7933                /* bndmov -- from reg/mem */
7934                if (reg >= 4 || s->aflag == MO_16) {
7935                    goto illegal_op;
7936                }
7937                if (mod == 3) {
7938                    int reg2 = (modrm & 7) | REX_B(s);
7939                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7940                        goto illegal_op;
7941                    }
7942                    if (s->flags & HF_MPX_IU_MASK) {
7943                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7944                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7945                    }
7946                } else {
7947                    gen_lea_modrm(env, s, modrm);
7948                    if (CODE64(s)) {
7949                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7950                                            s->mem_index, MO_LEUQ);
7951                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7952                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7953                                            s->mem_index, MO_LEUQ);
7954                    } else {
7955                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7956                                            s->mem_index, MO_LEUL);
7957                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7958                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7959                                            s->mem_index, MO_LEUL);
7960                    }
7961                    /* bnd registers are now in-use */
7962                    gen_set_hflag(s, HF_MPX_IU_MASK);
7963                }
7964            } else if (mod != 3) {
7965                /* bndldx */
7966                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7967                if (reg >= 4
7968                    || (prefixes & PREFIX_LOCK)
7969                    || s->aflag == MO_16
7970                    || a.base < -1) {
7971                    goto illegal_op;
7972                }
7973                if (a.base >= 0) {
7974                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7975                } else {
7976                    tcg_gen_movi_tl(s->A0, 0);
7977                }
7978                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7979                if (a.index >= 0) {
7980                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7981                } else {
7982                    tcg_gen_movi_tl(s->T0, 0);
7983                }
7984                if (CODE64(s)) {
7985                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7986                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7987                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7988                } else {
7989                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7990                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7991                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7992                }
7993                gen_set_hflag(s, HF_MPX_IU_MASK);
7994            }
7995        }
7996        gen_nop_modrm(env, s, modrm);
7997        break;
7998    case 0x11b:
7999        modrm = x86_ldub_code(env, s);
8000        if (s->flags & HF_MPX_EN_MASK) {
8001            mod = (modrm >> 6) & 3;
8002            reg = ((modrm >> 3) & 7) | REX_R(s);
8003            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
8004                /* bndmk */
8005                if (reg >= 4
8006                    || (prefixes & PREFIX_LOCK)
8007                    || s->aflag == MO_16) {
8008                    goto illegal_op;
8009                }
8010                AddressParts a = gen_lea_modrm_0(env, s, modrm);
8011                if (a.base >= 0) {
8012                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
8013                    if (!CODE64(s)) {
8014                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
8015                    }
8016                } else if (a.base == -1) {
                    /* no base register: the lower bound is 0 */
8018                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
8019                } else {
8020                    /* rip-relative generates #ud */
8021                    goto illegal_op;
8022                }
8023                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
8024                if (!CODE64(s)) {
8025                    tcg_gen_ext32u_tl(s->A0, s->A0);
8026                }
8027                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
8028                /* bnd registers are now in-use */
8029                gen_set_hflag(s, HF_MPX_IU_MASK);
8030                break;
8031            } else if (prefixes & PREFIX_REPNZ) {
8032                /* bndcn */
8033                if (reg >= 4
8034                    || (prefixes & PREFIX_LOCK)
8035                    || s->aflag == MO_16) {
8036                    goto illegal_op;
8037                }
8038                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
8039            } else if (prefixes & PREFIX_DATA) {
8040                /* bndmov -- to reg/mem */
8041                if (reg >= 4 || s->aflag == MO_16) {
8042                    goto illegal_op;
8043                }
8044                if (mod == 3) {
8045                    int reg2 = (modrm & 7) | REX_B(s);
8046                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8047                        goto illegal_op;
8048                    }
8049                    if (s->flags & HF_MPX_IU_MASK) {
8050                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8051                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8052                    }
8053                } else {
8054                    gen_lea_modrm(env, s, modrm);
8055                    if (CODE64(s)) {
8056                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8057                                            s->mem_index, MO_LEUQ);
8058                        tcg_gen_addi_tl(s->A0, s->A0, 8);
8059                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8060                                            s->mem_index, MO_LEUQ);
8061                    } else {
8062                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8063                                            s->mem_index, MO_LEUL);
8064                        tcg_gen_addi_tl(s->A0, s->A0, 4);
8065                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8066                                            s->mem_index, MO_LEUL);
8067                    }
8068                }
8069            } else if (mod != 3) {
8070                /* bndstx */
8071                AddressParts a = gen_lea_modrm_0(env, s, modrm);
8072                if (reg >= 4
8073                    || (prefixes & PREFIX_LOCK)
8074                    || s->aflag == MO_16
8075                    || a.base < -1) {
8076                    goto illegal_op;
8077                }
8078                if (a.base >= 0) {
8079                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8080                } else {
8081                    tcg_gen_movi_tl(s->A0, 0);
8082                }
8083                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8084                if (a.index >= 0) {
8085                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8086                } else {
8087                    tcg_gen_movi_tl(s->T0, 0);
8088                }
8089                if (CODE64(s)) {
8090                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8091                                        cpu_bndl[reg], cpu_bndu[reg]);
8092                } else {
8093                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8094                                        cpu_bndl[reg], cpu_bndu[reg]);
8095                }
8096            }
8097        }
8098        gen_nop_modrm(env, s, modrm);
8099        break;
8100    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8101        modrm = x86_ldub_code(env, s);
8102        gen_nop_modrm(env, s, modrm);
8103        break;
8104
8105    case 0x120: /* mov reg, crN */
8106    case 0x122: /* mov crN, reg */
8107        if (!check_cpl0(s)) {
8108            break;
8109        }
8110        modrm = x86_ldub_code(env, s);
8111        /*
8112         * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8113         * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8114         * processors all show that the mod bits are assumed to be 1's,
8115         * regardless of actual values.
8116         */
8117        rm = (modrm & 7) | REX_B(s);
8118        reg = ((modrm >> 3) & 7) | REX_R(s);
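        /*
         * With a LOCK prefix and AMD's CR8 legacy alias (CPUID bit
         * CR8LEG), "mov cr0" actually accesses CR8.
         */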
8119        switch (reg) {
8120        case 0:
8121            if ((prefixes & PREFIX_LOCK) &&
8122                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8123                reg = 8;
8124            }
8125            break;
8126        case 2:
8127        case 3:
8128        case 4:
8129        case 8:
8130            break;
8131        default:
8132            goto unknown_op;
8133        }
        ot = (CODE64(s) ? MO_64 : MO_32);
8135
8136        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8137            gen_io_start();
8138        }
8139        if (b & 2) {
8140            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8141            gen_op_mov_v_reg(s, ot, s->T0, rm);
8142            gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8143            gen_jmp_im(s, s->pc - s->cs_base);
8144            gen_eob(s);
8145        } else {
8146            gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8147            gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8148            gen_op_mov_reg_v(s, ot, rm, s->T0);
8149            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8150                gen_jmp(s, s->pc - s->cs_base);
8151            }
8152        }
8153        break;
8154
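    /*
     * Debug-register moves.  Everything beyond the CPL and range
     * checks, including DR4/DR5 handling, is left to the helpers.
     */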
8155    case 0x121: /* mov reg, drN */
8156    case 0x123: /* mov drN, reg */
8157        if (check_cpl0(s)) {
8158            modrm = x86_ldub_code(env, s);
8159            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8160             * AMD documentation (24594.pdf) and testing of
8161             * intel 386 and 486 processors all show that the mod bits
8162             * are assumed to be 1's, regardless of actual values.
8163             */
8164            rm = (modrm & 7) | REX_B(s);
8165            reg = ((modrm >> 3) & 7) | REX_R(s);
            ot = CODE64(s) ? MO_64 : MO_32;
8170            if (reg >= 8) {
8171                goto illegal_op;
8172            }
8173            if (b & 2) {
8174                gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8175                gen_op_mov_v_reg(s, ot, s->T0, rm);
8176                tcg_gen_movi_i32(s->tmp2_i32, reg);
8177                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8178                gen_jmp_im(s, s->pc - s->cs_base);
8179                gen_eob(s);
8180            } else {
8181                gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8182                tcg_gen_movi_i32(s->tmp2_i32, reg);
8183                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8184                gen_op_mov_reg_v(s, ot, rm, s->T0);
8185            }
8186        }
8187        break;
8188    case 0x106: /* clts */
8189        if (check_cpl0(s)) {
8190            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8191            gen_helper_clts(cpu_env);
8192            /* abort block because static cpu state changed */
8193            gen_jmp_im(s, s->pc - s->cs_base);
8194            gen_eob(s);
8195        }
8196        break;
8197    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8198    case 0x1c3: /* MOVNTI reg, mem */
        if (!(s->cpuid_features & CPUID_SSE2)) {
            goto illegal_op;
        }
8201        ot = mo_64_32(dflag);
8202        modrm = x86_ldub_code(env, s);
8203        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            goto illegal_op;
        }
8206        reg = ((modrm >> 3) & 7) | REX_R(s);
8207        /* generate a generic store */
8208        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8209        break;
8210    case 0x1ae:
8211        modrm = x86_ldub_code(env, s);
8212        switch (modrm) {
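        /*
         * 0F AE group: fxsave/fxrstor, ldmxcsr/stmxcsr, xsave/xrstor,
         * xsaveopt/clwb and clflush/clflushopt in the memory forms;
         * rd/wr fs/gs base and the fences in the register forms.
         */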
8213        CASE_MODRM_MEM_OP(0): /* fxsave */
8214            if (!(s->cpuid_features & CPUID_FXSR)
8215                || (prefixes & PREFIX_LOCK)) {
8216                goto illegal_op;
8217            }
8218            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8219                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8220                break;
8221            }
8222            gen_lea_modrm(env, s, modrm);
8223            gen_helper_fxsave(cpu_env, s->A0);
8224            break;
8225
8226        CASE_MODRM_MEM_OP(1): /* fxrstor */
8227            if (!(s->cpuid_features & CPUID_FXSR)
8228                || (prefixes & PREFIX_LOCK)) {
8229                goto illegal_op;
8230            }
8231            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8232                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8233                break;
8234            }
8235            gen_lea_modrm(env, s, modrm);
8236            gen_helper_fxrstor(cpu_env, s->A0);
8237            break;
8238
8239        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8240            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8241                goto illegal_op;
8242            }
8243            if (s->flags & HF_TS_MASK) {
8244                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8245                break;
8246            }
8247            gen_lea_modrm(env, s, modrm);
8248            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8249            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8250            break;
8251
8252        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8253            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8254                goto illegal_op;
8255            }
8256            if (s->flags & HF_TS_MASK) {
8257                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8258                break;
8259            }
8260            gen_helper_update_mxcsr(cpu_env);
8261            gen_lea_modrm(env, s, modrm);
8262            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8263            gen_op_st_v(s, MO_32, s->T0, s->A0);
8264            break;
8265
8266        CASE_MODRM_MEM_OP(4): /* xsave */
8267            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8268                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8269                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8270                goto illegal_op;
8271            }
8272            gen_lea_modrm(env, s, modrm);
8273            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8274                                  cpu_regs[R_EDX]);
8275            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8276            break;
8277
8278        CASE_MODRM_MEM_OP(5): /* xrstor */
8279            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8280                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8281                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8282                goto illegal_op;
8283            }
8284            gen_lea_modrm(env, s, modrm);
8285            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8286                                  cpu_regs[R_EDX]);
8287            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8288            /* XRSTOR is how MPX is enabled, which changes how
8289               we translate.  Thus we need to end the TB.  */
8290            gen_update_cc_op(s);
8291            gen_jmp_im(s, s->pc - s->cs_base);
8292            gen_eob(s);
8293            break;
8294
8295        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8296            if (prefixes & PREFIX_LOCK) {
8297                goto illegal_op;
8298            }
8299            if (prefixes & PREFIX_DATA) {
8300                /* clwb */
8301                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8302                    goto illegal_op;
8303                }
8304                gen_nop_modrm(env, s, modrm);
8305            } else {
8306                /* xsaveopt */
8307                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8308                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8309                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8310                    goto illegal_op;
8311                }
8312                gen_lea_modrm(env, s, modrm);
8313                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8314                                      cpu_regs[R_EDX]);
8315                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8316            }
8317            break;
8318
8319        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8320            if (prefixes & PREFIX_LOCK) {
8321                goto illegal_op;
8322            }
8323            if (prefixes & PREFIX_DATA) {
8324                /* clflushopt */
8325                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8326                    goto illegal_op;
8327                }
8328            } else {
8329                /* clflush */
8330                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8331                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8332                    goto illegal_op;
8333                }
8334            }
8335            gen_nop_modrm(env, s, modrm);
8336            break;
8337
8338        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8339        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8340        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8341        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8342            if (CODE64(s)
8343                && (prefixes & PREFIX_REPZ)
8344                && !(prefixes & PREFIX_LOCK)
8345                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8346                TCGv base, treg, src, dst;
8347
8348                /* Preserve hflags bits by testing CR4 at runtime.  */
8349                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8350                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8351
8352                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8353                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8354
8355                if (modrm & 0x10) {
8356                    /* wr*base */
8357                    dst = base, src = treg;
8358                } else {
8359                    /* rd*base */
8360                    dst = treg, src = base;
8361                }
8362
8363                if (s->dflag == MO_32) {
8364                    tcg_gen_ext32u_tl(dst, src);
8365                } else {
8366                    tcg_gen_mov_tl(dst, src);
8367                }
8368                break;
8369            }
8370            goto unknown_op;
8371
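        /*
         * The fences map directly onto TCG memory barriers: sfence is
         * store-store, lfence load-load, mfence a full barrier.
         */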
8372        case 0xf8: /* sfence / pcommit */
8373            if (prefixes & PREFIX_DATA) {
8374                /* pcommit */
8375                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8376                    || (prefixes & PREFIX_LOCK)) {
8377                    goto illegal_op;
8378                }
8379                break;
8380            }
8381            /* fallthru */
8382        case 0xf9 ... 0xff: /* sfence */
8383            if (!(s->cpuid_features & CPUID_SSE)
8384                || (prefixes & PREFIX_LOCK)) {
8385                goto illegal_op;
8386            }
8387            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8388            break;
8389        case 0xe8 ... 0xef: /* lfence */
8390            if (!(s->cpuid_features & CPUID_SSE)
8391                || (prefixes & PREFIX_LOCK)) {
8392                goto illegal_op;
8393            }
8394            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8395            break;
8396        case 0xf0 ... 0xf7: /* mfence */
8397            if (!(s->cpuid_features & CPUID_SSE2)
8398                || (prefixes & PREFIX_LOCK)) {
8399                goto illegal_op;
8400            }
8401            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8402            break;
8403
8404        default:
8405            goto unknown_op;
8406        }
8407        break;
8408
8409    case 0x10d: /* 3DNow! prefetch(w) */
8410        modrm = x86_ldub_code(env, s);
8411        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            goto illegal_op;
        }
8414        gen_nop_modrm(env, s, modrm);
8415        break;
8416    case 0x1aa: /* rsm */
8417        gen_svm_check_intercept(s, SVM_EXIT_RSM);
        if (!(s->flags & HF_SMM_MASK)) {
            goto illegal_op;
        }
8420#ifdef CONFIG_USER_ONLY
8421        /* we should not be in SMM mode */
8422        g_assert_not_reached();
8423#else
8424        gen_update_cc_op(s);
8425        gen_jmp_im(s, s->pc - s->cs_base);
8426        gen_helper_rsm(cpu_env);
8427#endif /* CONFIG_USER_ONLY */
8428        gen_eob(s);
8429        break;
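    /*
     * popcnt needs only ZF, computed from the source operand;
     * CC_OP_POPCNT encodes that, with cc_src holding the operand and
     * the remaining flags reading as zero.
     */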
8430    case 0x1b8: /* SSE4.2 popcnt */
        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
             PREFIX_REPZ) {
            goto illegal_op;
        }
        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT)) {
            goto illegal_op;
        }
8436
8437        modrm = x86_ldub_code(env, s);
8438        reg = ((modrm >> 3) & 7) | REX_R(s);
8439
8440        if (s->prefix & PREFIX_DATA) {
8441            ot = MO_16;
8442        } else {
8443            ot = mo_64_32(dflag);
8444        }
8445
8446        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8447        gen_extu(ot, s->T0);
8448        tcg_gen_mov_tl(cpu_cc_src, s->T0);
8449        tcg_gen_ctpop_tl(s->T0, s->T0);
8450        gen_op_mov_reg_v(s, ot, reg, s->T0);
8451
8452        set_cc_op(s, CC_OP_POPCNT);
8453        break;
8454    case 0x10e ... 0x10f:
8455        /* 3DNow! instructions, ignore prefixes */
8456        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8457        /* fall through */
8458    case 0x110 ... 0x117:
8459    case 0x128 ... 0x12f:
8460    case 0x138 ... 0x13a:
8461    case 0x150 ... 0x179:
8462    case 0x17c ... 0x17f:
8463    case 0x1c2:
8464    case 0x1c4 ... 0x1c6:
8465    case 0x1d0 ... 0x1fe:
8466        gen_sse(env, s, b, pc_start);
8467        break;
8468    default:
8469        goto unknown_op;
8470    }
8471    return s->pc;
8472 illegal_op:
8473    gen_illegal_opcode(s);
8474    return s->pc;
8475 unknown_op:
8476    gen_unknown_opcode(env, s);
8477    return s->pc;
8478}
8479
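/*
 * Allocate the TCG globals that mirror fixed fields of CPUX86State:
 * the condition-code state, general registers, segment bases and MPX
 * bound registers.
 */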
8480void tcg_x86_init(void)
8481{
8482    static const char reg_names[CPU_NB_REGS][4] = {
8483#ifdef TARGET_X86_64
8484        [R_EAX] = "rax",
8485        [R_EBX] = "rbx",
8486        [R_ECX] = "rcx",
8487        [R_EDX] = "rdx",
8488        [R_ESI] = "rsi",
8489        [R_EDI] = "rdi",
8490        [R_EBP] = "rbp",
8491        [R_ESP] = "rsp",
8492        [8]  = "r8",
8493        [9]  = "r9",
8494        [10] = "r10",
8495        [11] = "r11",
8496        [12] = "r12",
8497        [13] = "r13",
8498        [14] = "r14",
8499        [15] = "r15",
8500#else
8501        [R_EAX] = "eax",
8502        [R_EBX] = "ebx",
8503        [R_ECX] = "ecx",
8504        [R_EDX] = "edx",
8505        [R_ESI] = "esi",
8506        [R_EDI] = "edi",
8507        [R_EBP] = "ebp",
8508        [R_ESP] = "esp",
8509#endif
8510    };
8511    static const char seg_base_names[6][8] = {
8512        [R_CS] = "cs_base",
8513        [R_DS] = "ds_base",
8514        [R_ES] = "es_base",
8515        [R_FS] = "fs_base",
8516        [R_GS] = "gs_base",
8517        [R_SS] = "ss_base",
8518    };
8519    static const char bnd_regl_names[4][8] = {
8520        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8521    };
8522    static const char bnd_regu_names[4][8] = {
8523        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8524    };
8525    int i;
8526
8527    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8528                                       offsetof(CPUX86State, cc_op), "cc_op");
8529    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8530                                    "cc_dst");
8531    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8532                                    "cc_src");
8533    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8534                                     "cc_src2");
8535
8536    for (i = 0; i < CPU_NB_REGS; ++i) {
8537        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8538                                         offsetof(CPUX86State, regs[i]),
8539                                         reg_names[i]);
8540    }
8541
8542    for (i = 0; i < 6; ++i) {
8543        cpu_seg_base[i]
8544            = tcg_global_mem_new(cpu_env,
8545                                 offsetof(CPUX86State, segs[i].base),
8546                                 seg_base_names[i]);
8547    }
8548
8549    for (i = 0; i < 4; ++i) {
8550        cpu_bndl[i]
8551            = tcg_global_mem_new_i64(cpu_env,
8552                                     offsetof(CPUX86State, bnd_regs[i].lb),
8553                                     bnd_regl_names[i]);
8554        cpu_bndu[i]
8555            = tcg_global_mem_new_i64(cpu_env,
8556                                     offsetof(CPUX86State, bnd_regs[i].ub),
8557                                     bnd_regu_names[i]);
8558    }
8559}
8560
8561static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8562{
8563    DisasContext *dc = container_of(dcbase, DisasContext, base);
8564    CPUX86State *env = cpu->env_ptr;
8565    uint32_t flags = dc->base.tb->flags;
8566    uint32_t cflags = tb_cflags(dc->base.tb);
8567    int cpl = (flags >> HF_CPL_SHIFT) & 3;
8568    int iopl = (flags >> IOPL_SHIFT) & 3;
8569
8570    dc->cs_base = dc->base.tb->cs_base;
8571    dc->flags = flags;
8572#ifndef CONFIG_USER_ONLY
8573    dc->cpl = cpl;
8574    dc->iopl = iopl;
8575#endif
8576
8577    /* We make some simplifying assumptions; validate they're correct. */
8578    g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
8579    g_assert(CPL(dc) == cpl);
8580    g_assert(IOPL(dc) == iopl);
8581    g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
8582    g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
8583    g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
8584    g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
8585    g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
8586    g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
8587    g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
8588    g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));
8589
8590    dc->cc_op = CC_OP_DYNAMIC;
8591    dc->cc_op_dirty = false;
8592    dc->popl_esp_hack = 0;
8593    /* select memory access functions */
8594    dc->mem_index = 0;
8595#ifdef CONFIG_SOFTMMU
8596    dc->mem_index = cpu_mmu_index(env, false);
8597#endif
8598    dc->cpuid_features = env->features[FEAT_1_EDX];
8599    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8600    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8601    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8602    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8603    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8604    dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
8605                    (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
8606    /*
8607     * If jmp_opt, we want to handle each string instruction individually.
8608     * For icount also disable repz optimization so that each iteration
8609     * is accounted separately.
8610     */
8611    dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);
8612
8613    dc->T0 = tcg_temp_new();
8614    dc->T1 = tcg_temp_new();
8615    dc->A0 = tcg_temp_new();
8616
8617    dc->tmp0 = tcg_temp_new();
8618    dc->tmp1_i64 = tcg_temp_new_i64();
8619    dc->tmp2_i32 = tcg_temp_new_i32();
8620    dc->tmp3_i32 = tcg_temp_new_i32();
8621    dc->tmp4 = tcg_temp_new();
8622    dc->ptr0 = tcg_temp_new_ptr();
8623    dc->ptr1 = tcg_temp_new_ptr();
8624    dc->cc_srcT = tcg_temp_local_new();
8625}
8626
8627static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8628{
8629}
8630
8631static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8632{
8633    DisasContext *dc = container_of(dcbase, DisasContext, base);
8634
8635    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8636}
8637
8638static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8639{
8640    DisasContext *dc = container_of(dcbase, DisasContext, base);
8641    target_ulong pc_next;
8642
8643#ifdef TARGET_VSYSCALL_PAGE
8644    /*
8645     * Detect entry into the vsyscall page and invoke the syscall.
8646     */
8647    if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8648        gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8649        dc->base.pc_next = dc->pc + 1;
8650        return;
8651    }
8652#endif
8653
8654    pc_next = disas_insn(dc, cpu);
8655
8656    if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
        /*
         * In single-step mode we translate only one instruction and
         * then generate an exception.  Likewise, if IRQs were
         * inhibited with HF_INHIBIT_IRQ_MASK, we stop the translation
         * here so the flag can be cleared and pending IRQs get a
         * chance to happen.
         */
8662        dc->base.is_jmp = DISAS_TOO_MANY;
8663    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8664               && ((pc_next & TARGET_PAGE_MASK)
8665                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8666                       & TARGET_PAGE_MASK)
8667                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
        /*
         * In icount mode, do not cross a page boundary, since doing so
         * can raise an exception.  Stop only when the boundary would
         * be crossed by the next instruction of the block; if the
         * current instruction has already crossed it, that is fine,
         * because an exception has evidently not stopped this code.
         */
8674        dc->base.is_jmp = DISAS_TOO_MANY;
8675    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8676        dc->base.is_jmp = DISAS_TOO_MANY;
8677    }
8678
8679    dc->base.pc_next = pc_next;
8680}
8681
8682static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8683{
8684    DisasContext *dc = container_of(dcbase, DisasContext, base);
8685
8686    if (dc->base.is_jmp == DISAS_TOO_MANY) {
8687        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8688        gen_eob(dc);
8689    }
8690}
8691
8692static void i386_tr_disas_log(const DisasContextBase *dcbase,
8693                              CPUState *cpu, FILE *logfile)
8694{
8695    DisasContext *dc = container_of(dcbase, DisasContext, base);
8696
8697    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
8698    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
8699}
8700
8701static const TranslatorOps i386_tr_ops = {
8702    .init_disas_context = i386_tr_init_disas_context,
8703    .tb_start           = i386_tr_tb_start,
8704    .insn_start         = i386_tr_insn_start,
8705    .translate_insn     = i386_tr_translate_insn,
8706    .tb_stop            = i386_tr_tb_stop,
8707    .disas_log          = i386_tr_disas_log,
8708};
8709
8710/* generate intermediate code for basic block 'tb'.  */
8711void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8712{
8713    DisasContext dc;
8714
8715    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8716}
8717
8718void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8719                          target_ulong *data)
8720{
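    /*
     * data[] matches i386_tr_insn_start:
     * data[0] = eip + cs_base, data[1] = cc_op.
     */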
8721    int cc_op = data[1];
8722    env->eip = data[0] - tb->cs_base;
8723    if (cc_op != CC_OP_DYNAMIC) {
8724        env->cc_op = cc_op;
8725    }
8726}
8727