qemu/target/i386/tcg/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20
#define PREFIX_REX    0x40

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

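/*
 * Illustrative usage (not from upstream): these macros expand to GCC
 * case ranges.  CASE_MODRM_MEM_OP(0) matches mod = 00/01/10 (memory
 * operands) with reg = /0, while CASE_MODRM_OP(7) additionally matches
 * mod = 11 (register operands).  A decoder switch can thus write:
 *
 *     switch (modrm) {
 *     CASE_MODRM_MEM_OP(0):
 *         ... memory forms of group /0 ...
 *     CASE_MODRM_OP(7):
 *         ... all forms of group /7 ...
 *     }
 */
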
//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag;
    MemOp dflag;

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty;

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

/* The environment in which user-only runs is constrained. */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif

#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif

/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need neither sprinkle
 * ifdefs through the translator nor provide the helper function.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }

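/*
 * Illustrative expansion (not from upstream): on user-only builds,
 *
 *     STUB_HELPER(clgi, TCGv_env env)
 *
 * becomes
 *
 *     static inline void gen_helper_clgi(TCGv_env env)
 *     { qemu_build_not_reached(); }
 *
 * so the translator can reference gen_helper_clgi() on paths that are
 * provably unreachable under CONFIG_USER_ONLY, without either an ifdef
 * or a real helper implementation.
 */
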
#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

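/*
 * Illustrative example (not from upstream): set_cc_op() below uses
 * this table to discard globals that the new CC_OP no longer needs.
 * E.g. moving from CC_OP_ADDL (DST and SRC live) to CC_OP_LOGICL
 * (only DST live) discards cpu_cc_src, letting TCG's liveness pass
 * delete the computation that produced it.
 */
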
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N in 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    /* Any time the REX prefix is present, byte registers are uniform */
    if (reg < 4 || REX_PREFIX(s)) {
        return false;
    }
    return true;
}

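/*
 * Example (not from upstream): "mov %ah, %cl" with no REX prefix has
 * reg = 4, so byte_reg_is_xH() returns true and the source is bits
 * 15..8 of EAX; the same encoding with any REX prefix selects SPL,
 * i.e. bits 7..0 of RSP.
 */
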
/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

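/*
 * Example (not from upstream): opcode 0x88 (MOV Eb,Gb) has bit 0
 * clear, so mo_b_d(0x88, ot) yields MO_8, while 0x89 (MOV Ev,Gv)
 * yields the current operand size.  mo_b_d32() additionally caps the
 * size at MO_32, since IN/OUT never transfer more than 32 bits.
 */
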
static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the upper half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

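/*
 * Illustration (not from upstream): the MO_8 deposits above are what
 * make "mov %al, %ah" update only bits 15..8 of EAX, while the MO_32
 * case zero-extends, so "mov $1, %eax" clears the upper half of RAX
 * on x86_64 exactly as hardware does.
 */
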
static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

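/*
 * Worked example (not from upstream): for "mov (%bx), %al" in real
 * mode, aflag is MO_16, so A0 = zext16(BX); with no override and
 * ADDSEG set, the DS base is added and, because !CODE64, the linear
 * address is finally truncated to 32 bits.
 */
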
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

/*
 * Validate that access to [port, port + 1<<ot) is allowed.
 * Raise #GP, or VMM exit if not.
 */
static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
                         uint32_t svm_flags)
{
#ifdef CONFIG_USER_ONLY
    /*
     * We do not implement the ioperm(2) syscall, so the TSS check
     * will always fail.
     */
    gen_exception_gpf(s);
    return false;
#else
    if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
        gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
    }
    if (GUEST(s)) {
        target_ulong cur_eip = s->base.pc_next - s->cs_base;
        target_ulong next_eip = s->pc - s->cs_base;

        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
            svm_flags |= SVM_IOIO_REP_MASK;
        }
        svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
        gen_helper_svm_check_io(cpu_env, port,
                                tcg_constant_i32(svm_flags),
                                tcg_constant_i32(next_eip - cur_eip));
    }
    return true;
#endif
}

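/*
 * Illustration (not from upstream): a translated "inb %dx, %al" calls
 * gen_check_io() with ot = MO_8.  In protected mode with CPL > IOPL
 * (or in vm86 mode) the check_io helper consults the TSS permission
 * bitmap and may raise #GP; under SVM, an IOIO intercept may instead
 * exit to the VMM before the port is touched.
 */
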
static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

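/*
 * Illustration (not from upstream): gen_movs() is the straight-line
 * body of MOVS: load from DS:[E]SI (segment overridable), store to
 * ES:[E]DI (not overridable), then step both index registers by
 * +/-(1 << ot) as selected by EFLAGS.DF via gen_op_movl_T0_Dshift().
 */
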
static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

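/*
 * Illustrative value (not from upstream): after a 32-bit SUB, the
 * carry condition can be described without materialising EFLAGS as
 * roughly
 *
 *     (CCPrepare) { .cond = TCG_COND_LTU, .reg = src1 (from CC_SRCT),
 *                   .reg2 = CC_SRC, .mask = -1, .use_reg2 = true }
 *
 * i.e. "carry iff src1 < src2 unsigned", which gen_setcc1() and
 * gen_jcc1() below can lower to a single setcond or brcond.
 */
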
/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

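/*
 * Example (not from upstream): SETZ is gen_setcc1(s, JCC_Z << 1, reg).
 * Under CC_OP_EFLAGS the Z flag is the single bit CC_Z of CC_SRC, so
 * the power-of-two fast path above reduces the store to a shift plus
 * an AND with 1, with no setcond at all.
 */
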
static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
#ifdef CONFIG_USER_ONLY
        /* user-mode cpu should not be in IOBPT mode */
        g_assert_not_reached();
#else
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
#endif /* CONFIG_USER_ONLY */
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

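/*
 * Illustration (not from upstream): GEN_REPZ(movs) defines
 * gen_repz_movs(), which emits one iteration of "rep movs" per pass:
 * branch to next_eip when ECX is zero, run the string body, decrement
 * ECX, optionally re-test ECX (repz_opt), and loop by jumping back to
 * cur_eip.  The GEN_REPZ2 variants also test ZF, since REPZ/REPNZ
 * termination applies only to CMPS and SCAS.
 */
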
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* Generate #GP for the current instruction. */
static void gen_exception_gpf(DisasContext *s)
{
    gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
}

/* Check for cpl == 0; if not, raise #GP and return false. */
static bool check_cpl0(DisasContext *s)
{
    if (CPL(s) == 0) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* If vm86, check for iopl == 3; if not, raise #GP and return false. */
static bool check_vm86_iopl(DisasContext *s)
{
    if (!VM86(s) || IOPL(s) == 3) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* Check for iopl allowing access; if not, raise #GP and return false. */
static bool check_iopl(DisasContext *s)
{
    if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch(op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

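/*
 * Illustration (not from upstream): with a LOCK prefix, "lock addl
 * %eax, (%ebx)" takes the OP_ADDL branch above and becomes a single
 * tcg_gen_atomic_add_fetch_tl() on the address in A0; without LOCK it
 * is a plain load into T0, an add of T1 and a store through
 * gen_op_st_rm_T0_A0().
 */
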
/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we must
       take care not to disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

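/*
 * Illustration (not from upstream): the movcond sequence above encodes
 * the x86 rule that a shift by zero leaves EFLAGS unchanged.  Only for
 * count != 0 do CC_DST/CC_SRC and CC_OP receive the new values, so the
 * resulting CC_OP is unknown at translate time and must be marked
 * CC_OP_DYNAMIC.
 */
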
1569static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1570                            int is_right, int is_arith)
1571{
1572    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1573
1574    /* load */
1575    if (op1 == OR_TMP0) {
1576        gen_op_ld_v(s, ot, s->T0, s->A0);
1577    } else {
1578        gen_op_mov_v_reg(s, ot, s->T0, op1);
1579    }
1580
1581    tcg_gen_andi_tl(s->T1, s->T1, mask);
1582    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1583
1584    if (is_right) {
1585        if (is_arith) {
1586            gen_exts(ot, s->T0);
1587            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1588            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1589        } else {
1590            gen_extu(ot, s->T0);
1591            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1592            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1593        }
1594    } else {
1595        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1596        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1597    }
1598
1599    /* store */
1600    gen_op_st_rm_T0_A0(s, ot, op1);
1601
1602    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1603}
1604
1605static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1606                            int is_right, int is_arith)
1607{
1608    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1609
1610    /* load */
1611    if (op1 == OR_TMP0)
1612        gen_op_ld_v(s, ot, s->T0, s->A0);
1613    else
1614        gen_op_mov_v_reg(s, ot, s->T0, op1);
1615
1616    op2 &= mask;
1617    if (op2 != 0) {
1618        if (is_right) {
1619            if (is_arith) {
1620                gen_exts(ot, s->T0);
1621                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1622                tcg_gen_sari_tl(s->T0, s->T0, op2);
1623            } else {
1624                gen_extu(ot, s->T0);
1625                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1626                tcg_gen_shri_tl(s->T0, s->T0, op2);
1627            }
1628        } else {
1629            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1630            tcg_gen_shli_tl(s->T0, s->T0, op2);
1631        }
1632    }
1633
1634    /* store */
1635    gen_op_st_rm_T0_A0(s, ot, op1);
1636
1637    /* update eflags if non zero shift */
1638    if (op2 != 0) {
1639        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1640        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1641        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1642    }
1643}
1644
1645static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1646{
1647    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1648    TCGv_i32 t0, t1;
1649
1650    /* load */
1651    if (op1 == OR_TMP0) {
1652        gen_op_ld_v(s, ot, s->T0, s->A0);
1653    } else {
1654        gen_op_mov_v_reg(s, ot, s->T0, op1);
1655    }
1656
1657    tcg_gen_andi_tl(s->T1, s->T1, mask);
1658
1659    switch (ot) {
1660    case MO_8:
1661        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1662        tcg_gen_ext8u_tl(s->T0, s->T0);
1663        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1664        goto do_long;
1665    case MO_16:
1666        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1667        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1668        goto do_long;
1669    do_long:
1670#ifdef TARGET_X86_64
1671    case MO_32:
1672        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1673        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1674        if (is_right) {
1675            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1676        } else {
1677            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1678        }
1679        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1680        break;
1681#endif
1682    default:
1683        if (is_right) {
1684            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1685        } else {
1686            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1687        }
1688        break;
1689    }
1690
1691    /* store */
1692    gen_op_st_rm_T0_A0(s, ot, op1);
1693
1694    /* We'll need the flags computed into CC_SRC.  */
1695    gen_compute_eflags(s);
1696
1697    /* The value that was "rotated out" is now present at the other end
1698       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1699       since we've computed the flags into CC_SRC, these variables are
1700       currently dead.  */
1701    if (is_right) {
1702        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1703        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1704        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1705    } else {
1706        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1707        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1708    }
1709    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1710    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1711
    /* Now conditionally store the new CC_OP value.  If the rotate count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
       exactly as we computed above.  */
1716    t0 = tcg_const_i32(0);
1717    t1 = tcg_temp_new_i32();
1718    tcg_gen_trunc_tl_i32(t1, s->T1);
1719    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1720    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1721    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1722                        s->tmp2_i32, s->tmp3_i32);
1723    tcg_temp_free_i32(t0);
1724    tcg_temp_free_i32(t1);
1725
    /* The CC_OP value is no longer predictable.  */
1727    set_cc_op(s, CC_OP_DYNAMIC);
1728}
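
/*
 * Editor's sketch (hypothetical, excluded from the build): the carry and
 * overflow recovery above, shown for a 32-bit rotate.  The rotated-out
 * bit reappears at the far end of the result, and OF is the XOR of CF
 * with the adjacent result bit, matching CC_DST/CC_SRC2 as computed
 * above.
 */
#if 0
static void rol32_flags(uint32_t res, int *cf, int *of)
{
    *cf = res & 1;                  /* rotated-out bit re-enters at bit 0 */
    *of = ((res >> 31) & 1) ^ *cf;  /* OF = MSB XOR CF */
}

static void ror32_flags(uint32_t res, int *cf, int *of)
{
    *cf = (res >> 31) & 1;          /* rotated-out bit lands in the MSB */
    *of = ((res >> 30) & 1) ^ *cf;  /* OF = XOR of the two top bits */
}
#endif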
1729
1730static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1731                          int is_right)
1732{
1733    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1734    int shift;
1735
1736    /* load */
1737    if (op1 == OR_TMP0) {
1738        gen_op_ld_v(s, ot, s->T0, s->A0);
1739    } else {
1740        gen_op_mov_v_reg(s, ot, s->T0, op1);
1741    }
1742
1743    op2 &= mask;
1744    if (op2 != 0) {
1745        switch (ot) {
1746#ifdef TARGET_X86_64
1747        case MO_32:
1748            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1749            if (is_right) {
1750                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1751            } else {
1752                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1753            }
1754            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1755            break;
1756#endif
1757        default:
1758            if (is_right) {
1759                tcg_gen_rotri_tl(s->T0, s->T0, op2);
1760            } else {
1761                tcg_gen_rotli_tl(s->T0, s->T0, op2);
1762            }
1763            break;
1764        case MO_8:
1765            mask = 7;
1766            goto do_shifts;
1767        case MO_16:
1768            mask = 15;
1769        do_shifts:
1770            shift = op2 & mask;
1771            if (is_right) {
1772                shift = mask + 1 - shift;
1773            }
1774            gen_extu(ot, s->T0);
1775            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1776            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1777            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1778            break;
1779        }
1780    }
1781
1782    /* store */
1783    gen_op_st_rm_T0_A0(s, ot, op1);
1784
1785    if (op2 != 0) {
1786        /* Compute the flags into CC_SRC.  */
1787        gen_compute_eflags(s);
1788
1789        /* The value that was "rotated out" is now present at the other end
1790           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1791           since we've computed the flags into CC_SRC, these variables are
1792           currently dead.  */
1793        if (is_right) {
1794            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1795            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1796            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1797        } else {
1798            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1799            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1800        }
1801        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1802        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1803        set_cc_op(s, CC_OP_ADCOX);
1804    }
1805}
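
/*
 * Editor's sketch (hypothetical, excluded from the build): the do_shifts
 * decomposition above, which builds a narrow rotate out of two shifts.
 * A right rotate is handled by converting the count, as in the code.
 */
#if 0
static uint16_t rol16(uint16_t x, unsigned n)   /* n in 1..15 */
{
    return (uint16_t)((x << n) | (x >> (16 - n)));
}

static uint16_t ror16(uint16_t x, unsigned n)   /* n in 1..15 */
{
    return rol16(x, 16 - n);        /* shift = mask + 1 - shift above */
}
#endif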
1806
1807/* XXX: add faster immediate = 1 case */
1808static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809                           int is_right)
1810{
1811    gen_compute_eflags(s);
1812    assert(s->cc_op == CC_OP_EFLAGS);
1813
1814    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

1820    if (is_right) {
1821        switch (ot) {
1822        case MO_8:
1823            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824            break;
1825        case MO_16:
1826            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827            break;
1828        case MO_32:
1829            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830            break;
1831#ifdef TARGET_X86_64
1832        case MO_64:
1833            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834            break;
1835#endif
1836        default:
1837            tcg_abort();
1838        }
1839    } else {
1840        switch (ot) {
1841        case MO_8:
1842            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843            break;
1844        case MO_16:
1845            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846            break;
1847        case MO_32:
1848            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849            break;
1850#ifdef TARGET_X86_64
1851        case MO_64:
1852            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853            break;
1854#endif
1855        default:
1856            tcg_abort();
1857        }
1858    }
1859    /* store */
1860    gen_op_st_rm_T0_A0(s, ot, op1);
1861}
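
/*
 * Editor's sketch (hypothetical, excluded from the build): what the
 * rcl/rcr helpers compute, shown for 8 bits.  RCL is a 9-bit rotate
 * through CF, and the effective count is the count modulo 9.
 */
#if 0
static uint8_t rcl8(uint8_t x, int cf_in, unsigned n, int *cf_out)
{
    uint32_t wide = ((uint32_t)cf_in << 8) | x;   /* CF:operand, 9 bits */

    n %= 9;
    wide = ((wide << n) | (wide >> (9 - n))) & 0x1ff;
    *cf_out = (wide >> 8) & 1;
    return (uint8_t)wide;
}
#endif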
1862
1863/* XXX: add faster immediate case */
1864static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1865                             bool is_right, TCGv count_in)
1866{
1867    target_ulong mask = (ot == MO_64 ? 63 : 31);
1868    TCGv count;
1869
1870    /* load */
1871    if (op1 == OR_TMP0) {
1872        gen_op_ld_v(s, ot, s->T0, s->A0);
1873    } else {
1874        gen_op_mov_v_reg(s, ot, s->T0, op1);
1875    }
1876
1877    count = tcg_temp_new();
1878    tcg_gen_andi_tl(count, count_in, mask);
1879
1880    switch (ot) {
1881    case MO_16:
1882        /* Note: we implement the Intel behaviour for shift count > 16.
1883           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1884           portion by constructing it as a 32-bit value.  */
1885        if (is_right) {
1886            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1887            tcg_gen_mov_tl(s->T1, s->T0);
1888            tcg_gen_mov_tl(s->T0, s->tmp0);
1889        } else {
1890            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1891        }
        /*
         * If TARGET_X86_64 is defined, fall through into the MO_32 case;
         * otherwise fall through to the default case.
         */
1896    case MO_32:
1897#ifdef TARGET_X86_64
1898        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1899        tcg_gen_subi_tl(s->tmp0, count, 1);
1900        if (is_right) {
1901            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1902            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1903            tcg_gen_shr_i64(s->T0, s->T0, count);
1904        } else {
1905            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1906            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1907            tcg_gen_shl_i64(s->T0, s->T0, count);
1908            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1909            tcg_gen_shri_i64(s->T0, s->T0, 32);
1910        }
1911        break;
1912#endif
1913    default:
1914        tcg_gen_subi_tl(s->tmp0, count, 1);
1915        if (is_right) {
1916            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1917
1918            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1919            tcg_gen_shr_tl(s->T0, s->T0, count);
1920            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1921        } else {
1922            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1923            if (ot == MO_16) {
1924                /* Only needed if count > 16, for Intel behaviour.  */
1925                tcg_gen_subfi_tl(s->tmp4, 33, count);
1926                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1927                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1928            }
1929
1930            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1931            tcg_gen_shl_tl(s->T0, s->T0, count);
1932            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1933        }
1934        tcg_gen_movi_tl(s->tmp4, 0);
1935        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1936                           s->tmp4, s->T1);
1937        tcg_gen_or_tl(s->T0, s->T0, s->T1);
1938        break;
1939    }
1940
1941    /* store */
1942    gen_op_st_rm_T0_A0(s, ot, op1);
1943
1944    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1945    tcg_temp_free(count);
1946}
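
/*
 * Editor's sketch (hypothetical, excluded from the build): the
 * concatenate-and-shift idea used by the TARGET_X86_64 path above,
 * shown for a 32-bit SHRD.  The two halves form one 64-bit value and a
 * single wide shift produces the result.
 */
#if 0
static uint32_t shrd32(uint32_t lo, uint32_t hi, unsigned n)  /* n in 1..31 */
{
    uint64_t pair = ((uint64_t)hi << 32) | lo;
    return (uint32_t)(pair >> n);
}
#endif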
1947
1948static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949{
    if (s != OR_TMP1) {
        gen_op_mov_v_reg(s1, ot, s1->T1, s);
    }
    switch (op) {
1953    case OP_ROL:
1954        gen_rot_rm_T1(s1, ot, d, 0);
1955        break;
1956    case OP_ROR:
1957        gen_rot_rm_T1(s1, ot, d, 1);
1958        break;
1959    case OP_SHL:
1960    case OP_SHL1:
1961        gen_shift_rm_T1(s1, ot, d, 0, 0);
1962        break;
1963    case OP_SHR:
1964        gen_shift_rm_T1(s1, ot, d, 1, 0);
1965        break;
1966    case OP_SAR:
1967        gen_shift_rm_T1(s1, ot, d, 1, 1);
1968        break;
1969    case OP_RCL:
1970        gen_rotc_rm_T1(s1, ot, d, 0);
1971        break;
1972    case OP_RCR:
1973        gen_rotc_rm_T1(s1, ot, d, 1);
1974        break;
1975    }
1976}
1977
1978static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979{
    switch (op) {
1981    case OP_ROL:
1982        gen_rot_rm_im(s1, ot, d, c, 0);
1983        break;
1984    case OP_ROR:
1985        gen_rot_rm_im(s1, ot, d, c, 1);
1986        break;
1987    case OP_SHL:
1988    case OP_SHL1:
1989        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990        break;
1991    case OP_SHR:
1992        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993        break;
1994    case OP_SAR:
1995        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996        break;
1997    default:
1998        /* currently not optimized */
1999        tcg_gen_movi_tl(s1->T1, c);
2000        gen_shift(s1, op, ot, d, OR_TMP1);
2001        break;
2002    }
2003}
2004
2005#define X86_MAX_INSN_LENGTH 15
2006
2007static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2008{
2009    uint64_t pc = s->pc;
2010
2011    s->pc += num_bytes;
2012    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2013        /* If the instruction's 16th byte is on a different page than the 1st, a
2014         * page fault on the second page wins over the general protection fault
2015         * caused by the instruction being too long.
2016         * This can happen even if the operand is only one byte long!
2017         */
2018        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2019            volatile uint8_t unused =
2020                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2021            (void) unused;
2022        }
2023        siglongjmp(s->jmpbuf, 1);
2024    }
2025
2026    return pc;
2027}
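
/*
 * Editor's sketch (hypothetical, excluded from the build): the page-cross
 * predicate used above.  It compares the page of the last byte consumed
 * so far with the page of the last byte of the current fetch; if they
 * differ, the second page is probed so that a #PF there beats the #GP
 * for an over-long instruction.
 */
#if 0
static bool fetch_crosses_page(uint64_t prev_pc, uint64_t cur_pc)
{
    return (((cur_pc - 1) ^ (prev_pc - 1)) & TARGET_PAGE_MASK) != 0;
}
#endif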
2028
2029static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2030{
2031    return translator_ldub(env, &s->base, advance_pc(env, s, 1));
2032}
2033
2034static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2035{
2036    return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
2037}
2038
2039static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2040{
2041    return translator_lduw(env, &s->base, advance_pc(env, s, 2));
2042}
2043
2044static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2045{
2046    return translator_ldl(env, &s->base, advance_pc(env, s, 4));
2047}
2048
2049#ifdef TARGET_X86_64
2050static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
2051{
2052    return translator_ldq(env, &s->base, advance_pc(env, s, 8));
2053}
2054#endif
2055
2056/* Decompose an address.  */
2057
2058typedef struct AddressParts {
2059    int def_seg;
2060    int base;
2061    int index;
2062    int scale;
2063    target_long disp;
2064} AddressParts;
2065
2066static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2067                                    int modrm)
2068{
2069    int def_seg, base, index, scale, mod, rm;
2070    target_long disp;
2071    bool havesib;
2072
2073    def_seg = R_DS;
2074    index = -1;
2075    scale = 0;
2076    disp = 0;
2077
2078    mod = (modrm >> 6) & 3;
2079    rm = modrm & 7;
2080    base = rm | REX_B(s);
2081
2082    if (mod == 3) {
2083        /* Normally filtered out earlier, but including this path
2084           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2085        goto done;
2086    }
2087
2088    switch (s->aflag) {
2089    case MO_64:
2090    case MO_32:
2091        havesib = 0;
2092        if (rm == 4) {
2093            int code = x86_ldub_code(env, s);
2094            scale = (code >> 6) & 3;
2095            index = ((code >> 3) & 7) | REX_X(s);
2096            if (index == 4) {
2097                index = -1;  /* no index */
2098            }
2099            base = (code & 7) | REX_B(s);
2100            havesib = 1;
2101        }
2102
2103        switch (mod) {
2104        case 0:
2105            if ((base & 7) == 5) {
2106                base = -1;
2107                disp = (int32_t)x86_ldl_code(env, s);
2108                if (CODE64(s) && !havesib) {
2109                    base = -2;
2110                    disp += s->pc + s->rip_offset;
2111                }
2112            }
2113            break;
2114        case 1:
2115            disp = (int8_t)x86_ldub_code(env, s);
2116            break;
2117        default:
2118        case 2:
2119            disp = (int32_t)x86_ldl_code(env, s);
2120            break;
2121        }
2122
2123        /* For correct popl handling with esp.  */
2124        if (base == R_ESP && s->popl_esp_hack) {
2125            disp += s->popl_esp_hack;
2126        }
2127        if (base == R_EBP || base == R_ESP) {
2128            def_seg = R_SS;
2129        }
2130        break;
2131
2132    case MO_16:
2133        if (mod == 0) {
2134            if (rm == 6) {
2135                base = -1;
2136                disp = x86_lduw_code(env, s);
2137                break;
2138            }
2139        } else if (mod == 1) {
2140            disp = (int8_t)x86_ldub_code(env, s);
2141        } else {
2142            disp = (int16_t)x86_lduw_code(env, s);
2143        }
2144
2145        switch (rm) {
2146        case 0:
2147            base = R_EBX;
2148            index = R_ESI;
2149            break;
2150        case 1:
2151            base = R_EBX;
2152            index = R_EDI;
2153            break;
2154        case 2:
2155            base = R_EBP;
2156            index = R_ESI;
2157            def_seg = R_SS;
2158            break;
2159        case 3:
2160            base = R_EBP;
2161            index = R_EDI;
2162            def_seg = R_SS;
2163            break;
2164        case 4:
2165            base = R_ESI;
2166            break;
2167        case 5:
2168            base = R_EDI;
2169            break;
2170        case 6:
2171            base = R_EBP;
2172            def_seg = R_SS;
2173            break;
2174        default:
2175        case 7:
2176            base = R_EBX;
2177            break;
2178        }
2179        break;
2180
2181    default:
2182        tcg_abort();
2183    }
2184
2185 done:
2186    return (AddressParts){ def_seg, base, index, scale, disp };
2187}
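
/*
 * Editor's worked example (illustrative): for "8d 44 88 08",
 * i.e. lea eax, [eax + ecx*4 + 8] with a 32-bit address size:
 *   modrm 0x44 = 01 000 100b: mod=1 (disp8), reg=0, rm=4 (SIB follows)
 *   sib   0x88 = 10 001 000b: scale=2, index=1 (ECX), base=0 (EAX)
 *   disp8 0x08
 * so the function returns
 *   (AddressParts){ R_DS, R_EAX, R_ECX, 2, 8 }.
 */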
2188
2189/* Compute the address, with a minimum number of TCG ops.  */
2190static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2191{
2192    TCGv ea = NULL;
2193
2194    if (a.index >= 0) {
2195        if (a.scale == 0) {
2196            ea = cpu_regs[a.index];
2197        } else {
2198            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2199            ea = s->A0;
2200        }
2201        if (a.base >= 0) {
2202            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2203            ea = s->A0;
2204        }
2205    } else if (a.base >= 0) {
2206        ea = cpu_regs[a.base];
2207    }
2208    if (!ea) {
2209        tcg_gen_movi_tl(s->A0, a.disp);
2210        ea = s->A0;
2211    } else if (a.disp != 0) {
2212        tcg_gen_addi_tl(s->A0, ea, a.disp);
2213        ea = s->A0;
2214    }
2215
2216    return ea;
2217}
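
/*
 * Editor's sketch (hypothetical, excluded from the build): the address
 * gen_lea_modrm_1() computes, without the op-count minimisation:
 * disp + base + (index << scale), with each part optional.
 */
#if 0
static uint64_t ea_of(const AddressParts *a, const uint64_t *regs)
{
    uint64_t v = a->disp;

    if (a->base >= 0) {
        v += regs[a->base];
    }
    if (a->index >= 0) {
        v += regs[a->index] << a->scale;
    }
    return v;
}
#endif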
2218
2219static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220{
2221    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222    TCGv ea = gen_lea_modrm_1(s, a);
2223    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224}
2225
2226static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227{
2228    (void)gen_lea_modrm_0(env, s, modrm);
2229}
2230
2231/* Used for BNDCL, BNDCU, BNDCN.  */
2232static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2233                      TCGCond cond, TCGv_i64 bndv)
2234{
2235    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2236
2237    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2238    if (!CODE64(s)) {
2239        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2240    }
2241    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2242    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2243    gen_helper_bndck(cpu_env, s->tmp2_i32);
2244}
2245
2246/* used for LEA and MOV AX, mem */
2247static void gen_add_A0_ds_seg(DisasContext *s)
2248{
2249    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2250}
2251
2252/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2253   OR_TMP0 */
2254static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255                           MemOp ot, int reg, int is_store)
2256{
2257    int mod, rm;
2258
2259    mod = (modrm >> 6) & 3;
2260    rm = (modrm & 7) | REX_B(s);
    if (mod == 3) {
        if (is_store) {
            if (reg != OR_TMP0) {
                gen_op_mov_v_reg(s, ot, s->T0, reg);
            }
            gen_op_mov_reg_v(s, ot, rm, s->T0);
        } else {
            gen_op_mov_v_reg(s, ot, s->T0, rm);
            if (reg != OR_TMP0) {
                gen_op_mov_reg_v(s, ot, reg, s->T0);
            }
        }
    } else {
        gen_lea_modrm(env, s, modrm);
        if (is_store) {
            if (reg != OR_TMP0) {
                gen_op_mov_v_reg(s, ot, s->T0, reg);
            }
            gen_op_st_v(s, ot, s->T0, s->A0);
        } else {
            gen_op_ld_v(s, ot, s->T0, s->A0);
            if (reg != OR_TMP0) {
                gen_op_mov_reg_v(s, ot, reg, s->T0);
            }
        }
    }
2283}
2284
2285static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2286{
2287    uint32_t ret;
2288
2289    switch (ot) {
2290    case MO_8:
2291        ret = x86_ldub_code(env, s);
2292        break;
2293    case MO_16:
2294        ret = x86_lduw_code(env, s);
2295        break;
2296    case MO_32:
2297#ifdef TARGET_X86_64
2298    case MO_64:
2299#endif
2300        ret = x86_ldl_code(env, s);
2301        break;
2302    default:
2303        tcg_abort();
2304    }
2305    return ret;
2306}
2307
2308static inline int insn_const_size(MemOp ot)
2309{
2310    if (ot <= MO_32) {
2311        return 1 << ot;
2312    } else {
2313        return 4;
2314    }
2315}
2316
2317static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2318{
2319    target_ulong pc = s->cs_base + eip;
2320
2321    if (translator_use_goto_tb(&s->base, pc))  {
2322        /* jump to same page: we can use a direct jump */
2323        tcg_gen_goto_tb(tb_num);
2324        gen_jmp_im(s, eip);
2325        tcg_gen_exit_tb(s->base.tb, tb_num);
2326        s->base.is_jmp = DISAS_NORETURN;
2327    } else {
2328        /* jump to another page */
2329        gen_jmp_im(s, eip);
2330        gen_jr(s, s->tmp0);
2331    }
2332}
2333
2334static inline void gen_jcc(DisasContext *s, int b,
2335                           target_ulong val, target_ulong next_eip)
2336{
2337    TCGLabel *l1, *l2;
2338
2339    if (s->jmp_opt) {
2340        l1 = gen_new_label();
2341        gen_jcc1(s, b, l1);
2342
2343        gen_goto_tb(s, 0, next_eip);
2344
2345        gen_set_label(l1);
2346        gen_goto_tb(s, 1, val);
2347    } else {
2348        l1 = gen_new_label();
2349        l2 = gen_new_label();
2350        gen_jcc1(s, b, l1);
2351
2352        gen_jmp_im(s, next_eip);
2353        tcg_gen_br(l2);
2354
2355        gen_set_label(l1);
2356        gen_jmp_im(s, val);
2357        gen_set_label(l2);
2358        gen_eob(s);
2359    }
2360}
2361
2362static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2363                        int modrm, int reg)
2364{
2365    CCPrepare cc;
2366
2367    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2368
2369    cc = gen_prepare_cc(s, b, s->T1);
2370    if (cc.mask != -1) {
2371        TCGv t0 = tcg_temp_new();
2372        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2373        cc.reg = t0;
2374    }
2375    if (!cc.use_reg2) {
2376        cc.reg2 = tcg_const_tl(cc.imm);
2377    }
2378
2379    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2380                       s->T0, cpu_regs[reg]);
2381    gen_op_mov_reg_v(s, ot, reg, s->T0);
2382
2383    if (cc.mask != -1) {
2384        tcg_temp_free(cc.reg);
2385    }
2386    if (!cc.use_reg2) {
2387        tcg_temp_free(cc.reg2);
2388    }
2389}
2390
2391static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2392{
2393    tcg_gen_ld32u_tl(s->T0, cpu_env,
2394                     offsetof(CPUX86State,segs[seg_reg].selector));
2395}
2396
2397static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2398{
2399    tcg_gen_ext16u_tl(s->T0, s->T0);
2400    tcg_gen_st32_tl(s->T0, cpu_env,
2401                    offsetof(CPUX86State,segs[seg_reg].selector));
2402    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2403}
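
/*
 * Editor's note (illustrative): in real and VM86 modes the segment base
 * is simply the 16-bit selector shifted left by four, which is what the
 * tcg_gen_shli_tl() above materialises:
 *
 *     base = (uint32_t)selector << 4;    // e.g. 0x1234 -> 0x12340
 */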
2404
/* Move T0 to seg_reg, ending the TB if the CPU state may have changed.
   Never call this function with seg_reg == R_CS.  */
2407static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2408{
2409    if (PE(s) && !VM86(s)) {
2410        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2411        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2412        /* abort translation because the addseg value may change or
2413           because ss32 may change. For R_SS, translation must always
2414           stop as a special handling must be done to disable hardware
2415           interrupts for the next instruction */
2416        if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2417            s->base.is_jmp = DISAS_TOO_MANY;
2418        }
2419    } else {
2420        gen_op_movl_seg_T0_vm(s, seg_reg);
2421        if (seg_reg == R_SS) {
2422            s->base.is_jmp = DISAS_TOO_MANY;
2423        }
2424    }
2425}
2426
2427static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2428{
2429    /* no SVM activated; fast case */
2430    if (likely(!GUEST(s))) {
2431        return;
2432    }
2433    gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2434}
2435
2436static inline void gen_stack_update(DisasContext *s, int addend)
2437{
2438    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2439}
2440
2441/* Generate a push. It depends on ss32, addseg and dflag.  */
2442static void gen_push_v(DisasContext *s, TCGv val)
2443{
2444    MemOp d_ot = mo_pushpop(s, s->dflag);
2445    MemOp a_ot = mo_stacksize(s);
2446    int size = 1 << d_ot;
2447    TCGv new_esp = s->A0;
2448
2449    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2450
2451    if (!CODE64(s)) {
2452        if (ADDSEG(s)) {
2453            new_esp = s->tmp4;
2454            tcg_gen_mov_tl(new_esp, s->A0);
2455        }
2456        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2457    }
2458
2459    gen_op_st_v(s, d_ot, val, s->A0);
2460    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2461}
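
/*
 * Editor's sketch (hypothetical, excluded from the build): the ordering
 * above on a flat 32-bit stack.  The store goes to the decremented
 * address first; ESP itself is only written back afterwards, so a fault
 * in the store leaves ESP unmodified.
 */
#if 0
static uint32_t push32(uint8_t *mem, uint32_t esp, uint32_t val)
{
    esp -= 4;
    memcpy(mem + esp, &val, 4);   /* store at the new top of stack */
    return esp;                   /* caller commits ESP after the store */
}
#endif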
2462
2463/* two step pop is necessary for precise exceptions */
2464static MemOp gen_pop_T0(DisasContext *s)
2465{
2466    MemOp d_ot = mo_pushpop(s, s->dflag);
2467
2468    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2469    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2470
2471    return d_ot;
2472}
2473
2474static inline void gen_pop_update(DisasContext *s, MemOp ot)
2475{
2476    gen_stack_update(s, 1 << ot);
2477}
2478
2479static inline void gen_stack_A0(DisasContext *s)
2480{
2481    gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2482}
2483
2484static void gen_pusha(DisasContext *s)
2485{
2486    MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2487    MemOp d_ot = s->dflag;
2488    int size = 1 << d_ot;
2489    int i;
2490
2491    for (i = 0; i < 8; i++) {
2492        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2493        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2494        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2495    }
2496
2497    gen_stack_update(s, -8 * size);
2498}
2499
2500static void gen_popa(DisasContext *s)
2501{
2502    MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2503    MemOp d_ot = s->dflag;
2504    int size = 1 << d_ot;
2505    int i;
2506
2507    for (i = 0; i < 8; i++) {
2508        /* ESP is not reloaded */
2509        if (7 - i == R_ESP) {
2510            continue;
2511        }
2512        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2513        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2514        gen_op_ld_v(s, d_ot, s->T0, s->A0);
2515        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2516    }
2517
2518    gen_stack_update(s, 8 * size);
2519}
2520
2521static void gen_enter(DisasContext *s, int esp_addend, int level)
2522{
2523    MemOp d_ot = mo_pushpop(s, s->dflag);
2524    MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2525    int size = 1 << d_ot;
2526
2527    /* Push BP; compute FrameTemp into T1.  */
2528    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2529    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2530    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2531
2532    level &= 31;
2533    if (level != 0) {
2534        int i;
2535
2536        /* Copy level-1 pointers from the previous frame.  */
2537        for (i = 1; i < level; ++i) {
2538            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2539            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2540            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2541
2542            tcg_gen_subi_tl(s->A0, s->T1, size * i);
2543            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2544            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2545        }
2546
2547        /* Push the current FrameTemp as the last level.  */
2548        tcg_gen_subi_tl(s->A0, s->T1, size * level);
2549        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2550        gen_op_st_v(s, d_ot, s->T1, s->A0);
2551    }
2552
2553    /* Copy the FrameTemp value to EBP.  */
2554    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2555
2556    /* Compute the final value of ESP.  */
2557    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2558    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2559}
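
/*
 * Editor's sketch (hypothetical, excluded from the build): the ENTER
 * sequence above on a flat 32-bit stack, word-indexed for brevity.
 * FrameTemp is the ESP value right after the initial push of EBP.
 */
#if 0
static void enter32(uint32_t mem[], uint32_t *esp, uint32_t *ebp,
                    uint32_t locals, unsigned level)
{
    uint32_t frame_temp;
    unsigned i;

    *esp -= 4;
    mem[*esp / 4] = *ebp;                   /* push old EBP */
    frame_temp = *esp;

    level &= 31;
    for (i = 1; i < level; i++) {           /* copy outer frame pointers */
        mem[(frame_temp - 4 * i) / 4] = mem[(*ebp - 4 * i) / 4];
    }
    if (level != 0) {                       /* push FrameTemp as last level */
        mem[(frame_temp - 4 * level) / 4] = frame_temp;
    }

    *ebp = frame_temp;
    *esp = frame_temp - locals - 4 * level; /* allocate the local frame */
}
#endif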
2560
2561static void gen_leave(DisasContext *s)
2562{
2563    MemOp d_ot = mo_pushpop(s, s->dflag);
2564    MemOp a_ot = mo_stacksize(s);
2565
2566    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2567    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2568
2569    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2570
2571    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2572    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2573}
2574
2575/* Similarly, except that the assumption here is that we don't decode
2576   the instruction at all -- either a missing opcode, an unimplemented
2577   feature, or just a bogus instruction stream.  */
2578static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2579{
2580    gen_illegal_opcode(s);
2581
2582    if (qemu_loglevel_mask(LOG_UNIMP)) {
2583        FILE *logfile = qemu_log_lock();
2584        target_ulong pc = s->pc_start, end = s->pc;
2585
2586        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2587        for (; pc < end; ++pc) {
2588            qemu_log(" %02x", cpu_ldub_code(env, pc));
2589        }
2590        qemu_log("\n");
2591        qemu_log_unlock(logfile);
2592    }
2593}
2594
2595/* an interrupt is different from an exception because of the
2596   privilege checks */
2597static void gen_interrupt(DisasContext *s, int intno,
2598                          target_ulong cur_eip, target_ulong next_eip)
2599{
2600    gen_update_cc_op(s);
2601    gen_jmp_im(s, cur_eip);
2602    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2603                               tcg_const_i32(next_eip - cur_eip));
2604    s->base.is_jmp = DISAS_NORETURN;
2605}
2606
2607static void gen_set_hflag(DisasContext *s, uint32_t mask)
2608{
2609    if ((s->flags & mask) == 0) {
2610        TCGv_i32 t = tcg_temp_new_i32();
2611        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2612        tcg_gen_ori_i32(t, t, mask);
2613        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2614        tcg_temp_free_i32(t);
2615        s->flags |= mask;
2616    }
2617}
2618
2619static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2620{
2621    if (s->flags & mask) {
2622        TCGv_i32 t = tcg_temp_new_i32();
2623        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2624        tcg_gen_andi_i32(t, t, ~mask);
2625        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2626        tcg_temp_free_i32(t);
2627        s->flags &= ~mask;
2628    }
2629}
2630
2631/* Clear BND registers during legacy branches.  */
2632static void gen_bnd_jmp(DisasContext *s)
2633{
2634    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2635       and if the BNDREGs are known to be in use (non-zero) already.
2636       The helper itself will check BNDPRESERVE at runtime.  */
2637    if ((s->prefix & PREFIX_REPNZ) == 0
2638        && (s->flags & HF_MPX_EN_MASK) != 0
2639        && (s->flags & HF_MPX_IU_MASK) != 0) {
2640        gen_helper_bnd_jmp(cpu_env);
2641    }
2642}
2643
2644/* Generate an end of block. Trace exception is also generated if needed.
2645   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2646   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2647   S->TF.  This is used by the syscall/sysret insns.  */
2648static void
2649do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2650{
2651    gen_update_cc_op(s);
2652
2653    /* If several instructions disable interrupts, only the first does it.  */
2654    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2655        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2656    } else {
2657        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2658    }
2659
2660    if (s->base.tb->flags & HF_RF_MASK) {
2661        gen_helper_reset_rf(cpu_env);
2662    }
2663    if (recheck_tf) {
2664        gen_helper_rechecking_single_step(cpu_env);
2665        tcg_gen_exit_tb(NULL, 0);
2666    } else if (s->flags & HF_TF_MASK) {
2667        gen_helper_single_step(cpu_env);
2668    } else if (jr) {
2669        tcg_gen_lookup_and_goto_ptr();
2670    } else {
2671        tcg_gen_exit_tb(NULL, 0);
2672    }
2673    s->base.is_jmp = DISAS_NORETURN;
2674}
2675
2676static inline void
2677gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2678{
2679    do_gen_eob_worker(s, inhibit, recheck_tf, false);
2680}
2681
2682/* End of block.
2683   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2684static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2685{
2686    gen_eob_worker(s, inhibit, false);
2687}
2688
2689/* End of block, resetting the inhibit irq flag.  */
2690static void gen_eob(DisasContext *s)
2691{
2692    gen_eob_worker(s, false, false);
2693}
2694
2695/* Jump to register */
2696static void gen_jr(DisasContext *s, TCGv dest)
2697{
2698    do_gen_eob_worker(s, false, false, true);
2699}
2700
/* Generate a jump to eip.  No segment change must happen before this,
   as a direct jump to the next block may occur.  */
2703static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2704{
2705    gen_update_cc_op(s);
2706    set_cc_op(s, CC_OP_DYNAMIC);
2707    if (s->jmp_opt) {
2708        gen_goto_tb(s, tb_num, eip);
2709    } else {
2710        gen_jmp_im(s, eip);
2711        gen_eob(s);
2712    }
2713}
2714
2715static void gen_jmp(DisasContext *s, target_ulong eip)
2716{
2717    gen_jmp_tb(s, eip, 0);
2718}
2719
2720static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2721{
2722    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2723    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2724}
2725
2726static inline void gen_stq_env_A0(DisasContext *s, int offset)
2727{
2728    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2729    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2730}
2731
2732static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2733{
2734    int mem_index = s->mem_index;
2735    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2736    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2737    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2738    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2739    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2740}
2741
2742static inline void gen_sto_env_A0(DisasContext *s, int offset)
2743{
2744    int mem_index = s->mem_index;
2745    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2746    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2747    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2748    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2749    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2750}
2751
2752static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2753{
2754    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2755    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2756    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2757    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2758}
2759
2760static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2761{
2762    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2763    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2764}
2765
2766static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2767{
2768    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2769    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2770}
2771
2772static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2773{
2774    tcg_gen_movi_i64(s->tmp1_i64, 0);
2775    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2776}
2777
2778typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2779typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2780typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2781typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2782typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2783typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2784                               TCGv_i32 val);
2785typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2786typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2787                               TCGv val);
2788
2789#define SSE_SPECIAL ((void *)1)
2790#define SSE_DUMMY ((void *)2)
2791
2792#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2793#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2794                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2795
2796static const SSEFunc_0_epp sse_op_table1[256][4] = {
2797    /* 3DNow! extensions */
2798    [0x0e] = { SSE_DUMMY }, /* femms */
2799    [0x0f] = { SSE_DUMMY }, /* pf... */
2800    /* pure SSE operations */
2801    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2802    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2803    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2804    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2805    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2806    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2807    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2808    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2809
2810    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2811    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2812    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2813    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2814    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2815    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2816    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2817    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2818    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2819    [0x51] = SSE_FOP(sqrt),
2820    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2821    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2822    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2823    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2824    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2825    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2826    [0x58] = SSE_FOP(add),
2827    [0x59] = SSE_FOP(mul),
2828    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2829               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2830    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2831    [0x5c] = SSE_FOP(sub),
2832    [0x5d] = SSE_FOP(min),
2833    [0x5e] = SSE_FOP(div),
2834    [0x5f] = SSE_FOP(max),
2835
2836    [0xc2] = SSE_FOP(cmpeq),
2837    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2838               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2839
2840    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2841    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2842    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2843
2844    /* MMX ops and their SSE extensions */
2845    [0x60] = MMX_OP2(punpcklbw),
2846    [0x61] = MMX_OP2(punpcklwd),
2847    [0x62] = MMX_OP2(punpckldq),
2848    [0x63] = MMX_OP2(packsswb),
2849    [0x64] = MMX_OP2(pcmpgtb),
2850    [0x65] = MMX_OP2(pcmpgtw),
2851    [0x66] = MMX_OP2(pcmpgtl),
2852    [0x67] = MMX_OP2(packuswb),
2853    [0x68] = MMX_OP2(punpckhbw),
2854    [0x69] = MMX_OP2(punpckhwd),
2855    [0x6a] = MMX_OP2(punpckhdq),
2856    [0x6b] = MMX_OP2(packssdw),
2857    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2858    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2859    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movdqu */
2861    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2862               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2863               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2864               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2865    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2866    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2867    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2868    [0x74] = MMX_OP2(pcmpeqb),
2869    [0x75] = MMX_OP2(pcmpeqw),
2870    [0x76] = MMX_OP2(pcmpeql),
2871    [0x77] = { SSE_DUMMY }, /* emms */
2872    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2873    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2874    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2875    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2876    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2877    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2878    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2879    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2880    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2881    [0xd1] = MMX_OP2(psrlw),
2882    [0xd2] = MMX_OP2(psrld),
2883    [0xd3] = MMX_OP2(psrlq),
2884    [0xd4] = MMX_OP2(paddq),
2885    [0xd5] = MMX_OP2(pmullw),
2886    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2887    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2888    [0xd8] = MMX_OP2(psubusb),
2889    [0xd9] = MMX_OP2(psubusw),
2890    [0xda] = MMX_OP2(pminub),
2891    [0xdb] = MMX_OP2(pand),
2892    [0xdc] = MMX_OP2(paddusb),
2893    [0xdd] = MMX_OP2(paddusw),
2894    [0xde] = MMX_OP2(pmaxub),
2895    [0xdf] = MMX_OP2(pandn),
2896    [0xe0] = MMX_OP2(pavgb),
2897    [0xe1] = MMX_OP2(psraw),
2898    [0xe2] = MMX_OP2(psrad),
2899    [0xe3] = MMX_OP2(pavgw),
2900    [0xe4] = MMX_OP2(pmulhuw),
2901    [0xe5] = MMX_OP2(pmulhw),
2902    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2904    [0xe8] = MMX_OP2(psubsb),
2905    [0xe9] = MMX_OP2(psubsw),
2906    [0xea] = MMX_OP2(pminsw),
2907    [0xeb] = MMX_OP2(por),
2908    [0xec] = MMX_OP2(paddsb),
2909    [0xed] = MMX_OP2(paddsw),
2910    [0xee] = MMX_OP2(pmaxsw),
2911    [0xef] = MMX_OP2(pxor),
2912    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2913    [0xf1] = MMX_OP2(psllw),
2914    [0xf2] = MMX_OP2(pslld),
2915    [0xf3] = MMX_OP2(psllq),
2916    [0xf4] = MMX_OP2(pmuludq),
2917    [0xf5] = MMX_OP2(pmaddwd),
2918    [0xf6] = MMX_OP2(psadbw),
2919    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2920               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2921    [0xf8] = MMX_OP2(psubb),
2922    [0xf9] = MMX_OP2(psubw),
2923    [0xfa] = MMX_OP2(psubl),
2924    [0xfb] = MMX_OP2(psubq),
2925    [0xfc] = MMX_OP2(paddb),
2926    [0xfd] = MMX_OP2(paddw),
2927    [0xfe] = MMX_OP2(paddl),
2928};
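
/*
 * Editor's note (illustrative): the four columns above are selected by
 * the mandatory prefix, exactly as gen_sse() computes b1:
 */
#if 0
static int sse_prefix_column(int prefix)
{
    if (prefix & PREFIX_DATA) {         /* 0x66 */
        return 1;
    } else if (prefix & PREFIX_REPZ) {  /* 0xf3 */
        return 2;
    } else if (prefix & PREFIX_REPNZ) { /* 0xf2 */
        return 3;
    }
    return 0;                           /* no prefix */
}
#endif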
2929
2930static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2931    [0 + 2] = MMX_OP2(psrlw),
2932    [0 + 4] = MMX_OP2(psraw),
2933    [0 + 6] = MMX_OP2(psllw),
2934    [8 + 2] = MMX_OP2(psrld),
2935    [8 + 4] = MMX_OP2(psrad),
2936    [8 + 6] = MMX_OP2(pslld),
2937    [16 + 2] = MMX_OP2(psrlq),
2938    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2939    [16 + 6] = MMX_OP2(psllq),
2940    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2941};
2942
2943static const SSEFunc_0_epi sse_op_table3ai[] = {
2944    gen_helper_cvtsi2ss,
2945    gen_helper_cvtsi2sd
2946};
2947
2948#ifdef TARGET_X86_64
2949static const SSEFunc_0_epl sse_op_table3aq[] = {
2950    gen_helper_cvtsq2ss,
2951    gen_helper_cvtsq2sd
2952};
2953#endif
2954
2955static const SSEFunc_i_ep sse_op_table3bi[] = {
2956    gen_helper_cvttss2si,
2957    gen_helper_cvtss2si,
2958    gen_helper_cvttsd2si,
2959    gen_helper_cvtsd2si
2960};
2961
2962#ifdef TARGET_X86_64
2963static const SSEFunc_l_ep sse_op_table3bq[] = {
2964    gen_helper_cvttss2sq,
2965    gen_helper_cvtss2sq,
2966    gen_helper_cvttsd2sq,
2967    gen_helper_cvtsd2sq
2968};
2969#endif
2970
2971static const SSEFunc_0_epp sse_op_table4[8][4] = {
2972    SSE_FOP(cmpeq),
2973    SSE_FOP(cmplt),
2974    SSE_FOP(cmple),
2975    SSE_FOP(cmpunord),
2976    SSE_FOP(cmpneq),
2977    SSE_FOP(cmpnlt),
2978    SSE_FOP(cmpnle),
2979    SSE_FOP(cmpord),
2980};
2981
2982static const SSEFunc_0_epp sse_op_table5[256] = {
2983    [0x0c] = gen_helper_pi2fw,
2984    [0x0d] = gen_helper_pi2fd,
2985    [0x1c] = gen_helper_pf2iw,
2986    [0x1d] = gen_helper_pf2id,
2987    [0x8a] = gen_helper_pfnacc,
2988    [0x8e] = gen_helper_pfpnacc,
2989    [0x90] = gen_helper_pfcmpge,
2990    [0x94] = gen_helper_pfmin,
2991    [0x96] = gen_helper_pfrcp,
2992    [0x97] = gen_helper_pfrsqrt,
2993    [0x9a] = gen_helper_pfsub,
2994    [0x9e] = gen_helper_pfadd,
2995    [0xa0] = gen_helper_pfcmpgt,
2996    [0xa4] = gen_helper_pfmax,
2997    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2998    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2999    [0xaa] = gen_helper_pfsubr,
3000    [0xae] = gen_helper_pfacc,
3001    [0xb0] = gen_helper_pfcmpeq,
3002    [0xb4] = gen_helper_pfmul,
3003    [0xb6] = gen_helper_movq, /* pfrcpit2 */
3004    [0xb7] = gen_helper_pmulhrw_mmx,
3005    [0xbb] = gen_helper_pswapd,
3006    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3007};
3008
3009struct SSEOpHelper_epp {
3010    SSEFunc_0_epp op[2];
3011    uint32_t ext_mask;
3012};
3013
3014struct SSEOpHelper_eppi {
3015    SSEFunc_0_eppi op[2];
3016    uint32_t ext_mask;
3017};
3018
3019#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
3020#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
3021#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
3022#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
3023#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
3024        CPUID_EXT_PCLMULQDQ }
3025#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3026
3027static const struct SSEOpHelper_epp sse_op_table6[256] = {
3028    [0x00] = SSSE3_OP(pshufb),
3029    [0x01] = SSSE3_OP(phaddw),
3030    [0x02] = SSSE3_OP(phaddd),
3031    [0x03] = SSSE3_OP(phaddsw),
3032    [0x04] = SSSE3_OP(pmaddubsw),
3033    [0x05] = SSSE3_OP(phsubw),
3034    [0x06] = SSSE3_OP(phsubd),
3035    [0x07] = SSSE3_OP(phsubsw),
3036    [0x08] = SSSE3_OP(psignb),
3037    [0x09] = SSSE3_OP(psignw),
3038    [0x0a] = SSSE3_OP(psignd),
3039    [0x0b] = SSSE3_OP(pmulhrsw),
3040    [0x10] = SSE41_OP(pblendvb),
3041    [0x14] = SSE41_OP(blendvps),
3042    [0x15] = SSE41_OP(blendvpd),
3043    [0x17] = SSE41_OP(ptest),
3044    [0x1c] = SSSE3_OP(pabsb),
3045    [0x1d] = SSSE3_OP(pabsw),
3046    [0x1e] = SSSE3_OP(pabsd),
3047    [0x20] = SSE41_OP(pmovsxbw),
3048    [0x21] = SSE41_OP(pmovsxbd),
3049    [0x22] = SSE41_OP(pmovsxbq),
3050    [0x23] = SSE41_OP(pmovsxwd),
3051    [0x24] = SSE41_OP(pmovsxwq),
3052    [0x25] = SSE41_OP(pmovsxdq),
3053    [0x28] = SSE41_OP(pmuldq),
3054    [0x29] = SSE41_OP(pcmpeqq),
    [0x2a] = SSE41_SPECIAL, /* movntdqa */
3056    [0x2b] = SSE41_OP(packusdw),
3057    [0x30] = SSE41_OP(pmovzxbw),
3058    [0x31] = SSE41_OP(pmovzxbd),
3059    [0x32] = SSE41_OP(pmovzxbq),
3060    [0x33] = SSE41_OP(pmovzxwd),
3061    [0x34] = SSE41_OP(pmovzxwq),
3062    [0x35] = SSE41_OP(pmovzxdq),
3063    [0x37] = SSE42_OP(pcmpgtq),
3064    [0x38] = SSE41_OP(pminsb),
3065    [0x39] = SSE41_OP(pminsd),
3066    [0x3a] = SSE41_OP(pminuw),
3067    [0x3b] = SSE41_OP(pminud),
3068    [0x3c] = SSE41_OP(pmaxsb),
3069    [0x3d] = SSE41_OP(pmaxsd),
3070    [0x3e] = SSE41_OP(pmaxuw),
3071    [0x3f] = SSE41_OP(pmaxud),
3072    [0x40] = SSE41_OP(pmulld),
3073    [0x41] = SSE41_OP(phminposuw),
3074    [0xdb] = AESNI_OP(aesimc),
3075    [0xdc] = AESNI_OP(aesenc),
3076    [0xdd] = AESNI_OP(aesenclast),
3077    [0xde] = AESNI_OP(aesdec),
3078    [0xdf] = AESNI_OP(aesdeclast),
3079};
3080
3081static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3082    [0x08] = SSE41_OP(roundps),
3083    [0x09] = SSE41_OP(roundpd),
3084    [0x0a] = SSE41_OP(roundss),
3085    [0x0b] = SSE41_OP(roundsd),
3086    [0x0c] = SSE41_OP(blendps),
3087    [0x0d] = SSE41_OP(blendpd),
3088    [0x0e] = SSE41_OP(pblendw),
3089    [0x0f] = SSSE3_OP(palignr),
3090    [0x14] = SSE41_SPECIAL, /* pextrb */
3091    [0x15] = SSE41_SPECIAL, /* pextrw */
3092    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3093    [0x17] = SSE41_SPECIAL, /* extractps */
3094    [0x20] = SSE41_SPECIAL, /* pinsrb */
3095    [0x21] = SSE41_SPECIAL, /* insertps */
3096    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3097    [0x40] = SSE41_OP(dpps),
3098    [0x41] = SSE41_OP(dppd),
3099    [0x42] = SSE41_OP(mpsadbw),
3100    [0x44] = PCLMULQDQ_OP(pclmulqdq),
3101    [0x60] = SSE42_OP(pcmpestrm),
3102    [0x61] = SSE42_OP(pcmpestri),
3103    [0x62] = SSE42_OP(pcmpistrm),
3104    [0x63] = SSE42_OP(pcmpistri),
3105    [0xdf] = AESNI_OP(aeskeygenassist),
3106};
3107
3108static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3109                    target_ulong pc_start)
3110{
3111    int b1, op1_offset, op2_offset, is_xmm, val;
3112    int modrm, mod, rm, reg;
3113    SSEFunc_0_epp sse_fn_epp;
3114    SSEFunc_0_eppi sse_fn_eppi;
3115    SSEFunc_0_ppi sse_fn_ppi;
3116    SSEFunc_0_eppt sse_fn_eppt;
3117    MemOp ot;
3118
3119    b &= 0xff;
    if (s->prefix & PREFIX_DATA) {
        b1 = 1;
    } else if (s->prefix & PREFIX_REPZ) {
        b1 = 2;
    } else if (s->prefix & PREFIX_REPNZ) {
        b1 = 3;
    } else {
        b1 = 0;
    }
3128    sse_fn_epp = sse_op_table1[b][b1];
3129    if (!sse_fn_epp) {
3130        goto unknown_op;
3131    }
    if ((b >= 0x10 && b <= 0x5f) || b == 0xc2 || b == 0xc6) {
        is_xmm = 1;
    } else {
        /* With no prefix, these opcodes are the MMX forms.  */
        is_xmm = (b1 != 0);
    }
3142    /* simple MMX/SSE operation */
3143    if (s->flags & HF_TS_MASK) {
3144        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3145        return;
3146    }
3147    if (s->flags & HF_EM_MASK) {
3148    illegal_op:
3149        gen_illegal_opcode(s);
3150        return;
3151    }
3152    if (is_xmm
3153        && !(s->flags & HF_OSFXSR_MASK)
3154        && (b != 0x38 && b != 0x3a)) {
3155        goto unknown_op;
3156    }
3157    if (b == 0x0e) {
3158        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3159            /* If we were fully decoding this we might use illegal_op.  */
3160            goto unknown_op;
3161        }
3162        /* femms */
3163        gen_helper_emms(cpu_env);
3164        return;
3165    }
3166    if (b == 0x77) {
3167        /* emms */
3168        gen_helper_emms(cpu_env);
3169        return;
3170    }
3171    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3172       the static cpu state) */
3173    if (!is_xmm) {
3174        gen_helper_enter_mmx(cpu_env);
3175    }
3176
3177    modrm = x86_ldub_code(env, s);
3178    reg = ((modrm >> 3) & 7);
3179    if (is_xmm) {
3180        reg |= REX_R(s);
3181    }
3182    mod = (modrm >> 6) & 3;
3183    if (sse_fn_epp == SSE_SPECIAL) {
3184        b |= (b1 << 8);
3185        switch(b) {
3186        case 0x0e7: /* movntq */
3187            if (mod == 3) {
3188                goto illegal_op;
3189            }
3190            gen_lea_modrm(env, s, modrm);
3191            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3192            break;
3193        case 0x1e7: /* movntdq */
3194        case 0x02b: /* movntps */
        case 0x12b: /* movntpd */
            if (mod == 3) {
                goto illegal_op;
            }
3198            gen_lea_modrm(env, s, modrm);
3199            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3200            break;
3201        case 0x3f0: /* lddqu */
            if (mod == 3) {
                goto illegal_op;
            }
3204            gen_lea_modrm(env, s, modrm);
3205            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3206            break;
3207        case 0x22b: /* movntss */
3208        case 0x32b: /* movntsd */
            if (mod == 3) {
                goto illegal_op;
            }
3211            gen_lea_modrm(env, s, modrm);
3212            if (b1 & 1) {
3213                gen_stq_env_A0(s, offsetof(CPUX86State,
3214                                           xmm_regs[reg].ZMM_Q(0)));
3215            } else {
3216                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3217                    xmm_regs[reg].ZMM_L(0)));
3218                gen_op_st_v(s, MO_32, s->T0, s->A0);
3219            }
3220            break;
3221        case 0x6e: /* movd mm, ea */
3222#ifdef TARGET_X86_64
3223            if (s->dflag == MO_64) {
3224                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3225                tcg_gen_st_tl(s->T0, cpu_env,
3226                              offsetof(CPUX86State, fpregs[reg].mmx));
3227            } else
3228#endif
3229            {
3230                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3231                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3232                                 offsetof(CPUX86State,fpregs[reg].mmx));
3233                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3234                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3235            }
3236            break;
        case 0x16e: /* movd xmm, ea */
#ifdef TARGET_X86_64
            if (s->dflag == MO_64) {
                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg]));
                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
            } else
#endif
            {
                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg]));
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
            }
            break;
        case 0x6f: /* movq mm, ea */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
            } else {
                rm = (modrm & 7);
                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                               offsetof(CPUX86State,fpregs[rm].mmx));
                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                               offsetof(CPUX86State,fpregs[reg].mmx));
            }
            break;
        case 0x010: /* movups */
        case 0x110: /* movupd */
        case 0x028: /* movaps */
        case 0x128: /* movapd */
        case 0x16f: /* movdqa xmm, ea */
        case 0x26f: /* movdqu xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
                            offsetof(CPUX86State,xmm_regs[rm]));
            }
            break;
        case 0x210: /* movss xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, MO_32, s->T0, s->A0);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
                tcg_gen_movi_tl(s->T0, 0);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
            }
            break;
        case 0x310: /* movsd xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
                tcg_gen_movi_tl(s->T0, 0);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
            }
            break;
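        /* For 0f 12 and 0f 16 the register-to-register forms decode as
           different instructions (movhlps/movlhps) than the memory forms
           (movlps/movhps), hence the mod == 3 special cases below.  */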
        case 0x012: /* movlps */
        case 0x112: /* movlpd */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                /* movhlps */
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
            }
            break;
        case 0x212: /* movsldup */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
            }
            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
            break;
        case 0x312: /* movddup */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
            }
            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
            break;
        case 0x016: /* movhps */
        case 0x116: /* movhpd */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(1)));
            } else {
                /* movlhps */
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
            }
            break;
        case 0x216: /* movshdup */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
            }
            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
            break;
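        /* SSE4a extrq/insertq with immediate operands: two immediate
           bytes follow the modrm byte, giving the bit-field length and
           start index; reg must be 0 for the 66-prefixed extrq form.  */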
        case 0x178:
        case 0x378:
            {
                int bit_index, field_length;

                if (b1 == 1 && reg != 0) {
                    goto illegal_op;
                }
                field_length = x86_ldub_code(env, s) & 0x3F;
                bit_index = x86_ldub_code(env, s) & 0x3F;
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                    offsetof(CPUX86State,xmm_regs[reg]));
                if (b1 == 1) {
                    gen_helper_extrq_i(cpu_env, s->ptr0,
                                       tcg_const_i32(bit_index),
                                       tcg_const_i32(field_length));
                } else {
                    gen_helper_insertq_i(cpu_env, s->ptr0,
                                         tcg_const_i32(bit_index),
                                         tcg_const_i32(field_length));
                }
            }
            break;
        case 0x7e: /* movd ea, mm */
#ifdef TARGET_X86_64
            if (s->dflag == MO_64) {
                tcg_gen_ld_i64(s->T0, cpu_env,
                               offsetof(CPUX86State,fpregs[reg].mmx));
                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
            } else
#endif
            {
                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
            }
            break;
        case 0x17e: /* movd ea, xmm */
#ifdef TARGET_X86_64
            if (s->dflag == MO_64) {
                tcg_gen_ld_i64(s->T0, cpu_env,
                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
            } else
#endif
            {
                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
            }
            break;
        case 0x27e: /* movq xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_ldq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
            }
            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
            break;
        case 0x7f: /* movq ea, mm */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
            } else {
                rm = (modrm & 7);
                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
                            offsetof(CPUX86State,fpregs[reg].mmx));
            }
            break;
        case 0x011: /* movups */
        case 0x111: /* movupd */
        case 0x029: /* movaps */
        case 0x129: /* movapd */
        case 0x17f: /* movdqa ea, xmm */
        case 0x27f: /* movdqu ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
                            offsetof(CPUX86State,xmm_regs[reg]));
            }
            break;
        case 0x211: /* movss ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
                gen_op_st_v(s, MO_32, s->T0, s->A0);
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
            }
            break;
        case 0x311: /* movsd ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_stq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
            }
            break;
        case 0x013: /* movlps */
        case 0x113: /* movlpd */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_stq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                goto illegal_op;
            }
            break;
        case 0x017: /* movhps */
        case 0x117: /* movhpd */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_stq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(1)));
            } else {
                goto illegal_op;
            }
            break;
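        /* Shift-by-immediate groups 12/13/14: the imm8 count is spilled
           into mmx_t0/xmm_t0 and the operation is picked from
           sse_op_table2 by the opcode byte and the modrm reg field.  */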
        case 0x71: /* shift mm, im */
        case 0x72:
        case 0x73:
        case 0x171: /* shift xmm, im */
        case 0x172:
        case 0x173:
            if (b1 >= 2) {
                goto unknown_op;
            }
            val = x86_ldub_code(env, s);
            if (is_xmm) {
                tcg_gen_movi_tl(s->T0, val);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                tcg_gen_movi_tl(s->T0, 0);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
                op1_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                tcg_gen_movi_tl(s->T0, val);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
                tcg_gen_movi_tl(s->T0, 0);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
                op1_offset = offsetof(CPUX86State,mmx_t0);
            }
            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
                                       ((modrm >> 3) & 7)][b1];
            if (!sse_fn_epp) {
                goto unknown_op;
            }
            if (is_xmm) {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
            break;
        case 0x050: /* movmskps */
            rm = (modrm & 7) | REX_B(s);
            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                             offsetof(CPUX86State,xmm_regs[rm]));
            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            break;
        case 0x150: /* movmskpd */
            rm = (modrm & 7) | REX_B(s);
            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                             offsetof(CPUX86State,xmm_regs[rm]));
            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            break;
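        /* Conversions involving MMX registers begin with enter_mmx which,
           as with a real MMX instruction, resets the x87 top-of-stack and
           (in this implementation) marks the register tags valid.  */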
        case 0x02a: /* cvtpi2ps */
        case 0x12a: /* cvtpi2pd */
            gen_helper_enter_mmx(cpu_env);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            switch (b >> 8) {
            case 0x0:
                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
                break;
            default:
            case 0x1:
                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
                break;
            }
            break;
        case 0x22a: /* cvtsi2ss */
        case 0x32a: /* cvtsi2sd */
            ot = mo_64_32(s->dflag);
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            if (ot == MO_32) {
                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
            } else {
#ifdef TARGET_X86_64
                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
                sse_fn_epl(cpu_env, s->ptr0, s->T0);
#else
                goto illegal_op;
#endif
            }
            break;
        case 0x02c: /* cvttps2pi */
        case 0x12c: /* cvttpd2pi */
        case 0x02d: /* cvtps2pi */
        case 0x12d: /* cvtpd2pi */
            gen_helper_enter_mmx(cpu_env);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                op2_offset = offsetof(CPUX86State,xmm_t0);
                gen_ldo_env_A0(s, op2_offset);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            switch (b) {
            case 0x02c:
                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x12c:
                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x02d:
                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x12d:
                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            }
            break;
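        /* Scalar conversions load only the 32- or 64-bit value actually
           used into xmm_t0 rather than a full 16-byte vector, matching
           the access width of the hardware instruction.  */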
        case 0x22c: /* cvttss2si */
        case 0x32c: /* cvttsd2si */
        case 0x22d: /* cvtss2si */
        case 0x32d: /* cvtsd2si */
            ot = mo_64_32(s->dflag);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                if ((b >> 8) & 1) {
                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                } else {
                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
                    tcg_gen_st32_tl(s->T0, cpu_env,
                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                }
                op2_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
            if (ot == MO_32) {
                SSEFunc_i_ep sse_fn_i_ep =
                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            } else {
#ifdef TARGET_X86_64
                SSEFunc_l_ep sse_fn_l_ep =
                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
#else
                goto illegal_op;
#endif
            }
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        case 0xc4: /* pinsrw */
        case 0x1c4:
            s->rip_offset = 1;
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            val = x86_ldub_code(env, s);
            if (b1) {
                val &= 7;
                tcg_gen_st16_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
            } else {
                val &= 3;
                tcg_gen_st16_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
            }
            break;
        case 0xc5: /* pextrw */
        case 0x1c5:
            if (mod != 3) {
                goto illegal_op;
            }
            ot = mo_64_32(s->dflag);
            val = x86_ldub_code(env, s);
            if (b1) {
                val &= 7;
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
            } else {
                val &= 3;
                rm = (modrm & 7);
                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
            }
            reg = ((modrm >> 3) & 7) | REX_R(s);
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        case 0x1d6: /* movq ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_stq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                gen_op_movq_env_0(s,
                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
            }
            break;
        case 0x2d6: /* movq2dq */
            gen_helper_enter_mmx(cpu_env);
            rm = (modrm & 7);
            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                        offsetof(CPUX86State,fpregs[rm].mmx));
            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
            break;
        case 0x3d6: /* movdq2q */
            gen_helper_enter_mmx(cpu_env);
            rm = (modrm & 7) | REX_B(s);
            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
            break;
        case 0xd7: /* pmovmskb */
        case 0x1d7:
            if (mod != 3) {
                goto illegal_op;
            }
            if (b1) {
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, xmm_regs[rm]));
                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
            } else {
                rm = (modrm & 7);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, fpregs[rm].mmx));
                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
            }
            reg = ((modrm >> 3) & 7) | REX_R(s);
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            break;

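        /* Three-byte escape 0f 38: table-driven SSSE3/SSE4 insns are
           handled here, while opcodes 0f 38 f0-ff are the integer
           extensions (movbe, crc32, BMI1/BMI2) dispatched at
           do_0f_38_fx below.  */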
        case 0x138:
        case 0x038:
            b = modrm;
            if ((b & 0xf0) == 0xf0) {
                goto do_0f_38_fx;
            }
            modrm = x86_ldub_code(env, s);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | REX_R(s);
            mod = (modrm >> 6) & 3;
            if (b1 >= 2) {
                goto unknown_op;
            }

            sse_fn_epp = sse_op_table6[b].op[b1];
            if (!sse_fn_epp) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) {
                goto illegal_op;
            }

            if (b1) {
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(env, s, modrm);
                    switch (b) {
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
                        gen_ldq_env_A0(s, op2_offset +
                                        offsetof(ZMMReg, ZMM_Q(0)));
                        break;
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_L(0)));
                        break;
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
                                           s->mem_index, MO_LEUW);
                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_W(0)));
                        break;
                    case 0x2a:            /* movntdqa */
                        gen_ldo_env_A0(s, op1_offset);
                        return;
                    default:
                        gen_ldo_env_A0(s, op2_offset);
                    }
                }
            } else {
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldq_env_A0(s, op2_offset);
                }
            }
            if (sse_fn_epp == SSE_SPECIAL) {
                goto unknown_op;
            }

            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);

            if (b == 0x17) {
                set_cc_op(s, CC_OP_EFLAGS);
            }
            break;

        case 0x238:
        case 0x338:
        do_0f_38_fx:
            /* Various integer extensions at 0f 38 f[0-f].  */
            b = modrm | (b1 << 8);
            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | REX_R(s);

            switch (b) {
            case 0x3f0: /* crc32 Gd,Eb */
            case 0x3f1: /* crc32 Gd,Ey */
            do_crc32:
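                /* crc32 accumulates with the Castagnoli polynomial
                   (CRC-32C, 0x11EDC6F41); the 8 << ot operand tells the
                   helper how many bits of the source to fold in.  */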
                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
                    goto illegal_op;
                }
                if ((b & 0xff) == 0xf0) {
                    ot = MO_8;
                } else if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                gen_helper_crc32(s->T0, s->tmp2_i32,
                                 s->T0, tcg_const_i32(8 << ot));

                ot = mo_64_32(s->dflag);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;

            case 0x1f0: /* crc32 or movbe */
            case 0x1f1:
                /* For these insns, the f2 prefix is supposed to have
                   priority over the 66 prefix, but that is not how b1 was
                   computed above, so test for it explicitly here.  */
                if (s->prefix & PREFIX_REPNZ) {
                    goto do_crc32;
                }
                /* FALLTHRU */
            case 0x0f0: /* movbe Gy,My */
            case 0x0f1: /* movbe My,Gy */
                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
                    goto illegal_op;
                }
                if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                gen_lea_modrm(env, s, modrm);
                if ((b & 1) == 0) {
                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                       s->mem_index, ot | MO_BE);
                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                } else {
                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                       s->mem_index, ot | MO_BE);
                }
                break;

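            /* VEX-encoded BMI instructions at 0f 38 f2-f7: all require
               the VEX prefix with a 128-bit (vex_l == 0) encoding, and
               most take an extra operand from the vex_v register field.  */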
            case 0x0f2: /* andn Gy, By, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                gen_op_update1_cc(s);
                set_cc_op(s, CC_OP_LOGICB + ot);
                break;

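            /* bextr: the control word in By packs the start position in
               bits 7:0 and the field length in bits 15:8, so e.g. a
               control of 0x0408 extracts the four bits 8..11 of Ey.  */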
            case 0x0f7: /* bextr Gy, Ey, By */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                {
                    TCGv bound, zero;

                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                    /* Extract START, and shift the operand.
                       Shifts larger than operand size get zeros.  */
                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);

                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    zero = tcg_const_tl(0);
                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
                                       s->T0, zero);
                    tcg_temp_free(zero);

                    /* Extract the LEN into a mask.  Lengths larger than
                       operand size get all ones.  */
                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
                                       s->A0, bound);
                    tcg_temp_free(bound);
                    tcg_gen_movi_tl(s->T1, 1);
                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
                    tcg_gen_subi_tl(s->T1, s->T1, 1);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);

                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                    gen_op_update1_cc(s);
                    set_cc_op(s, CC_OP_LOGICB + ot);
                }
                break;

            case 0x0f5: /* bzhi Gy, Ey, By */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
                {
                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    /* Note that since we're using BMILG (in order to get O
                       cleared) we need to store the inverse into C.  */
                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
                                       s->T1, bound);
                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
                                       bound, bound, s->T1);
                    tcg_temp_free(bound);
                }
                tcg_gen_movi_tl(s->A0, -1);
                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                gen_op_update1_cc(s);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;

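            /* mulx: unsigned multiply of Ey by rdx/edx that leaves the
               flags untouched; the low half of the product goes to the
               vex_v register and the high half to Gy.  */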
            case 0x3f6: /* mulx By, Gy, rdx, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                switch (ot) {
                default:
                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
                                      s->tmp2_i32, s->tmp3_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
                    break;
#ifdef TARGET_X86_64
                case MO_64:
                    tcg_gen_mulu2_i64(s->T0, s->T1,
                                      s->T0, cpu_regs[R_EDX]);
                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
                    break;
#endif
                }
                break;

            case 0x3f5: /* pdep Gy, By, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the source operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
                break;

            case 0x2f5: /* pext Gy, By, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the source operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
                break;

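            /* adcx uses only CF and adox only OF, so interleaved chains
               of the two can run without recomputing EFLAGS: the
               carry-out of a previous round is reused directly, and
               CC_OP_ADCOX tracks both bits at once.  */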
            case 0x1f6: /* adcx Gy, Ey */
            case 0x2f6: /* adox Gy, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
                    goto illegal_op;
                } else {
                    TCGv carry_in, carry_out, zero;
                    int end_op;

                    ot = mo_64_32(s->dflag);
                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                    /* Re-use the carry-out from a previous round.  */
                    carry_in = NULL;
                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
                    switch (s->cc_op) {
                    case CC_OP_ADCX:
                        if (b == 0x1f6) {
                            carry_in = cpu_cc_dst;
                            end_op = CC_OP_ADCX;
                        } else {
                            end_op = CC_OP_ADCOX;
                        }
                        break;
                    case CC_OP_ADOX:
                        if (b == 0x1f6) {
                            end_op = CC_OP_ADCOX;
                        } else {
                            carry_in = cpu_cc_src2;
                            end_op = CC_OP_ADOX;
                        }
                        break;
                    case CC_OP_ADCOX:
                        end_op = CC_OP_ADCOX;
                        carry_in = carry_out;
                        break;
                    default:
                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
                        break;
                    }
                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
                    if (!carry_in) {
                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
                            gen_compute_eflags(s);
                        }
                        carry_in = s->tmp0;
                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
                    }

                    switch (ot) {
#ifdef TARGET_X86_64
                    case MO_32:
                        /* If we know TL is 64-bit, and we want a 32-bit
                           result, just do everything in 64-bit arithmetic.  */
                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
                        tcg_gen_ext32u_i64(s->T0, s->T0);
                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
                        tcg_gen_shri_i64(carry_out, s->T0, 32);
                        break;
#endif
                    default:
                        /* Otherwise compute the carry-out in two steps.  */
                        zero = tcg_const_tl(0);
                        tcg_gen_add2_tl(s->T0, carry_out,
                                        s->T0, zero,
                                        carry_in, zero);
                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                        cpu_regs[reg], carry_out,
                                        s->T0, zero);
                        tcg_temp_free(zero);
                        break;
                    }
                    set_cc_op(s, end_op);
                }
                break;

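            /* shlx/sarx/shrx: shifts that take the count from the vex_v
               register (masked to the operand width) and, unlike the
               classic shift insns, write no flags.  */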
            case 0x1f7: /* shlx Gy, Ey, By */
            case 0x2f7: /* sarx Gy, Ey, By */
            case 0x3f7: /* shrx Gy, Ey, By */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                if (ot == MO_64) {
                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
                } else {
                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
                }
                if (b == 0x1f7) {
                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
                } else if (b == 0x2f7) {
                    if (ot != MO_64) {
                        tcg_gen_ext32s_tl(s->T0, s->T0);
                    }
                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
                } else {
                    if (ot != MO_64) {
                        tcg_gen_ext32u_tl(s->T0, s->T0);
                    }
                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                }
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;

            case 0x0f3:
            case 0x1f3:
            case 0x2f3:
            case 0x3f3: /* Group 17 */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                switch (reg & 7) {
                case 1: /* blsr By,Ey */
                    tcg_gen_subi_tl(s->T1, s->T0, 1);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                    break;
                case 2: /* blsmsk By,Ey */
                    tcg_gen_subi_tl(s->T1, s->T0, 1);
                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
                    break;
                case 3: /* blsi By, Ey */
                    tcg_gen_neg_tl(s->T1, s->T0);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                    break;
                default:
                    goto unknown_op;
                }
                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;

            default:
                goto unknown_op;
            }
            break;

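        /* Three-byte escape 0f 3a: every insn here carries a trailing
           imm8, so s->rip_offset is set below to keep rip-relative
           addressing correct while the operand is decoded.  */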
        case 0x03a:
        case 0x13a:
            b = modrm;
            modrm = x86_ldub_code(env, s);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | REX_R(s);
            mod = (modrm >> 6) & 3;
            if (b1 >= 2) {
                goto unknown_op;
            }

            sse_fn_eppi = sse_op_table7[b].op[b1];
            if (!sse_fn_eppi) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) {
                goto illegal_op;
            }

            s->rip_offset = 1;

            if (sse_fn_eppi == SSE_SPECIAL) {
                ot = mo_64_32(s->dflag);
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(env, s, modrm);
                }
                reg = ((modrm >> 3) & 7) | REX_R(s);
                val = x86_ldub_code(env, s);
                switch (b) {
                case 0x14: /* pextrb */
                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_B(val & 15)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_UB);
                    }
                    break;
                case 0x15: /* pextrw */
                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_W(val & 7)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_LEUW);
                    }
                    break;
                case 0x16:
                    if (ot == MO_32) { /* pextrd */
                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(val & 3)));
                        if (mod == 3) {
                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
                        } else {
                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                                s->mem_index, MO_LEUL);
                        }
                    } else { /* pextrq */
#ifdef TARGET_X86_64
                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_Q(val & 1)));
                        if (mod == 3) {
                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
                        } else {
                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                                s->mem_index, MO_LEQ);
                        }
#else
                        goto illegal_op;
#endif
                    }
                    break;
                case 0x17: /* extractps */
                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_L(val & 3)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_LEUL);
                    }
                    break;
                case 0x20: /* pinsrb */
                    if (mod == 3) {
                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                    } else {
                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                           s->mem_index, MO_UB);
                    }
                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_B(val & 15)));
                    break;
                case 0x21: /* insertps */
                    if (mod == 3) {
                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,xmm_regs[rm]
                                                .ZMM_L((val >> 6) & 3)));
                    } else {
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                    }
                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                    offsetof(CPUX86State,xmm_regs[reg]
                                            .ZMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(0)));
                    }
                    if ((val >> 1) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(1)));
                    }
                    if ((val >> 2) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(2)));
                    }
                    if ((val >> 3) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(3)));
                    }
                    break;
                case 0x22:
                    if (ot == MO_32) { /* pinsrd */
                        if (mod == 3) {
                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
                        } else {
                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                                s->mem_index, MO_LEUL);
                        }
                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(val & 3)));
                    } else { /* pinsrq */
#ifdef TARGET_X86_64
                        if (mod == 3) {
                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
                        } else {
                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                                s->mem_index, MO_LEQ);
                        }
                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_Q(val & 1)));
#else
                        goto illegal_op;
#endif
                    }
                    break;
                }
                return;
            }

            if (b1) {
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldo_env_A0(s, op2_offset);
                }
            } else {
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldq_env_A0(s, op2_offset);
                }
            }
            val = x86_ldub_code(env, s);

            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
                set_cc_op(s, CC_OP_EFLAGS);

                if (s->dflag == MO_64) {
                    /* The helper must use entire 64-bit gp registers */
                    val |= 1 << 8;
                }
            }

            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
            break;

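        /* rorx Gy, Ey, ib rotates right by an immediate count without
           touching the flags, e.g. "rorx eax, ebx, 8" stores ebx rotated
           right by 8 bits into eax.  */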
        case 0x33a:
            /* Various integer extensions at 0f 3a f[0-f].  */
            b = modrm | (b1 << 8);
            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | REX_R(s);

            switch (b) {
            case 0x3f0: /* rorx Gy,Ey, Ib */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                b = x86_ldub_code(env, s);
                if (ot == MO_64) {
                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
                } else {
                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                }
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;

            default:
                goto unknown_op;
            }
            break;

        default:
        unknown_op:
            gen_unknown_opcode(env, s);
            return;
        }
    } else {
        /* generic MMX or SSE operation */
        switch (b) {
        case 0x70: /* pshufw/pshufd/pshuflw/pshufhw */
        case 0xc6: /* shufps/shufpd */
        case 0xc2: /* compare insns */
            s->rip_offset = 1;
            break;
        default:
            break;
        }
        if (is_xmm) {
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            if (mod != 3) {
                int sz = 4;

                gen_lea_modrm(env, s, modrm);
                op2_offset = offsetof(CPUX86State,xmm_t0);

                switch (b) {
                case 0x50 ... 0x5a:
                case 0x5c ... 0x5f:
                case 0xc2:
                    /* Most sse scalar operations.  */
                    if (b1 == 2) {
                        sz = 2;
                    } else if (b1 == 3) {
                        sz = 3;
                    }
                    break;

                case 0x2e:  /* ucomis[sd] */
                case 0x2f:  /* comis[sd] */
                    if (b1 == 0) {
                        sz = 2;
                    } else {
                        sz = 3;
                    }
                    break;
                }

                switch (sz) {
                case 2:
                    /* 32 bit access */
                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
                    tcg_gen_st32_tl(s->T0, cpu_env,
                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                    break;
                case 3:
                    /* 64 bit access */
                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
                    break;
                default:
                    /* 128 bit access */
                    gen_ldo_env_A0(s, op2_offset);
                    break;
                }
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
        } else {
            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
        }
        switch (b) {
        case 0x0f: /* 3DNow! data insns */
            val = x86_ldub_code(env, s);
            sse_fn_epp = sse_op_table5[val];
            if (!sse_fn_epp) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
                goto illegal_op;
            }
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
            break;
        case 0x70: /* pshufw/pshufd/pshuflw/pshufhw */
        case 0xc6: /* shufps/shufpd */
4510            val = x86_ldub_code(env, s);
4511            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4512            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4513            /* XXX: introduce a new table? */
4514            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4515            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4516            break;
4517        case 0xc2:
4518            /* compare insns */
4519            val = x86_ldub_code(env, s);
4520            if (val >= 8)
4521                goto unknown_op;
4522            sse_fn_epp = sse_op_table4[val][b1];
4523
4524            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4525            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4526            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4527            break;
4528        case 0xf7:
4529            /* maskmov : we must prepare A0 */
4530            if (mod != 3)
4531                goto illegal_op;
4532            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4533            gen_extu(s->aflag, s->A0);
4534            gen_add_A0_ds_seg(s);
4535
4536            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4537            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4538            /* XXX: introduce a new table? */
4539            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4540            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4541            break;
4542        default:
4543            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4544            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4545            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4546            break;
4547        }
4548        if (b == 0x2e || b == 0x2f) {
4549            set_cc_op(s, CC_OP_EFLAGS);
4550        }
4551    }
4552}
4553
4554/* Convert one instruction.  s->base.is_jmp is set if the translation must
4555   be stopped.  Returns the next pc value.  */
4556static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4557{
4558    CPUX86State *env = cpu->env_ptr;
4559    int b, prefixes;
4560    int shift;
4561    MemOp ot, aflag, dflag;
4562    int modrm, reg, rm, mod, op, opreg, val;
4563    target_ulong next_eip, tval;
4564    target_ulong pc_start = s->base.pc_next;
4565
4566    s->pc_start = s->pc = pc_start;
4567    s->override = -1;
4568#ifdef TARGET_X86_64
4569    s->rex_w = false;
4570    s->rex_r = 0;
4571    s->rex_x = 0;
4572    s->rex_b = 0;
4573#endif
4574    s->rip_offset = 0; /* for relative ip address */
4575    s->vex_l = 0;
4576    s->vex_v = 0;
4577    if (sigsetjmp(s->jmpbuf, 0) != 0) {
4578        gen_exception_gpf(s);
4579        return s->pc;
4580    }
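        /*
         * A siglongjmp back to the buffer above comes from the code
         * fetchers when an instruction exceeds the 15-byte x86 limit;
         * per the architecture, that raises #GP(0).
         */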
4581
4582    prefixes = 0;
4583
4584 next_byte:
4585    b = x86_ldub_code(env, s);
4586    /* Collect prefixes.  */
4587    switch (b) {
4588    case 0xf3:
4589        prefixes |= PREFIX_REPZ;
4590        goto next_byte;
4591    case 0xf2:
4592        prefixes |= PREFIX_REPNZ;
4593        goto next_byte;
4594    case 0xf0:
4595        prefixes |= PREFIX_LOCK;
4596        goto next_byte;
4597    case 0x2e:
4598        s->override = R_CS;
4599        goto next_byte;
4600    case 0x36:
4601        s->override = R_SS;
4602        goto next_byte;
4603    case 0x3e:
4604        s->override = R_DS;
4605        goto next_byte;
4606    case 0x26:
4607        s->override = R_ES;
4608        goto next_byte;
4609    case 0x64:
4610        s->override = R_FS;
4611        goto next_byte;
4612    case 0x65:
4613        s->override = R_GS;
4614        goto next_byte;
4615    case 0x66:
4616        prefixes |= PREFIX_DATA;
4617        goto next_byte;
4618    case 0x67:
4619        prefixes |= PREFIX_ADR;
4620        goto next_byte;
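        /*
         * Conflicting segment overrides are not rejected: each override
         * byte simply overwrites s->override, so the last one seen wins
         * (which matches commonly observed hardware behaviour).
         */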
4621#ifdef TARGET_X86_64
4622    case 0x40 ... 0x4f:
4623        if (CODE64(s)) {
4624            /* REX prefix */
4625            prefixes |= PREFIX_REX;
4626            s->rex_w = (b >> 3) & 1;
4627            s->rex_r = (b & 0x4) << 1;
4628            s->rex_x = (b & 0x2) << 2;
4629            s->rex_b = (b & 0x1) << 3;
4630            goto next_byte;
4631        }
4632        break;
4633#endif
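        /*
         * The REX bits are stored pre-shifted to bit 3 so that REX_R/X/B
         * can be OR'ed directly into the 3-bit modrm reg/index/base
         * fields, e.g. reg = ((modrm >> 3) & 7) | REX_R(s) below.
         */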
4634    case 0xc5: /* 2-byte VEX */
4635    case 0xc4: /* 3-byte VEX */
4636        /* VEX prefixes are only valid in 32-bit and 64-bit protected mode,
4637           and not in VM86 mode.  Otherwise the instruction is LES or LDS.  */
4638        if (CODE32(s) && !VM86(s)) {
4639            static const int pp_prefix[4] = {
4640                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4641            };
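                /*
                 * VEX byte layouts (vvvv and the R/X/B bits are stored
                 * inverted in the encoding):
                 *   2-byte, 0xc5: R vvvv L pp
                 *   3-byte, 0xc4: R X B m-mmmm, then W vvvv L pp
                 * where m-mmmm selects the implied leading opcode bytes.
                 */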
4642            int vex3, vex2 = x86_ldub_code(env, s);
4643
4644            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4645                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4646                   otherwise the instruction is LES or LDS.  */
4647                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4648                break;
4649            }
4650
4651            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4652            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4653                            | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4654                goto illegal_op;
4655            }
4656#ifdef TARGET_X86_64
4657            s->rex_r = (~vex2 >> 4) & 8;
4658#endif
4659            if (b == 0xc5) {
4660                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4661                vex3 = vex2;
4662                b = x86_ldub_code(env, s) | 0x100;
4663            } else {
4664                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4665                vex3 = x86_ldub_code(env, s);
4666#ifdef TARGET_X86_64
4667                s->rex_x = (~vex2 >> 3) & 8;
4668                s->rex_b = (~vex2 >> 2) & 8;
4669                s->rex_w = (vex3 >> 7) & 1;
4670#endif
4671                switch (vex2 & 0x1f) {
4672                case 0x01: /* Implied 0f leading opcode bytes.  */
4673                    b = x86_ldub_code(env, s) | 0x100;
4674                    break;
4675                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4676                    b = 0x138;
4677                    break;
4678                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4679                    b = 0x13a;
4680                    break;
4681                default:   /* Reserved for future use.  */
4682                    goto unknown_op;
4683                }
4684            }
4685            s->vex_v = (~vex3 >> 3) & 0xf;
4686            s->vex_l = (vex3 >> 2) & 1;
4687            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4688        }
4689        break;
4690    }
4691
4692    /* Post-process prefixes.  */
4693    if (CODE64(s)) {
4694        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4695           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4696           over 0x66 if both are present.  */
4697        dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4698        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4699        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4700    } else {
4701        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4702        if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4703            dflag = MO_32;
4704        } else {
4705            dflag = MO_16;
4706        }
4707        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4708        if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4709            aflag = MO_32;
4710        } else {
4711            aflag = MO_16;
4712        }
4713    }
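        /*
         * Example: in 64-bit mode, "66 48 01 c8" carries both 0x66 and
         * REX.W; REX.W wins, so the ADD executes at MO_64.  In 32-bit
         * code the same 0x66 prefix would flip dflag down to MO_16.
         */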
4714
4715    s->prefix = prefixes;
4716    s->aflag = aflag;
4717    s->dflag = dflag;
4718
4719    /* now check op code */
4720 reswitch:
4721    switch(b) {
4722    case 0x0f:
4723        /**************************/
4724        /* extended op code */
4725        b = x86_ldub_code(env, s) | 0x100;
4726        goto reswitch;
4727
4728        /**************************/
4729        /* arith & logic */
4730    case 0x00 ... 0x05:
4731    case 0x08 ... 0x0d:
4732    case 0x10 ... 0x15:
4733    case 0x18 ... 0x1d:
4734    case 0x20 ... 0x25:
4735    case 0x28 ... 0x2d:
4736    case 0x30 ... 0x35:
4737    case 0x38 ... 0x3d:
4738        {
4739            int op, f, val;
4740            op = (b >> 3) & 7;
4741            f = (b >> 1) & 3;
4742
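                /*
                 * For opcodes 0x00-0x3d, bits 5:3 select the ALU op
                 * (ADD, OR, ADC, SBB, AND, SUB, XOR, CMP) and bits 2:1
                 * the form: 0 = OP Ev,Gv; 1 = OP Gv,Ev; 2 = OP AL/eAX,imm.
                 */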
4743            ot = mo_b_d(b, dflag);
4744
4745            switch(f) {
4746            case 0: /* OP Ev, Gv */
4747                modrm = x86_ldub_code(env, s);
4748                reg = ((modrm >> 3) & 7) | REX_R(s);
4749                mod = (modrm >> 6) & 3;
4750                rm = (modrm & 7) | REX_B(s);
4751                if (mod != 3) {
4752                    gen_lea_modrm(env, s, modrm);
4753                    opreg = OR_TMP0;
4754                } else if (op == OP_XORL && rm == reg) {
4755                xor_zero:
4756                    /* xor reg, reg optimisation */
4757                    set_cc_op(s, CC_OP_CLR);
4758                    tcg_gen_movi_tl(s->T0, 0);
4759                    gen_op_mov_reg_v(s, ot, reg, s->T0);
4760                    break;
4761                } else {
4762                    opreg = rm;
4763                }
4764                gen_op_mov_v_reg(s, ot, s->T1, reg);
4765                gen_op(s, op, ot, opreg);
4766                break;
4767            case 1: /* OP Gv, Ev */
4768                modrm = x86_ldub_code(env, s);
4769                mod = (modrm >> 6) & 3;
4770                reg = ((modrm >> 3) & 7) | REX_R(s);
4771                rm = (modrm & 7) | REX_B(s);
4772                if (mod != 3) {
4773                    gen_lea_modrm(env, s, modrm);
4774                    gen_op_ld_v(s, ot, s->T1, s->A0);
4775                } else if (op == OP_XORL && rm == reg) {
4776                    goto xor_zero;
4777                } else {
4778                    gen_op_mov_v_reg(s, ot, s->T1, rm);
4779                }
4780                gen_op(s, op, ot, reg);
4781                break;
4782            case 2: /* OP A, Iv */
4783                val = insn_get(env, s, ot);
4784                tcg_gen_movi_tl(s->T1, val);
4785                gen_op(s, op, ot, OR_EAX);
4786                break;
4787            }
4788        }
4789        break;
4790
4791    case 0x82:
4792        if (CODE64(s))
4793            goto illegal_op;
4794        /* fall through */
4795    case 0x80: /* GRP1 */
4796    case 0x81:
4797    case 0x83:
4798        {
4799            int val;
4800
4801            ot = mo_b_d(b, dflag);
4802
4803            modrm = x86_ldub_code(env, s);
4804            mod = (modrm >> 6) & 3;
4805            rm = (modrm & 7) | REX_B(s);
4806            op = (modrm >> 3) & 7;
4807
4808            if (mod != 3) {
4809                if (b == 0x83)
4810                    s->rip_offset = 1;
4811                else
4812                    s->rip_offset = insn_const_size(ot);
4813                gen_lea_modrm(env, s, modrm);
4814                opreg = OR_TMP0;
4815            } else {
4816                opreg = rm;
4817            }
4818
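                /*
                 * 0x80-0x82 read an immediate of the operand size (one
                 * byte for the byte forms); 0x83 reads an imm8 and
                 * sign-extends it to the operand size, hence the
                 * rip_offset of 1 above.
                 */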
4819            switch(b) {
4820            default:
4821            case 0x80:
4822            case 0x81:
4823            case 0x82:
4824                val = insn_get(env, s, ot);
4825                break;
4826            case 0x83:
4827                val = (int8_t)insn_get(env, s, MO_8);
4828                break;
4829            }
4830            tcg_gen_movi_tl(s->T1, val);
4831            gen_op(s, op, ot, opreg);
4832        }
4833        break;
4834
4835        /**************************/
4836        /* inc, dec, and other misc arith */
4837    case 0x40 ... 0x47: /* inc Gv */
4838        ot = dflag;
4839        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4840        break;
4841    case 0x48 ... 0x4f: /* dec Gv */
4842        ot = dflag;
4843        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4844        break;
4845    case 0xf6: /* GRP3 */
4846    case 0xf7:
4847        ot = mo_b_d(b, dflag);
4848
4849        modrm = x86_ldub_code(env, s);
4850        mod = (modrm >> 6) & 3;
4851        rm = (modrm & 7) | REX_B(s);
4852        op = (modrm >> 3) & 7;
4853        if (mod != 3) {
4854            if (op == 0) {
4855                s->rip_offset = insn_const_size(ot);
4856            }
4857            gen_lea_modrm(env, s, modrm);
4858            /* For those below that handle locked memory, don't load here.  */
4859            if (!(s->prefix & PREFIX_LOCK)
4860                || op != 2) {
4861                gen_op_ld_v(s, ot, s->T0, s->A0);
4862            }
4863        } else {
4864            gen_op_mov_v_reg(s, ot, s->T0, rm);
4865        }
4866
4867        switch(op) {
4868        case 0: /* test */
4869            val = insn_get(env, s, ot);
4870            tcg_gen_movi_tl(s->T1, val);
4871            gen_op_testl_T0_T1_cc(s);
4872            set_cc_op(s, CC_OP_LOGICB + ot);
4873            break;
4874        case 2: /* not */
4875            if (s->prefix & PREFIX_LOCK) {
4876                if (mod == 3) {
4877                    goto illegal_op;
4878                }
4879                tcg_gen_movi_tl(s->T0, ~0);
4880                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4881                                            s->mem_index, ot | MO_LE);
4882            } else {
4883                tcg_gen_not_tl(s->T0, s->T0);
4884                if (mod != 3) {
4885                    gen_op_st_v(s, ot, s->T0, s->A0);
4886                } else {
4887                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4888                }
4889            }
4890            break;
4891        case 3: /* neg */
4892            if (s->prefix & PREFIX_LOCK) {
4893                TCGLabel *label1;
4894                TCGv a0, t0, t1, t2;
4895
4896                if (mod == 3) {
4897                    goto illegal_op;
4898                }
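                    /*
                     * There is no atomic negate, so emit a cmpxchg retry
                     * loop: read the old value, compute its negation and
                     * cmpxchg it back; if another CPU changed the memory
                     * in the meantime, branch back and try again.
                     */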
4899                a0 = tcg_temp_local_new();
4900                t0 = tcg_temp_local_new();
4901                label1 = gen_new_label();
4902
4903                tcg_gen_mov_tl(a0, s->A0);
4904                tcg_gen_mov_tl(t0, s->T0);
4905
4906                gen_set_label(label1);
4907                t1 = tcg_temp_new();
4908                t2 = tcg_temp_new();
4909                tcg_gen_mov_tl(t2, t0);
4910                tcg_gen_neg_tl(t1, t0);
4911                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4912                                          s->mem_index, ot | MO_LE);
4913                tcg_temp_free(t1);
4914                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4915
4916                tcg_temp_free(t2);
4917                tcg_temp_free(a0);
4918                tcg_gen_mov_tl(s->T0, t0);
4919                tcg_temp_free(t0);
4920            } else {
4921                tcg_gen_neg_tl(s->T0, s->T0);
4922                if (mod != 3) {
4923                    gen_op_st_v(s, ot, s->T0, s->A0);
4924                } else {
4925                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4926                }
4927            }
4928            gen_op_update_neg_cc(s);
4929            set_cc_op(s, CC_OP_SUBB + ot);
4930            break;
4931        case 4: /* mul */
4932            switch(ot) {
4933            case MO_8:
4934                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4935                tcg_gen_ext8u_tl(s->T0, s->T0);
4936                tcg_gen_ext8u_tl(s->T1, s->T1);
4937                /* XXX: use 32 bit mul which could be faster */
4938                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4939                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4940                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4941                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4942                set_cc_op(s, CC_OP_MULB);
4943                break;
4944            case MO_16:
4945                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4946                tcg_gen_ext16u_tl(s->T0, s->T0);
4947                tcg_gen_ext16u_tl(s->T1, s->T1);
4948                /* XXX: use 32 bit mul which could be faster */
4949                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4950                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4951                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4952                tcg_gen_shri_tl(s->T0, s->T0, 16);
4953                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4954                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4955                set_cc_op(s, CC_OP_MULW);
4956                break;
4957            default:
4958            case MO_32:
4959                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4960                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4961                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4962                                  s->tmp2_i32, s->tmp3_i32);
4963                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4964                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4965                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4966                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4967                set_cc_op(s, CC_OP_MULL);
4968                break;
4969#ifdef TARGET_X86_64
4970            case MO_64:
4971                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4972                                  s->T0, cpu_regs[R_EAX]);
4973                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4974                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4975                set_cc_op(s, CC_OP_MULQ);
4976                break;
4977#endif
4978            }
4979            break;
4980        case 5: /* imul */
4981            switch(ot) {
4982            case MO_8:
4983                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4984                tcg_gen_ext8s_tl(s->T0, s->T0);
4985                tcg_gen_ext8s_tl(s->T1, s->T1);
4986                /* XXX: use 32 bit mul which could be faster */
4987                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4988                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4989                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4990                tcg_gen_ext8s_tl(s->tmp0, s->T0);
4991                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4992                set_cc_op(s, CC_OP_MULB);
4993                break;
4994            case MO_16:
4995                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4996                tcg_gen_ext16s_tl(s->T0, s->T0);
4997                tcg_gen_ext16s_tl(s->T1, s->T1);
4998                /* XXX: use 32 bit mul which could be faster */
4999                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5000                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5001                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5002                tcg_gen_ext16s_tl(s->tmp0, s->T0);
5003                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5004                tcg_gen_shri_tl(s->T0, s->T0, 16);
5005                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5006                set_cc_op(s, CC_OP_MULW);
5007                break;
5008            default:
5009            case MO_32:
5010                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5011                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5012                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5013                                  s->tmp2_i32, s->tmp3_i32);
5014                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5015                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5016                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5017                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5018                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5019                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5020                set_cc_op(s, CC_OP_MULL);
5021                break;
5022#ifdef TARGET_X86_64
5023            case MO_64:
5024                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5025                                  s->T0, cpu_regs[R_EAX]);
5026                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5027                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5028                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5029                set_cc_op(s, CC_OP_MULQ);
5030                break;
5031#endif
5032            }
5033            break;
5034        case 6: /* div */
5035            switch(ot) {
5036            case MO_8:
5037                gen_helper_divb_AL(cpu_env, s->T0);
5038                break;
5039            case MO_16:
5040                gen_helper_divw_AX(cpu_env, s->T0);
5041                break;
5042            default:
5043            case MO_32:
5044                gen_helper_divl_EAX(cpu_env, s->T0);
5045                break;
5046#ifdef TARGET_X86_64
5047            case MO_64:
5048                gen_helper_divq_EAX(cpu_env, s->T0);
5049                break;
5050#endif
5051            }
5052            break;
5053        case 7: /* idiv */
5054            switch(ot) {
5055            case MO_8:
5056                gen_helper_idivb_AL(cpu_env, s->T0);
5057                break;
5058            case MO_16:
5059                gen_helper_idivw_AX(cpu_env, s->T0);
5060                break;
5061            default:
5062            case MO_32:
5063                gen_helper_idivl_EAX(cpu_env, s->T0);
5064                break;
5065#ifdef TARGET_X86_64
5066            case MO_64:
5067                gen_helper_idivq_EAX(cpu_env, s->T0);
5068                break;
5069#endif
5070            }
5071            break;
5072        default:
5073            goto unknown_op;
5074        }
5075        break;
5076
5077    case 0xfe: /* GRP4 */
5078    case 0xff: /* GRP5 */
5079        ot = mo_b_d(b, dflag);
5080
5081        modrm = x86_ldub_code(env, s);
5082        mod = (modrm >> 6) & 3;
5083        rm = (modrm & 7) | REX_B(s);
5084        op = (modrm >> 3) & 7;
5085        if (op >= 2 && b == 0xfe) {
5086            goto unknown_op;
5087        }
5088        if (CODE64(s)) {
5089            if (op == 2 || op == 4) {
5090                /* operand size for jumps is 64 bit */
5091                ot = MO_64;
5092            } else if (op == 3 || op == 5) {
5093                ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5094            } else if (op == 6) {
5095                /* default push size is 64 bit */
5096                ot = mo_pushpop(s, dflag);
5097            }
5098        }
5099        if (mod != 3) {
5100            gen_lea_modrm(env, s, modrm);
5101            if (op >= 2 && op != 3 && op != 5)
5102                gen_op_ld_v(s, ot, s->T0, s->A0);
5103        } else {
5104            gen_op_mov_v_reg(s, ot, s->T0, rm);
5105        }
5106
5107        switch(op) {
5108        case 0: /* inc Ev */
5109            if (mod != 3)
5110                opreg = OR_TMP0;
5111            else
5112                opreg = rm;
5113            gen_inc(s, ot, opreg, 1);
5114            break;
5115        case 1: /* dec Ev */
5116            if (mod != 3)
5117                opreg = OR_TMP0;
5118            else
5119                opreg = rm;
5120            gen_inc(s, ot, opreg, -1);
5121            break;
5122        case 2: /* call Ev */
5123            /* XXX: optimize if memory (no 'and' is necessary) */
5124            if (dflag == MO_16) {
5125                tcg_gen_ext16u_tl(s->T0, s->T0);
5126            }
5127            next_eip = s->pc - s->cs_base;
5128            tcg_gen_movi_tl(s->T1, next_eip);
5129            gen_push_v(s, s->T1);
5130            gen_op_jmp_v(s->T0);
5131            gen_bnd_jmp(s);
5132            gen_jr(s, s->T0);
5133            break;
5134        case 3: /* lcall Ev */
5135            if (mod == 3) {
5136                goto illegal_op;
5137            }
5138            gen_op_ld_v(s, ot, s->T1, s->A0);
5139            gen_add_A0_im(s, 1 << ot);
5140            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5141        do_lcall:
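                /*
                 * Far calls go through a helper: in protected mode it
                 * performs the descriptor load, privilege checks and any
                 * stack switch; the new EIP is then reloaded from env and
                 * used for an indirect jump.
                 */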
5142            if (PE(s) && !VM86(s)) {
5143                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5144                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5145                                           tcg_const_i32(dflag - 1),
5146                                           tcg_const_tl(s->pc - s->cs_base));
5147            } else {
5148                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5149                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5150                                      tcg_const_i32(dflag - 1),
5151                                      tcg_const_i32(s->pc - s->cs_base));
5152            }
5153            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5154            gen_jr(s, s->tmp4);
5155            break;
5156        case 4: /* jmp Ev */
5157            if (dflag == MO_16) {
5158                tcg_gen_ext16u_tl(s->T0, s->T0);
5159            }
5160            gen_op_jmp_v(s->T0);
5161            gen_bnd_jmp(s);
5162            gen_jr(s, s->T0);
5163            break;
5164        case 5: /* ljmp Ev */
5165            if (mod == 3) {
5166                goto illegal_op;
5167            }
5168            gen_op_ld_v(s, ot, s->T1, s->A0);
5169            gen_add_A0_im(s, 1 << ot);
5170            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5171        do_ljmp:
5172            if (PE(s) && !VM86(s)) {
5173                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5174                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5175                                          tcg_const_tl(s->pc - s->cs_base));
5176            } else {
5177                gen_op_movl_seg_T0_vm(s, R_CS);
5178                gen_op_jmp_v(s->T1);
5179            }
5180            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5181            gen_jr(s, s->tmp4);
5182            break;
5183        case 6: /* push Ev */
5184            gen_push_v(s, s->T0);
5185            break;
5186        default:
5187            goto unknown_op;
5188        }
5189        break;
5190
5191    case 0x84: /* test Ev, Gv */
5192    case 0x85:
5193        ot = mo_b_d(b, dflag);
5194
5195        modrm = x86_ldub_code(env, s);
5196        reg = ((modrm >> 3) & 7) | REX_R(s);
5197
5198        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5199        gen_op_mov_v_reg(s, ot, s->T1, reg);
5200        gen_op_testl_T0_T1_cc(s);
5201        set_cc_op(s, CC_OP_LOGICB + ot);
5202        break;
5203
5204    case 0xa8: /* test eAX, Iv */
5205    case 0xa9:
5206        ot = mo_b_d(b, dflag);
5207        val = insn_get(env, s, ot);
5208
5209        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5210        tcg_gen_movi_tl(s->T1, val);
5211        gen_op_testl_T0_T1_cc(s);
5212        set_cc_op(s, CC_OP_LOGICB + ot);
5213        break;
5214
5215    case 0x98: /* CWDE/CBW */
5216        switch (dflag) {
5217#ifdef TARGET_X86_64
5218        case MO_64:
5219            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5220            tcg_gen_ext32s_tl(s->T0, s->T0);
5221            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5222            break;
5223#endif
5224        case MO_32:
5225            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5226            tcg_gen_ext16s_tl(s->T0, s->T0);
5227            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5228            break;
5229        case MO_16:
5230            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5231            tcg_gen_ext8s_tl(s->T0, s->T0);
5232            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5233            break;
5234        default:
5235            tcg_abort();
5236        }
5237        break;
5238    case 0x99: /* CDQ/CWD */
5239        switch (dflag) {
5240#ifdef TARGET_X86_64
5241        case MO_64:
5242            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5243            tcg_gen_sari_tl(s->T0, s->T0, 63);
5244            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5245            break;
5246#endif
5247        case MO_32:
5248            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5249            tcg_gen_ext32s_tl(s->T0, s->T0);
5250            tcg_gen_sari_tl(s->T0, s->T0, 31);
5251            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5252            break;
5253        case MO_16:
5254            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5255            tcg_gen_ext16s_tl(s->T0, s->T0);
5256            tcg_gen_sari_tl(s->T0, s->T0, 15);
5257            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5258            break;
5259        default:
5260            tcg_abort();
5261        }
5262        break;
5263    case 0x1af: /* imul Gv, Ev */
5264    case 0x69: /* imul Gv, Ev, I */
5265    case 0x6b:
5266        ot = dflag;
5267        modrm = x86_ldub_code(env, s);
5268        reg = ((modrm >> 3) & 7) | REX_R(s);
5269        if (b == 0x69)
5270            s->rip_offset = insn_const_size(ot);
5271        else if (b == 0x6b)
5272            s->rip_offset = 1;
5273        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5274        if (b == 0x69) {
5275            val = insn_get(env, s, ot);
5276            tcg_gen_movi_tl(s->T1, val);
5277        } else if (b == 0x6b) {
5278            val = (int8_t)insn_get(env, s, MO_8);
5279            tcg_gen_movi_tl(s->T1, val);
5280        } else {
5281            gen_op_mov_v_reg(s, ot, s->T1, reg);
5282        }
5283        switch (ot) {
5284#ifdef TARGET_X86_64
5285        case MO_64:
5286            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5287            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5288            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5289            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5290            break;
5291#endif
5292        case MO_32:
5293            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5294            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5295            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5296                              s->tmp2_i32, s->tmp3_i32);
5297            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5298            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5299            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5300            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5301            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5302            break;
5303        default:
5304            tcg_gen_ext16s_tl(s->T0, s->T0);
5305            tcg_gen_ext16s_tl(s->T1, s->T1);
5306            /* XXX: use 32 bit mul which could be faster */
5307            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5308            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5309            tcg_gen_ext16s_tl(s->tmp0, s->T0);
5310            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5311            gen_op_mov_reg_v(s, ot, reg, s->T0);
5312            break;
5313        }
5314        set_cc_op(s, CC_OP_MULB + ot);
5315        break;
5316    case 0x1c0:
5317    case 0x1c1: /* xadd Ev, Gv */
5318        ot = mo_b_d(b, dflag);
5319        modrm = x86_ldub_code(env, s);
5320        reg = ((modrm >> 3) & 7) | REX_R(s);
5321        mod = (modrm >> 6) & 3;
5322        gen_op_mov_v_reg(s, ot, s->T0, reg);
5323        if (mod == 3) {
5324            rm = (modrm & 7) | REX_B(s);
5325            gen_op_mov_v_reg(s, ot, s->T1, rm);
5326            tcg_gen_add_tl(s->T0, s->T0, s->T1);
5327            gen_op_mov_reg_v(s, ot, reg, s->T1);
5328            gen_op_mov_reg_v(s, ot, rm, s->T0);
5329        } else {
5330            gen_lea_modrm(env, s, modrm);
5331            if (s->prefix & PREFIX_LOCK) {
5332                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5333                                            s->mem_index, ot | MO_LE);
5334                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5335            } else {
5336                gen_op_ld_v(s, ot, s->T1, s->A0);
5337                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5338                gen_op_st_v(s, ot, s->T0, s->A0);
5339            }
5340            gen_op_mov_reg_v(s, ot, reg, s->T1);
5341        }
5342        gen_op_update2_cc(s);
5343        set_cc_op(s, CC_OP_ADDB + ot);
5344        break;
5345    case 0x1b0:
5346    case 0x1b1: /* cmpxchg Ev, Gv */
5347        {
5348            TCGv oldv, newv, cmpv;
5349
5350            ot = mo_b_d(b, dflag);
5351            modrm = x86_ldub_code(env, s);
5352            reg = ((modrm >> 3) & 7) | REX_R(s);
5353            mod = (modrm >> 6) & 3;
5354            oldv = tcg_temp_new();
5355            newv = tcg_temp_new();
5356            cmpv = tcg_temp_new();
5357            gen_op_mov_v_reg(s, ot, newv, reg);
5358            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5359
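                /*
                 * CMPXCHG compares the accumulator with the destination.
                 * With LOCK we defer to the host's atomic cmpxchg;
                 * otherwise hardware is mimicked with a movcond plus an
                 * unconditional store cycle (see the comment below).
                 */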
5360            if (s->prefix & PREFIX_LOCK) {
5361                if (mod == 3) {
5362                    goto illegal_op;
5363                }
5364                gen_lea_modrm(env, s, modrm);
5365                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5366                                          s->mem_index, ot | MO_LE);
5367                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5368            } else {
5369                if (mod == 3) {
5370                    rm = (modrm & 7) | REX_B(s);
5371                    gen_op_mov_v_reg(s, ot, oldv, rm);
5372                } else {
5373                    gen_lea_modrm(env, s, modrm);
5374                    gen_op_ld_v(s, ot, oldv, s->A0);
5375                    rm = 0; /* avoid warning */
5376                }
5377                gen_extu(ot, oldv);
5378                gen_extu(ot, cmpv);
5379                /* store value = (old == cmp ? new : old);  */
5380                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5381                if (mod == 3) {
5382                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5383                    gen_op_mov_reg_v(s, ot, rm, newv);
5384                } else {
5385                    /* Perform an unconditional store cycle like a physical
5386                       CPU; it must happen before the accumulator is changed,
5387                       to ensure idempotency if the store faults and the
5388                       instruction is restarted. */
5389                    gen_op_st_v(s, ot, newv, s->A0);
5390                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5391                }
5392            }
5393            tcg_gen_mov_tl(cpu_cc_src, oldv);
5394            tcg_gen_mov_tl(s->cc_srcT, cmpv);
5395            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5396            set_cc_op(s, CC_OP_SUBB + ot);
5397            tcg_temp_free(oldv);
5398            tcg_temp_free(newv);
5399            tcg_temp_free(cmpv);
5400        }
5401        break;
5402    case 0x1c7: /* cmpxchg8b */
5403        modrm = x86_ldub_code(env, s);
5404        mod = (modrm >> 6) & 3;
5405        switch ((modrm >> 3) & 7) {
5406        case 1: /* CMPXCHG8B, CMPXCHG16B */
5407            if (mod == 3) {
5408                goto illegal_op;
5409            }
5410#ifdef TARGET_X86_64
5411            if (dflag == MO_64) {
5412                if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5413                    goto illegal_op;
5414                }
5415                gen_lea_modrm(env, s, modrm);
5416                if ((s->prefix & PREFIX_LOCK) &&
5417                    (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5418                    gen_helper_cmpxchg16b(cpu_env, s->A0);
5419                } else {
5420                    gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5421                }
5422                set_cc_op(s, CC_OP_EFLAGS);
5423                break;
5424            }
5425#endif
5426            if (!(s->cpuid_features & CPUID_CX8)) {
5427                goto illegal_op;
5428            }
5429            gen_lea_modrm(env, s, modrm);
5430            if ((s->prefix & PREFIX_LOCK) &&
5431                (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5432                gen_helper_cmpxchg8b(cpu_env, s->A0);
5433            } else {
5434                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5435            }
5436            set_cc_op(s, CC_OP_EFLAGS);
5437            break;
5438
5439        case 7: /* RDSEED */
5440        case 6: /* RDRAND */
5441            if (mod != 3 ||
5442                (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5443                !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5444                goto illegal_op;
5445            }
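                /*
                 * Under icount, RDRAND is bracketed with gen_io_start and
                 * the TB is ended afterwards, presumably so that its
                 * non-deterministic result can be handled like other I/O
                 * for record/replay.
                 */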
5446            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5447                gen_io_start();
5448            }
5449            gen_helper_rdrand(s->T0, cpu_env);
5450            rm = (modrm & 7) | REX_B(s);
5451            gen_op_mov_reg_v(s, dflag, rm, s->T0);
5452            set_cc_op(s, CC_OP_EFLAGS);
5453            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5454                gen_jmp(s, s->pc - s->cs_base);
5455            }
5456            break;
5457
5458        default:
5459            goto illegal_op;
5460        }
5461        break;
5462
5463        /**************************/
5464        /* push/pop */
5465    case 0x50 ... 0x57: /* push */
5466        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5467        gen_push_v(s, s->T0);
5468        break;
5469    case 0x58 ... 0x5f: /* pop */
5470        ot = gen_pop_T0(s);
5471        /* NOTE: order is important for pop %sp */
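            /*
             * gen_pop_update adjusts ESP before the register write, so
             * "pop %sp/%esp" ends up holding the value loaded from the
             * stack rather than that value plus the stack adjustment.
             */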
5472        gen_pop_update(s, ot);
5473        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5474        break;
5475    case 0x60: /* pusha */
5476        if (CODE64(s))
5477            goto illegal_op;
5478        gen_pusha(s);
5479        break;
5480    case 0x61: /* popa */
5481        if (CODE64(s))
5482            goto illegal_op;
5483        gen_popa(s);
5484        break;
5485    case 0x68: /* push Iv */
5486    case 0x6a:
5487        ot = mo_pushpop(s, dflag);
5488        if (b == 0x68)
5489            val = insn_get(env, s, ot);
5490        else
5491            val = (int8_t)insn_get(env, s, MO_8);
5492        tcg_gen_movi_tl(s->T0, val);
5493        gen_push_v(s, s->T0);
5494        break;
5495    case 0x8f: /* pop Ev */
5496        modrm = x86_ldub_code(env, s);
5497        mod = (modrm >> 6) & 3;
5498        ot = gen_pop_T0(s);
5499        if (mod == 3) {
5500            /* NOTE: order is important for pop %sp */
5501            gen_pop_update(s, ot);
5502            rm = (modrm & 7) | REX_B(s);
5503            gen_op_mov_reg_v(s, ot, rm, s->T0);
5504        } else {
5505            /* NOTE: order is important too for MMU exceptions */
5506            s->popl_esp_hack = 1 << ot;
5507            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5508            s->popl_esp_hack = 0;
5509            gen_pop_update(s, ot);
5510        }
5511        break;
5512    case 0xc8: /* enter */
5513        {
5514            int level;
5515            val = x86_lduw_code(env, s);
5516            level = x86_ldub_code(env, s);
5517            gen_enter(s, val, level);
5518        }
5519        break;
5520    case 0xc9: /* leave */
5521        gen_leave(s);
5522        break;
5523    case 0x06: /* push es */
5524    case 0x0e: /* push cs */
5525    case 0x16: /* push ss */
5526    case 0x1e: /* push ds */
5527        if (CODE64(s))
5528            goto illegal_op;
5529        gen_op_movl_T0_seg(s, b >> 3);
5530        gen_push_v(s, s->T0);
5531        break;
5532    case 0x1a0: /* push fs */
5533    case 0x1a8: /* push gs */
5534        gen_op_movl_T0_seg(s, (b >> 3) & 7);
5535        gen_push_v(s, s->T0);
5536        break;
5537    case 0x07: /* pop es */
5538    case 0x17: /* pop ss */
5539    case 0x1f: /* pop ds */
5540        if (CODE64(s))
5541            goto illegal_op;
5542        reg = b >> 3;
5543        ot = gen_pop_T0(s);
5544        gen_movl_seg_T0(s, reg);
5545        gen_pop_update(s, ot);
5546        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5547        if (s->base.is_jmp) {
5548            gen_jmp_im(s, s->pc - s->cs_base);
5549            if (reg == R_SS) {
5550                s->flags &= ~HF_TF_MASK;
5551                gen_eob_inhibit_irq(s, true);
5552            } else {
5553                gen_eob(s);
5554            }
5555        }
5556        break;
5557    case 0x1a1: /* pop fs */
5558    case 0x1a9: /* pop gs */
5559        ot = gen_pop_T0(s);
5560        gen_movl_seg_T0(s, (b >> 3) & 7);
5561        gen_pop_update(s, ot);
5562        if (s->base.is_jmp) {
5563            gen_jmp_im(s, s->pc - s->cs_base);
5564            gen_eob(s);
5565        }
5566        break;
5567
5568        /**************************/
5569        /* mov */
5570    case 0x88:
5571    case 0x89: /* mov Gv, Ev */
5572        ot = mo_b_d(b, dflag);
5573        modrm = x86_ldub_code(env, s);
5574        reg = ((modrm >> 3) & 7) | REX_R(s);
5575
5576        /* generate a generic store */
5577        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5578        break;
5579    case 0xc6:
5580    case 0xc7: /* mov Ev, Iv */
5581        ot = mo_b_d(b, dflag);
5582        modrm = x86_ldub_code(env, s);
5583        mod = (modrm >> 6) & 3;
5584        if (mod != 3) {
5585            s->rip_offset = insn_const_size(ot);
5586            gen_lea_modrm(env, s, modrm);
5587        }
5588        val = insn_get(env, s, ot);
5589        tcg_gen_movi_tl(s->T0, val);
5590        if (mod != 3) {
5591            gen_op_st_v(s, ot, s->T0, s->A0);
5592        } else {
5593            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5594        }
5595        break;
5596    case 0x8a:
5597    case 0x8b: /* mov Ev, Gv */
5598        ot = mo_b_d(b, dflag);
5599        modrm = x86_ldub_code(env, s);
5600        reg = ((modrm >> 3) & 7) | REX_R(s);
5601
5602        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5603        gen_op_mov_reg_v(s, ot, reg, s->T0);
5604        break;
5605    case 0x8e: /* mov seg, Gv */
5606        modrm = x86_ldub_code(env, s);
5607        reg = (modrm >> 3) & 7;
5608        if (reg >= 6 || reg == R_CS)
5609            goto illegal_op;
5610        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5611        gen_movl_seg_T0(s, reg);
5612        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5613        if (s->base.is_jmp) {
5614            gen_jmp_im(s, s->pc - s->cs_base);
5615            if (reg == R_SS) {
5616                s->flags &= ~HF_TF_MASK;
5617                gen_eob_inhibit_irq(s, true);
5618            } else {
5619                gen_eob(s);
5620            }
5621        }
5622        break;
5623    case 0x8c: /* mov Gv, seg */
5624        modrm = x86_ldub_code(env, s);
5625        reg = (modrm >> 3) & 7;
5626        mod = (modrm >> 6) & 3;
5627        if (reg >= 6)
5628            goto illegal_op;
5629        gen_op_movl_T0_seg(s, reg);
5630        ot = mod == 3 ? dflag : MO_16;
5631        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5632        break;
5633
5634    case 0x1b6: /* movzbS Gv, Eb */
5635    case 0x1b7: /* movzwS Gv, Ew */
5636    case 0x1be: /* movsbS Gv, Eb */
5637    case 0x1bf: /* movswS Gv, Ew */
5638        {
5639            MemOp d_ot;
5640            MemOp s_ot;
5641
5642            /* d_ot is the size of destination */
5643            d_ot = dflag;
5644            /* ot is the size of source */
5645            ot = (b & 1) + MO_8;
5646            /* s_ot is the sign+size of source */
5647            s_ot = b & 8 ? MO_SIGN | ot : ot;
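                /*
                 * Opcode bit 0 selects the source width (byte/word) and
                 * bit 3 distinguishes movzx (0x1b6/0x1b7) from movsx
                 * (0x1be/0x1bf).  The xH case below handles AH/CH/DH/BH,
                 * which live in bits 15:8 of the low registers when no
                 * REX prefix is in effect.
                 */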
5648
5649            modrm = x86_ldub_code(env, s);
5650            reg = ((modrm >> 3) & 7) | REX_R(s);
5651            mod = (modrm >> 6) & 3;
5652            rm = (modrm & 7) | REX_B(s);
5653
5654            if (mod == 3) {
5655                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5656                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5657                } else {
5658                    gen_op_mov_v_reg(s, ot, s->T0, rm);
5659                    switch (s_ot) {
5660                    case MO_UB:
5661                        tcg_gen_ext8u_tl(s->T0, s->T0);
5662                        break;
5663                    case MO_SB:
5664                        tcg_gen_ext8s_tl(s->T0, s->T0);
5665                        break;
5666                    case MO_UW:
5667                        tcg_gen_ext16u_tl(s->T0, s->T0);
5668                        break;
5669                    default:
5670                    case MO_SW:
5671                        tcg_gen_ext16s_tl(s->T0, s->T0);
5672                        break;
5673                    }
5674                }
5675                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5676            } else {
5677                gen_lea_modrm(env, s, modrm);
5678                gen_op_ld_v(s, s_ot, s->T0, s->A0);
5679                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5680            }
5681        }
5682        break;
5683
5684    case 0x8d: /* lea */
5685        modrm = x86_ldub_code(env, s);
5686        mod = (modrm >> 6) & 3;
5687        if (mod == 3)
5688            goto illegal_op;
5689        reg = ((modrm >> 3) & 7) | REX_R(s);
5690        {
5691            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5692            TCGv ea = gen_lea_modrm_1(s, a);
5693            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5694            gen_op_mov_reg_v(s, dflag, reg, s->A0);
5695        }
5696        break;
5697
5698    case 0xa0: /* mov EAX, Ov */
5699    case 0xa1:
5700    case 0xa2: /* mov Ov, EAX */
5701    case 0xa3:
5702        {
5703            target_ulong offset_addr;
5704
5705            ot = mo_b_d(b, dflag);
5706            switch (s->aflag) {
5707#ifdef TARGET_X86_64
5708            case MO_64:
5709                offset_addr = x86_ldq_code(env, s);
5710                break;
5711#endif
5712            default:
5713                offset_addr = insn_get(env, s, s->aflag);
5714                break;
5715            }
5716            tcg_gen_movi_tl(s->A0, offset_addr);
5717            gen_add_A0_ds_seg(s);
5718            if ((b & 2) == 0) {
5719                gen_op_ld_v(s, ot, s->T0, s->A0);
5720                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5721            } else {
5722                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5723                gen_op_st_v(s, ot, s->T0, s->A0);
5724            }
5725        }
5726        break;
5727    case 0xd7: /* xlat */
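            /*
             * XLAT: AL = [seg:(E)BX + unsigned AL], truncated to the
             * current address size; the segment is DS unless overridden.
             */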
5728        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5729        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5730        tcg_gen_add_tl(s->A0, s->A0, s->T0);
5731        gen_extu(s->aflag, s->A0);
5732        gen_add_A0_ds_seg(s);
5733        gen_op_ld_v(s, MO_8, s->T0, s->A0);
5734        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5735        break;
5736    case 0xb0 ... 0xb7: /* mov R, Ib */
5737        val = insn_get(env, s, MO_8);
5738        tcg_gen_movi_tl(s->T0, val);
5739        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5740        break;
5741    case 0xb8 ... 0xbf: /* mov R, Iv */
5742#ifdef TARGET_X86_64
5743        if (dflag == MO_64) {
5744            uint64_t tmp;
5745            /* 64 bit case */
5746            tmp = x86_ldq_code(env, s);
5747            reg = (b & 7) | REX_B(s);
5748            tcg_gen_movi_tl(s->T0, tmp);
5749            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5750        } else
5751#endif
5752        {
5753            ot = dflag;
5754            val = insn_get(env, s, ot);
5755            reg = (b & 7) | REX_B(s);
5756            tcg_gen_movi_tl(s->T0, val);
5757            gen_op_mov_reg_v(s, ot, reg, s->T0);
5758        }
5759        break;
5760
5761    case 0x91 ... 0x97: /* xchg R, EAX */
5762    do_xchg_reg_eax:
5763        ot = dflag;
5764        reg = (b & 7) | REX_B(s);
5765        rm = R_EAX;
5766        goto do_xchg_reg;
5767    case 0x86:
5768    case 0x87: /* xchg Ev, Gv */
5769        ot = mo_b_d(b, dflag);
5770        modrm = x86_ldub_code(env, s);
5771        reg = ((modrm >> 3) & 7) | REX_R(s);
5772        mod = (modrm >> 6) & 3;
5773        if (mod == 3) {
5774            rm = (modrm & 7) | REX_B(s);
5775        do_xchg_reg:
5776            gen_op_mov_v_reg(s, ot, s->T0, reg);
5777            gen_op_mov_v_reg(s, ot, s->T1, rm);
5778            gen_op_mov_reg_v(s, ot, rm, s->T0);
5779            gen_op_mov_reg_v(s, ot, reg, s->T1);
5780        } else {
5781            gen_lea_modrm(env, s, modrm);
5782            gen_op_mov_v_reg(s, ot, s->T0, reg);
5783            /* for xchg, lock is implicit */
5784            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5785                                   s->mem_index, ot | MO_LE);
5786            gen_op_mov_reg_v(s, ot, reg, s->T1);
5787        }
5788        break;
5789    case 0xc4: /* les Gv */
5790        /* In CODE64 this is VEX3; see above.  */
5791        op = R_ES;
5792        goto do_lxx;
5793    case 0xc5: /* lds Gv */
5794        /* In CODE64 this is VEX2; see above.  */
5795        op = R_DS;
5796        goto do_lxx;
5797    case 0x1b2: /* lss Gv */
5798        op = R_SS;
5799        goto do_lxx;
5800    case 0x1b4: /* lfs Gv */
5801        op = R_FS;
5802        goto do_lxx;
5803    case 0x1b5: /* lgs Gv */
5804        op = R_GS;
5805    do_lxx:
5806        ot = dflag != MO_16 ? MO_32 : MO_16;
5807        modrm = x86_ldub_code(env, s);
5808        reg = ((modrm >> 3) & 7) | REX_R(s);
5809        mod = (modrm >> 6) & 3;
5810        if (mod == 3)
5811            goto illegal_op;
5812        gen_lea_modrm(env, s, modrm);
5813        gen_op_ld_v(s, ot, s->T1, s->A0);
5814        gen_add_A0_im(s, 1 << ot);
5815        /* load the segment first to handle exceptions properly */
5816        gen_op_ld_v(s, MO_16, s->T0, s->A0);
5817        gen_movl_seg_T0(s, op);
5818        /* then put the data */
5819        gen_op_mov_reg_v(s, ot, reg, s->T1);
5820        if (s->base.is_jmp) {
5821            gen_jmp_im(s, s->pc - s->cs_base);
5822            gen_eob(s);
5823        }
5824        break;
5825
5826        /************************/
5827        /* shifts */
5828    case 0xc0:
5829    case 0xc1:
5830        /* shift Ev,Ib */
5831        shift = 2;
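            /*
             * 'shift' encodes the count source: 2 = an imm8 follows
             * (0xc0/0xc1), 1 = constant one (0xd0/0xd1), 0 = the CL
             * register (0xd2/0xd3).
             */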
5832    grp2:
5833        {
5834            ot = mo_b_d(b, dflag);
5835            modrm = x86_ldub_code(env, s);
5836            mod = (modrm >> 6) & 3;
5837            op = (modrm >> 3) & 7;
5838
5839            if (mod != 3) {
5840                if (shift == 2) {
5841                    s->rip_offset = 1;
5842                }
5843                gen_lea_modrm(env, s, modrm);
5844                opreg = OR_TMP0;
5845            } else {
5846                opreg = (modrm & 7) | REX_B(s);
5847            }
5848
5849            /* simpler op */
5850            if (shift == 0) {
5851                gen_shift(s, op, ot, opreg, OR_ECX);
5852            } else {
5853                if (shift == 2) {
5854                    shift = x86_ldub_code(env, s);
5855                }
5856                gen_shifti(s, op, ot, opreg, shift);
5857            }
5858        }
5859        break;
5860    case 0xd0:
5861    case 0xd1:
5862        /* shift Ev,1 */
5863        shift = 1;
5864        goto grp2;
5865    case 0xd2:
5866    case 0xd3:
5867        /* shift Ev,cl */
5868        shift = 0;
5869        goto grp2;
5870
5871    case 0x1a4: /* shld imm */
5872        op = 0;
5873        shift = 1;
5874        goto do_shiftd;
5875    case 0x1a5: /* shld cl */
5876        op = 0;
5877        shift = 0;
5878        goto do_shiftd;
5879    case 0x1ac: /* shrd imm */
5880        op = 1;
5881        shift = 1;
5882        goto do_shiftd;
5883    case 0x1ad: /* shrd cl */
5884        op = 1;
5885        shift = 0;
5886    do_shiftd:
5887        ot = dflag;
5888        modrm = x86_ldub_code(env, s);
5889        mod = (modrm >> 6) & 3;
5890        rm = (modrm & 7) | REX_B(s);
5891        reg = ((modrm >> 3) & 7) | REX_R(s);
5892        if (mod != 3) {
5893            gen_lea_modrm(env, s, modrm);
5894            opreg = OR_TMP0;
5895        } else {
5896            opreg = rm;
5897        }
5898        gen_op_mov_v_reg(s, ot, s->T1, reg);
5899
5900        if (shift) {
5901            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5902            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5903            tcg_temp_free(imm);
5904        } else {
5905            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5906        }
5907        break;
5908
5909        /************************/
5910        /* floats */
5911    case 0xd8 ... 0xdf:
5912        {
5913            bool update_fip = true;
5914
5915            if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5916                /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5917                /* XXX: what to do if illegal op? */
5918                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5919                break;
5920            }
5921            modrm = x86_ldub_code(env, s);
5922            mod = (modrm >> 6) & 3;
5923            rm = modrm & 7;
5924            op = ((b & 7) << 3) | ((modrm >> 3) & 7);
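                /*
                 * Fold the low three opcode bits (0xd8-0xdf) and the
                 * modrm reg field into one 6-bit index, giving the usual
                 * 8x8 x87 dispatch.
                 */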
5925            if (mod != 3) {
5926                /* memory op */
5927                AddressParts a = gen_lea_modrm_0(env, s, modrm);
5928                TCGv ea = gen_lea_modrm_1(s, a);
5929                TCGv last_addr = tcg_temp_new();
5930                bool update_fdp = true;
5931
5932                tcg_gen_mov_tl(last_addr, ea);
5933                gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
5934
5935                switch (op) {
5936                case 0x00 ... 0x07: /* fxxxs */
5937                case 0x10 ... 0x17: /* fixxxl */
5938                case 0x20 ... 0x27: /* fxxxl */
5939                case 0x30 ... 0x37: /* fixxx */
5940                    {
5941                        int op1;
5942                        op1 = op & 7;
5943
5944                        switch (op >> 4) {
5945                        case 0:
5946                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5947                                                s->mem_index, MO_LEUL);
5948                            gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5949                            break;
5950                        case 1:
5951                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5952                                                s->mem_index, MO_LEUL);
5953                            gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5954                            break;
5955                        case 2:
5956                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5957                                                s->mem_index, MO_LEQ);
5958                            gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5959                            break;
5960                        case 3:
5961                        default:
5962                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5963                                                s->mem_index, MO_LESW);
5964                            gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5965                            break;
5966                        }
5967
5968                        gen_helper_fp_arith_ST0_FT0(op1);
5969                        if (op1 == 3) {
5970                            /* fcomp needs pop */
5971                            gen_helper_fpop(cpu_env);
5972                        }
5973                    }
5974                    break;
5975                case 0x08: /* flds */
5976                case 0x0a: /* fsts */
5977                case 0x0b: /* fstps */
5978                case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5979                case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5980                case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5981                    switch (op & 7) {
5982                    case 0:
5983                        switch (op >> 4) {
5984                        case 0:
5985                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5986                                                s->mem_index, MO_LEUL);
5987                            gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5988                            break;
5989                        case 1:
5990                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5991                                                s->mem_index, MO_LEUL);
5992                            gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5993                            break;
5994                        case 2:
5995                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5996                                                s->mem_index, MO_LEQ);
5997                            gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5998                            break;
5999                        case 3:
6000                        default:
6001                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6002                                                s->mem_index, MO_LESW);
6003                            gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6004                            break;
6005                        }
6006                        break;
6007                    case 1:
6008                        /* XXX: the corresponding CPUID bit (SSE3, for FISTTP) must be tested! */
6009                        switch (op >> 4) {
6010                        case 1:
6011                            gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6012                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6013                                                s->mem_index, MO_LEUL);
6014                            break;
6015                        case 2:
6016                            gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6017                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6018                                                s->mem_index, MO_LEQ);
6019                            break;
6020                        case 3:
6021                        default:
6022                            gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6023                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6024                                                s->mem_index, MO_LEUW);
6025                            break;
6026                        }
6027                        gen_helper_fpop(cpu_env);
6028                        break;
6029                    default:
6030                        switch (op >> 4) {
6031                        case 0:
6032                            gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6033                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6034                                                s->mem_index, MO_LEUL);
6035                            break;
6036                        case 1:
6037                            gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6038                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6039                                                s->mem_index, MO_LEUL);
6040                            break;
6041                        case 2:
6042                            gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6043                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6044                                                s->mem_index, MO_LEQ);
6045                            break;
6046                        case 3:
6047                        default:
6048                            gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6049                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6050                                                s->mem_index, MO_LEUW);
6051                            break;
6052                        }
6053                        if ((op & 7) == 3) {
6054                            gen_helper_fpop(cpu_env);
6055                        }
6056                        break;
6057                    }
6058                    break;
6059                case 0x0c: /* fldenv mem */
6060                    gen_helper_fldenv(cpu_env, s->A0,
6061                                      tcg_const_i32(dflag - 1));
6062                    update_fip = update_fdp = false;
6063                    break;
6064                case 0x0d: /* fldcw mem */
6065                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6066                                        s->mem_index, MO_LEUW);
6067                    gen_helper_fldcw(cpu_env, s->tmp2_i32);
6068                    update_fip = update_fdp = false;
6069                    break;
6070                case 0x0e: /* fnstenv mem */
6071                    gen_helper_fstenv(cpu_env, s->A0,
6072                                      tcg_const_i32(dflag - 1));
6073                    update_fip = update_fdp = false;
6074                    break;
6075                case 0x0f: /* fnstcw mem */
6076                    gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6077                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6078                                        s->mem_index, MO_LEUW);
6079                    update_fip = update_fdp = false;
6080                    break;
6081                case 0x1d: /* fldt mem */
6082                    gen_helper_fldt_ST0(cpu_env, s->A0);
6083                    break;
6084                case 0x1f: /* fstpt mem */
6085                    gen_helper_fstt_ST0(cpu_env, s->A0);
6086                    gen_helper_fpop(cpu_env);
6087                    break;
6088                case 0x2c: /* frstor mem */
6089                    gen_helper_frstor(cpu_env, s->A0,
6090                                      tcg_const_i32(dflag - 1));
6091                    update_fip = update_fdp = false;
6092                    break;
6093                case 0x2e: /* fnsave mem */
6094                    gen_helper_fsave(cpu_env, s->A0,
6095                                     tcg_const_i32(dflag - 1));
6096                    update_fip = update_fdp = false;
6097                    break;
6098                case 0x2f: /* fnstsw mem */
6099                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6100                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6101                                        s->mem_index, MO_LEUW);
6102                    update_fip = update_fdp = false;
6103                    break;
6104                case 0x3c: /* fbld */
6105                    gen_helper_fbld_ST0(cpu_env, s->A0);
6106                    break;
6107                case 0x3e: /* fbstp */
6108                    gen_helper_fbst_ST0(cpu_env, s->A0);
6109                    gen_helper_fpop(cpu_env);
6110                    break;
6111                case 0x3d: /* fildll */
6112                    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6113                                        s->mem_index, MO_LEQ);
6114                    gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6115                    break;
6116                case 0x3f: /* fistpll */
6117                    gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6118                    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6119                                        s->mem_index, MO_LEQ);
6120                    gen_helper_fpop(cpu_env);
6121                    break;
6122                default:
6123                    goto unknown_op;
6124                }
6125
6126                if (update_fdp) {
6127                    int last_seg = s->override >= 0 ? s->override : a.def_seg;
6128
6129                    tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6130                                   offsetof(CPUX86State,
6131                                            segs[last_seg].selector));
6132                    tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6133                                     offsetof(CPUX86State, fpds));
6134                    tcg_gen_st_tl(last_addr, cpu_env,
6135                                  offsetof(CPUX86State, fpdp));
6136                }
6137                tcg_temp_free(last_addr);
6138            } else {
6139                /* register float ops */
6140                opreg = rm;
6141
6142                switch (op) {
6143                case 0x08: /* fld sti */
6144                    gen_helper_fpush(cpu_env);
6145                    gen_helper_fmov_ST0_STN(cpu_env,
6146                                            tcg_const_i32((opreg + 1) & 7));
6147                    break;
6148                case 0x09: /* fxchg sti */
6149                case 0x29: /* fxchg4 sti, undocumented op */
6150                case 0x39: /* fxchg7 sti, undocumented op */
6151                    gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6152                    break;
6153                case 0x0a: /* grp d9/2 */
6154                    switch (rm) {
6155                    case 0: /* fnop */
6156                        /* check exceptions (FreeBSD FPU probe) */
6157                        gen_helper_fwait(cpu_env);
6158                        update_fip = false;
6159                        break;
6160                    default:
6161                        goto unknown_op;
6162                    }
6163                    break;
6164                case 0x0c: /* grp d9/4 */
6165                    switch (rm) {
6166                    case 0: /* fchs */
6167                        gen_helper_fchs_ST0(cpu_env);
6168                        break;
6169                    case 1: /* fabs */
6170                        gen_helper_fabs_ST0(cpu_env);
6171                        break;
6172                    case 4: /* ftst */
6173                        gen_helper_fldz_FT0(cpu_env);
6174                        gen_helper_fcom_ST0_FT0(cpu_env);
6175                        break;
6176                    case 5: /* fxam */
6177                        gen_helper_fxam_ST0(cpu_env);
6178                        break;
6179                    default:
6180                        goto unknown_op;
6181                    }
6182                    break;
6183                case 0x0d: /* grp d9/5 */
6184                    {
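                            /* Constant loads for rm = 0..6: fld1, fldl2t,
                               fldl2e, fldpi, fldlg2, fldln2, fldz. */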
6185                        switch (rm) {
6186                        case 0:
6187                            gen_helper_fpush(cpu_env);
6188                            gen_helper_fld1_ST0(cpu_env);
6189                            break;
6190                        case 1:
6191                            gen_helper_fpush(cpu_env);
6192                            gen_helper_fldl2t_ST0(cpu_env);
6193                            break;
6194                        case 2:
6195                            gen_helper_fpush(cpu_env);
6196                            gen_helper_fldl2e_ST0(cpu_env);
6197                            break;
6198                        case 3:
6199                            gen_helper_fpush(cpu_env);
6200                            gen_helper_fldpi_ST0(cpu_env);
6201                            break;
6202                        case 4:
6203                            gen_helper_fpush(cpu_env);
6204                            gen_helper_fldlg2_ST0(cpu_env);
6205                            break;
6206                        case 5:
6207                            gen_helper_fpush(cpu_env);
6208                            gen_helper_fldln2_ST0(cpu_env);
6209                            break;
6210                        case 6:
6211                            gen_helper_fpush(cpu_env);
6212                            gen_helper_fldz_ST0(cpu_env);
6213                            break;
6214                        default:
6215                            goto unknown_op;
6216                        }
6217                    }
6218                    break;
6219                case 0x0e: /* grp d9/6 */
6220                    switch (rm) {
6221                    case 0: /* f2xm1 */
6222                        gen_helper_f2xm1(cpu_env);
6223                        break;
6224                    case 1: /* fyl2x */
6225                        gen_helper_fyl2x(cpu_env);
6226                        break;
6227                    case 2: /* fptan */
6228                        gen_helper_fptan(cpu_env);
6229                        break;
6230                    case 3: /* fpatan */
6231                        gen_helper_fpatan(cpu_env);
6232                        break;
6233                    case 4: /* fxtract */
6234                        gen_helper_fxtract(cpu_env);
6235                        break;
6236                    case 5: /* fprem1 */
6237                        gen_helper_fprem1(cpu_env);
6238                        break;
6239                    case 6: /* fdecstp */
6240                        gen_helper_fdecstp(cpu_env);
6241                        break;
6242                    default:
6243                    case 7: /* fincstp */
6244                        gen_helper_fincstp(cpu_env);
6245                        break;
6246                    }
6247                    break;
6248                case 0x0f: /* grp d9/7 */
6249                    switch (rm) {
6250                    case 0: /* fprem */
6251                        gen_helper_fprem(cpu_env);
6252                        break;
6253                    case 1: /* fyl2xp1 */
6254                        gen_helper_fyl2xp1(cpu_env);
6255                        break;
6256                    case 2: /* fsqrt */
6257                        gen_helper_fsqrt(cpu_env);
6258                        break;
6259                    case 3: /* fsincos */
6260                        gen_helper_fsincos(cpu_env);
6261                        break;
6262                    case 5: /* fscale */
6263                        gen_helper_fscale(cpu_env);
6264                        break;
6265                    case 4: /* frndint */
6266                        gen_helper_frndint(cpu_env);
6267                        break;
6268                    case 6: /* fsin */
6269                        gen_helper_fsin(cpu_env);
6270                        break;
6271                    default:
6272                    case 7: /* fcos */
6273                        gen_helper_fcos(cpu_env);
6274                        break;
6275                    }
6276                    break;
6277                case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6278                case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6279                case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6280                    {
6281                        int op1;
6282
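                            /* The opcode group selects the direction:
                               0x0x computes ST0 = ST0 op STi, 0x2x
                               computes STi = STi op ST0, and 0x3x is
                               the STi form followed by a pop. */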
6283                        op1 = op & 7;
6284                        if (op >= 0x20) {
6285                            gen_helper_fp_arith_STN_ST0(op1, opreg);
6286                            if (op >= 0x30) {
6287                                gen_helper_fpop(cpu_env);
6288                            }
6289                        } else {
6290                            gen_helper_fmov_FT0_STN(cpu_env,
6291                                                    tcg_const_i32(opreg));
6292                            gen_helper_fp_arith_ST0_FT0(op1);
6293                        }
6294                    }
6295                    break;
6296                case 0x02: /* fcom */
6297                case 0x22: /* fcom2, undocumented op */
6298                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6299                    gen_helper_fcom_ST0_FT0(cpu_env);
6300                    break;
6301                case 0x03: /* fcomp */
6302                case 0x23: /* fcomp3, undocumented op */
6303                case 0x32: /* fcomp5, undocumented op */
6304                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6305                    gen_helper_fcom_ST0_FT0(cpu_env);
6306                    gen_helper_fpop(cpu_env);
6307                    break;
6308                case 0x15: /* da/5 */
6309                    switch (rm) {
6310                    case 1: /* fucompp */
6311                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6312                        gen_helper_fucom_ST0_FT0(cpu_env);
6313                        gen_helper_fpop(cpu_env);
6314                        gen_helper_fpop(cpu_env);
6315                        break;
6316                    default:
6317                        goto unknown_op;
6318                    }
6319                    break;
6320                case 0x1c:
6321                    switch (rm) {
6322                    case 0: /* feni (287 only, just do nop here) */
6323                        break;
6324                    case 1: /* fdisi (287 only, just do nop here) */
6325                        break;
6326                    case 2: /* fclex */
6327                        gen_helper_fclex(cpu_env);
6328                        update_fip = false;
6329                        break;
6330                    case 3: /* fninit */
6331                        gen_helper_fninit(cpu_env);
6332                        update_fip = false;
6333                        break;
6334                    case 4: /* fsetpm (287 only, just do nop here) */
6335                        break;
6336                    default:
6337                        goto unknown_op;
6338                    }
6339                    break;
6340                case 0x1d: /* fucomi */
6341                    if (!(s->cpuid_features & CPUID_CMOV)) {
6342                        goto illegal_op;
6343                    }
6344                    gen_update_cc_op(s);
6345                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6346                    gen_helper_fucomi_ST0_FT0(cpu_env);
6347                    set_cc_op(s, CC_OP_EFLAGS);
6348                    break;
6349                case 0x1e: /* fcomi */
6350                    if (!(s->cpuid_features & CPUID_CMOV)) {
6351                        goto illegal_op;
6352                    }
6353                    gen_update_cc_op(s);
6354                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6355                    gen_helper_fcomi_ST0_FT0(cpu_env);
6356                    set_cc_op(s, CC_OP_EFLAGS);
6357                    break;
6358                case 0x28: /* ffree sti */
6359                    gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6360                    break;
6361                case 0x2a: /* fst sti */
6362                    gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6363                    break;
6364                case 0x2b: /* fstp sti */
6365                case 0x0b: /* fstp1 sti, undocumented op */
6366                case 0x3a: /* fstp8 sti, undocumented op */
6367                case 0x3b: /* fstp9 sti, undocumented op */
6368                    gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6369                    gen_helper_fpop(cpu_env);
6370                    break;
6371                case 0x2c: /* fucom st(i) */
6372                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6373                    gen_helper_fucom_ST0_FT0(cpu_env);
6374                    break;
6375                case 0x2d: /* fucomp st(i) */
6376                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6377                    gen_helper_fucom_ST0_FT0(cpu_env);
6378                    gen_helper_fpop(cpu_env);
6379                    break;
6380                case 0x33: /* de/3 */
6381                    switch (rm) {
6382                    case 1: /* fcompp */
6383                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6384                        gen_helper_fcom_ST0_FT0(cpu_env);
6385                        gen_helper_fpop(cpu_env);
6386                        gen_helper_fpop(cpu_env);
6387                        break;
6388                    default:
6389                        goto unknown_op;
6390                    }
6391                    break;
6392                case 0x38: /* ffreep sti, undocumented op */
6393                    gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6394                    gen_helper_fpop(cpu_env);
6395                    break;
6396                case 0x3c: /* df/4 */
6397                    switch (rm) {
6398                    case 0:
6399                        gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6400                        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6401                        gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6402                        break;
6403                    default:
6404                        goto unknown_op;
6405                    }
6406                    break;
6407                case 0x3d: /* fucomip */
6408                    if (!(s->cpuid_features & CPUID_CMOV)) {
6409                        goto illegal_op;
6410                    }
6411                    gen_update_cc_op(s);
6412                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6413                    gen_helper_fucomi_ST0_FT0(cpu_env);
6414                    gen_helper_fpop(cpu_env);
6415                    set_cc_op(s, CC_OP_EFLAGS);
6416                    break;
6417                case 0x3e: /* fcomip */
6418                    if (!(s->cpuid_features & CPUID_CMOV)) {
6419                        goto illegal_op;
6420                    }
6421                    gen_update_cc_op(s);
6422                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6423                    gen_helper_fcomi_ST0_FT0(cpu_env);
6424                    gen_helper_fpop(cpu_env);
6425                    set_cc_op(s, CC_OP_EFLAGS);
6426                    break;
6427                case 0x10 ... 0x13: /* fcmovxx */
6428                case 0x18 ... 0x1b:
6429                    {
6430                        int op1;
6431                        TCGLabel *l1;
6432                        static const uint8_t fcmov_cc[8] = {
6433                            (JCC_B << 1),
6434                            (JCC_Z << 1),
6435                            (JCC_BE << 1),
6436                            (JCC_P << 1),
6437                        };
6438
6439                        if (!(s->cpuid_features & CPUID_CMOV)) {
6440                            goto illegal_op;
6441                        }
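                            /* Bits [1:0] select the base condition (B, E,
                               BE, or U, which maps to PF); bit 3 selects
                               FCMOVcc vs FCMOVNcc.  The branch below skips
                               the move, so test the inverted condition. */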
6442                        op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6443                        l1 = gen_new_label();
6444                        gen_jcc1_noeob(s, op1, l1);
6445                        gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6446                        gen_set_label(l1);
6447                    }
6448                    break;
6449                default:
6450                    goto unknown_op;
6451                }
6452            }
6453
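                /* Record the CS selector and EIP of this x87 insn in
                   fpcs/fpip, where a later fnsave/fnstenv finds them. */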
6454            if (update_fip) {
6455                tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6456                               offsetof(CPUX86State, segs[R_CS].selector));
6457                tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6458                                 offsetof(CPUX86State, fpcs));
6459                tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base),
6460                              cpu_env, offsetof(CPUX86State, fpip));
6461            }
6462        }
6463        break;
6464        /************************/
6465        /* string ops */
6466
6467    case 0xa4: /* movsS */
6468    case 0xa5:
6469        ot = mo_b_d(b, dflag);
6470        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6471            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6472        } else {
6473            gen_movs(s, ot);
6474        }
6475        break;
6476
6477    case 0xaa: /* stosS */
6478    case 0xab:
6479        ot = mo_b_d(b, dflag);
6480        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6481            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6482        } else {
6483            gen_stos(s, ot);
6484        }
6485        break;
6486    case 0xac: /* lodsS */
6487    case 0xad:
6488        ot = mo_b_d(b, dflag);
6489        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6490            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6491        } else {
6492            gen_lods(s, ot);
6493        }
6494        break;
6495    case 0xae: /* scasS */
6496    case 0xaf:
6497        ot = mo_b_d(b, dflag);
6498        if (prefixes & PREFIX_REPNZ) {
6499            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6500        } else if (prefixes & PREFIX_REPZ) {
6501            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6502        } else {
6503            gen_scas(s, ot);
6504        }
6505        break;
6506
6507    case 0xa6: /* cmpsS */
6508    case 0xa7:
6509        ot = mo_b_d(b, dflag);
6510        if (prefixes & PREFIX_REPNZ) {
6511            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6512        } else if (prefixes & PREFIX_REPZ) {
6513            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6514        } else {
6515            gen_cmps(s, ot);
6516        }
6517        break;
6518    case 0x6c: /* insS */
6519    case 0x6d:
6520        ot = mo_b_d32(b, dflag);
6521        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6522        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
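            /* gen_check_io performs the I/O permission check; the TYPE
               and STR bits mark this as a string IN for a possible SVM
               IOIO intercept. */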
6523        if (!gen_check_io(s, ot, s->tmp2_i32,
6524                          SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6525            break;
6526        }
6527        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6528            gen_io_start();
6529        }
6530        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6531            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6532            /* jump generated by gen_repz_ins */
6533        } else {
6534            gen_ins(s, ot);
6535            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6536                gen_jmp(s, s->pc - s->cs_base);
6537            }
6538        }
6539        break;
6540    case 0x6e: /* outsS */
6541    case 0x6f:
6542        ot = mo_b_d32(b, dflag);
6543        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6544        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6545        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6546            break;
6547        }
6548        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6549            gen_io_start();
6550        }
6551        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6552            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6553            /* jump generated by gen_repz_outs */
6554        } else {
6555            gen_outs(s, ot);
6556            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6557                gen_jmp(s, s->pc - s->cs_base);
6558            }
6559        }
6560        break;
6561
6562        /************************/
6563        /* port I/O */
6564
6565    case 0xe4:
6566    case 0xe5:
6567        ot = mo_b_d32(b, dflag);
6568        val = x86_ldub_code(env, s);
6569        tcg_gen_movi_i32(s->tmp2_i32, val);
6570        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6571            break;
6572        }
6573        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6574            gen_io_start();
6575        }
6576        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6577        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6578        gen_bpt_io(s, s->tmp2_i32, ot);
6579        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6580            gen_jmp(s, s->pc - s->cs_base);
6581        }
6582        break;
6583    case 0xe6:
6584    case 0xe7:
6585        ot = mo_b_d32(b, dflag);
6586        val = x86_ldub_code(env, s);
6587        tcg_gen_movi_i32(s->tmp2_i32, val);
6588        if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6589            break;
6590        }
6591        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6592            gen_io_start();
6593        }
6594        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6595        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6596        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6597        gen_bpt_io(s, s->tmp2_i32, ot);
6598        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6599            gen_jmp(s, s->pc - s->cs_base);
6600        }
6601        break;
6602    case 0xec:
6603    case 0xed:
6604        ot = mo_b_d32(b, dflag);
6605        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6606        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6607        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6608            break;
6609        }
6610        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6611            gen_io_start();
6612        }
6613        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6614        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6615        gen_bpt_io(s, s->tmp2_i32, ot);
6616        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6617            gen_jmp(s, s->pc - s->cs_base);
6618        }
6619        break;
6620    case 0xee:
6621    case 0xef:
6622        ot = mo_b_d32(b, dflag);
6623        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6624        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6625        if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6626            break;
6627        }
6628        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6629            gen_io_start();
6630        }
6631        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6632        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6633        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6634        gen_bpt_io(s, s->tmp2_i32, ot);
6635        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6636            gen_jmp(s, s->pc - s->cs_base);
6637        }
6638        break;
6639
6640        /************************/
6641        /* control */
6642    case 0xc2: /* ret im */
6643        val = x86_ldsw_code(env, s);
6644        ot = gen_pop_T0(s);
6645        gen_stack_update(s, val + (1 << ot));
6646        /* Note that gen_pop_T0 uses a zero-extending load.  */
6647        gen_op_jmp_v(s->T0);
6648        gen_bnd_jmp(s);
6649        gen_jr(s, s->T0);
6650        break;
6651    case 0xc3: /* ret */
6652        ot = gen_pop_T0(s);
6653        gen_pop_update(s, ot);
6654        /* Note that gen_pop_T0 uses a zero-extending load.  */
6655        gen_op_jmp_v(s->T0);
6656        gen_bnd_jmp(s);
6657        gen_jr(s, s->T0);
6658        break;
6659    case 0xca: /* lret im */
6660        val = x86_ldsw_code(env, s);
6661    do_lret:
6662        if (PE(s) && !VM86(s)) {
6663            gen_update_cc_op(s);
6664            gen_jmp_im(s, pc_start - s->cs_base);
6665            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6666                                      tcg_const_i32(val));
6667        } else {
6668            gen_stack_A0(s);
6669            /* pop offset */
6670            gen_op_ld_v(s, dflag, s->T0, s->A0);
6671            /* NOTE: keeping EIP updated is not a problem even if an
6672               exception occurs. */
6673            gen_op_jmp_v(s->T0);
6674            /* pop selector */
6675            gen_add_A0_im(s, 1 << dflag);
6676            gen_op_ld_v(s, dflag, s->T0, s->A0);
6677            gen_op_movl_seg_T0_vm(s, R_CS);
6678            /* add stack offset */
6679            gen_stack_update(s, val + (2 << dflag));
6680        }
6681        gen_eob(s);
6682        break;
6683    case 0xcb: /* lret */
6684        val = 0;
6685        goto do_lret;
6686    case 0xcf: /* iret */
6687        gen_svm_check_intercept(s, SVM_EXIT_IRET);
6688        if (!PE(s) || VM86(s)) {
6689            /* real mode or vm86 mode */
6690            if (!check_vm86_iopl(s)) {
6691                break;
6692            }
6693            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6694        } else {
6695            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6696                                      tcg_const_i32(s->pc - s->cs_base));
6697        }
6698        set_cc_op(s, CC_OP_EFLAGS);
6699        gen_eob(s);
6700        break;
6701    case 0xe8: /* call im */
6702        {
6703            if (dflag != MO_16) {
6704                tval = (int32_t)insn_get(env, s, MO_32);
6705            } else {
6706                tval = (int16_t)insn_get(env, s, MO_16);
6707            }
6708            next_eip = s->pc - s->cs_base;
6709            tval += next_eip;
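                /* Wrap the target to the operand size: 16 bits for a
                   16-bit operand, 32 bits outside 64-bit code. */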
6710            if (dflag == MO_16) {
6711                tval &= 0xffff;
6712            } else if (!CODE64(s)) {
6713                tval &= 0xffffffff;
6714            }
6715            tcg_gen_movi_tl(s->T0, next_eip);
6716            gen_push_v(s, s->T0);
6717            gen_bnd_jmp(s);
6718            gen_jmp(s, tval);
6719        }
6720        break;
6721    case 0x9a: /* lcall im */
6722        {
6723            unsigned int selector, offset;
6724
6725            if (CODE64(s))
6726                goto illegal_op;
6727            ot = dflag;
6728            offset = insn_get(env, s, ot);
6729            selector = insn_get(env, s, MO_16);
6730
6731            tcg_gen_movi_tl(s->T0, selector);
6732            tcg_gen_movi_tl(s->T1, offset);
6733        }
6734        goto do_lcall;
6735    case 0xe9: /* jmp im */
6736        if (dflag != MO_16) {
6737            tval = (int32_t)insn_get(env, s, MO_32);
6738        } else {
6739            tval = (int16_t)insn_get(env, s, MO_16);
6740        }
6741        tval += s->pc - s->cs_base;
6742        if (dflag == MO_16) {
6743            tval &= 0xffff;
6744        } else if (!CODE64(s)) {
6745            tval &= 0xffffffff;
6746        }
6747        gen_bnd_jmp(s);
6748        gen_jmp(s, tval);
6749        break;
6750    case 0xea: /* ljmp im */
6751        {
6752            unsigned int selector, offset;
6753
6754            if (CODE64(s))
6755                goto illegal_op;
6756            ot = dflag;
6757            offset = insn_get(env, s, ot);
6758            selector = insn_get(env, s, MO_16);
6759
6760            tcg_gen_movi_tl(s->T0, selector);
6761            tcg_gen_movi_tl(s->T1, offset);
6762        }
6763        goto do_ljmp;
6764    case 0xeb: /* jmp Jb */
6765        tval = (int8_t)insn_get(env, s, MO_8);
6766        tval += s->pc - s->cs_base;
6767        if (dflag == MO_16) {
6768            tval &= 0xffff;
6769        }
6770        gen_jmp(s, tval);
6771        break;
6772    case 0x70 ... 0x7f: /* jcc Jb */
6773        tval = (int8_t)insn_get(env, s, MO_8);
6774        goto do_jcc;
6775    case 0x180 ... 0x18f: /* jcc Jv */
6776        if (dflag != MO_16) {
6777            tval = (int32_t)insn_get(env, s, MO_32);
6778        } else {
6779            tval = (int16_t)insn_get(env, s, MO_16);
6780        }
6781    do_jcc:
6782        next_eip = s->pc - s->cs_base;
6783        tval += next_eip;
6784        if (dflag == MO_16) {
6785            tval &= 0xffff;
6786        }
6787        gen_bnd_jmp(s);
6788        gen_jcc(s, b, tval, next_eip);
6789        break;
6790
6791    case 0x190 ... 0x19f: /* setcc Gv */
6792        modrm = x86_ldub_code(env, s);
6793        gen_setcc1(s, b, s->T0);
6794        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6795        break;
6796    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6797        if (!(s->cpuid_features & CPUID_CMOV)) {
6798            goto illegal_op;
6799        }
6800        ot = dflag;
6801        modrm = x86_ldub_code(env, s);
6802        reg = ((modrm >> 3) & 7) | REX_R(s);
6803        gen_cmovcc1(env, s, ot, b, modrm, reg);
6804        break;
6805
6806        /************************/
6807        /* flags */
6808    case 0x9c: /* pushf */
6809        gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6810        if (check_vm86_iopl(s)) {
6811            gen_update_cc_op(s);
6812            gen_helper_read_eflags(s->T0, cpu_env);
6813            gen_push_v(s, s->T0);
6814        }
6815        break;
6816    case 0x9d: /* popf */
6817        gen_svm_check_intercept(s, SVM_EXIT_POPF);
6818        if (check_vm86_iopl(s)) {
6819            ot = gen_pop_T0(s);
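                /* The writable flag set depends on privilege: CPL 0 may
                   also change IF and IOPL, CPL <= IOPL may change IF,
                   and any CPL may change TF/AC/ID/NT. */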
6820            if (CPL(s) == 0) {
6821                if (dflag != MO_16) {
6822                    gen_helper_write_eflags(cpu_env, s->T0,
6823                                            tcg_const_i32((TF_MASK | AC_MASK |
6824                                                           ID_MASK | NT_MASK |
6825                                                           IF_MASK |
6826                                                           IOPL_MASK)));
6827                } else {
6828                    gen_helper_write_eflags(cpu_env, s->T0,
6829                                            tcg_const_i32((TF_MASK | AC_MASK |
6830                                                           ID_MASK | NT_MASK |
6831                                                           IF_MASK | IOPL_MASK)
6832                                                          & 0xffff));
6833                }
6834            } else {
6835                if (CPL(s) <= IOPL(s)) {
6836                    if (dflag != MO_16) {
6837                        gen_helper_write_eflags(cpu_env, s->T0,
6838                                                tcg_const_i32((TF_MASK |
6839                                                               AC_MASK |
6840                                                               ID_MASK |
6841                                                               NT_MASK |
6842                                                               IF_MASK)));
6843                    } else {
6844                        gen_helper_write_eflags(cpu_env, s->T0,
6845                                                tcg_const_i32((TF_MASK |
6846                                                               AC_MASK |
6847                                                               ID_MASK |
6848                                                               NT_MASK |
6849                                                               IF_MASK)
6850                                                              & 0xffff));
6851                    }
6852                } else {
6853                    if (dflag != MO_16) {
6854                        gen_helper_write_eflags(cpu_env, s->T0,
6855                                           tcg_const_i32((TF_MASK | AC_MASK |
6856                                                          ID_MASK | NT_MASK)));
6857                    } else {
6858                        gen_helper_write_eflags(cpu_env, s->T0,
6859                                           tcg_const_i32((TF_MASK | AC_MASK |
6860                                                          ID_MASK | NT_MASK)
6861                                                         & 0xffff));
6862                    }
6863                }
6864            }
6865            gen_pop_update(s, ot);
6866            set_cc_op(s, CC_OP_EFLAGS);
6867            /* abort translation because TF/AC flag may change */
6868            gen_jmp_im(s, s->pc - s->cs_base);
6869            gen_eob(s);
6870        }
6871        break;
6872    case 0x9e: /* sahf */
6873        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6874            goto illegal_op;
6875        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6876        gen_compute_eflags(s);
6877        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6878        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6879        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6880        break;
6881    case 0x9f: /* lahf */
6882        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6883            goto illegal_op;
6884        gen_compute_eflags(s);
6885        /* Note: gen_compute_eflags() only gives the condition codes */
6886        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6887        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6888        break;
6889    case 0xf5: /* cmc */
6890        gen_compute_eflags(s);
6891        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6892        break;
6893    case 0xf8: /* clc */
6894        gen_compute_eflags(s);
6895        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6896        break;
6897    case 0xf9: /* stc */
6898        gen_compute_eflags(s);
6899        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6900        break;
6901    case 0xfc: /* cld */
6902        tcg_gen_movi_i32(s->tmp2_i32, 1);
6903        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6904        break;
6905    case 0xfd: /* std */
6906        tcg_gen_movi_i32(s->tmp2_i32, -1);
6907        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6908        break;
6909
6910        /************************/
6911        /* bit operations */
6912    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6913        ot = dflag;
6914        modrm = x86_ldub_code(env, s);
6915        op = (modrm >> 3) & 7;
6916        mod = (modrm >> 6) & 3;
6917        rm = (modrm & 7) | REX_B(s);
6918        if (mod != 3) {
6919            s->rip_offset = 1;
6920            gen_lea_modrm(env, s, modrm);
6921            if (!(s->prefix & PREFIX_LOCK)) {
6922                gen_op_ld_v(s, ot, s->T0, s->A0);
6923            }
6924        } else {
6925            gen_op_mov_v_reg(s, ot, s->T0, rm);
6926        }
6927        /* load shift */
6928        val = x86_ldub_code(env, s);
6929        tcg_gen_movi_tl(s->T1, val);
6930        if (op < 4)
6931            goto unknown_op;
6932        op -= 4;
6933        goto bt_op;
6934    case 0x1a3: /* bt Gv, Ev */
6935        op = 0;
6936        goto do_btx;
6937    case 0x1ab: /* bts */
6938        op = 1;
6939        goto do_btx;
6940    case 0x1b3: /* btr */
6941        op = 2;
6942        goto do_btx;
6943    case 0x1bb: /* btc */
6944        op = 3;
6945    do_btx:
6946        ot = dflag;
6947        modrm = x86_ldub_code(env, s);
6948        reg = ((modrm >> 3) & 7) | REX_R(s);
6949        mod = (modrm >> 6) & 3;
6950        rm = (modrm & 7) | REX_B(s);
6951        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6952        if (mod != 3) {
6953            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6954            /* Memory operand: the bit offset's high bits address memory, so add (T1 >> (3 + ot)) << ot bytes of displacement. */
6955            gen_exts(ot, s->T1);
6956            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6957            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6958            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6959            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6960            if (!(s->prefix & PREFIX_LOCK)) {
6961                gen_op_ld_v(s, ot, s->T0, s->A0);
6962            }
6963        } else {
6964            gen_op_mov_v_reg(s, ot, s->T0, rm);
6965        }
6966    bt_op:
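            /* Reduce T1 to the bit offset within the operand;
               tmp0 becomes the single-bit mask 1 << T1. */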
6967        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6968        tcg_gen_movi_tl(s->tmp0, 1);
6969        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6970        if (s->prefix & PREFIX_LOCK) {
6971            switch (op) {
6972            case 0: /* bt */
6973                /* Needs no atomic ops; we suppressed the normal
6974                   memory load for LOCK above so do it now.  */
6975                gen_op_ld_v(s, ot, s->T0, s->A0);
6976                break;
6977            case 1: /* bts */
6978                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6979                                           s->mem_index, ot | MO_LE);
6980                break;
6981            case 2: /* btr */
6982                tcg_gen_not_tl(s->tmp0, s->tmp0);
6983                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6984                                            s->mem_index, ot | MO_LE);
6985                break;
6986            default:
6987            case 3: /* btc */
6988                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6989                                            s->mem_index, ot | MO_LE);
6990                break;
6991            }
6992            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6993        } else {
6994            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6995            switch (op) {
6996            case 0: /* bt */
6997                /* Data already loaded; nothing to do.  */
6998                break;
6999            case 1: /* bts */
7000                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
7001                break;
7002            case 2: /* btr */
7003                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
7004                break;
7005            default:
7006            case 3: /* btc */
7007                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
7008                break;
7009            }
7010            if (op != 0) {
7011                if (mod != 3) {
7012                    gen_op_st_v(s, ot, s->T0, s->A0);
7013                } else {
7014                    gen_op_mov_reg_v(s, ot, rm, s->T0);
7015                }
7016            }
7017        }
7018
7019        /* Delay all CC updates until after the store above.  Note that
7020           C is the result of the test, Z is unchanged, and the others
7021           are all undefined.  */
7022        switch (s->cc_op) {
7023        case CC_OP_MULB ... CC_OP_MULQ:
7024        case CC_OP_ADDB ... CC_OP_ADDQ:
7025        case CC_OP_ADCB ... CC_OP_ADCQ:
7026        case CC_OP_SUBB ... CC_OP_SUBQ:
7027        case CC_OP_SBBB ... CC_OP_SBBQ:
7028        case CC_OP_LOGICB ... CC_OP_LOGICQ:
7029        case CC_OP_INCB ... CC_OP_INCQ:
7030        case CC_OP_DECB ... CC_OP_DECQ:
7031        case CC_OP_SHLB ... CC_OP_SHLQ:
7032        case CC_OP_SARB ... CC_OP_SARQ:
7033        case CC_OP_BMILGB ... CC_OP_BMILGQ:
7034            /* Z was going to be computed from the non-zero status of CC_DST.
7035               We can get that same Z value (and the new C value) by leaving
7036               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7037               same width.  */
7038            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7039            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7040            break;
7041        default:
7042            /* Otherwise, generate EFLAGS and replace the C bit.  */
7043            gen_compute_eflags(s);
7044            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7045                               ctz32(CC_C), 1);
7046            break;
7047        }
7048        break;
7049    case 0x1bc: /* bsf / tzcnt */
7050    case 0x1bd: /* bsr / lzcnt */
7051        ot = dflag;
7052        modrm = x86_ldub_code(env, s);
7053        reg = ((modrm >> 3) & 7) | REX_R(s);
7054        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7055        gen_extu(ot, s->T0);
7056
7057        /* Note that lzcnt and tzcnt are in different extensions.  */
7058        if ((prefixes & PREFIX_REPZ)
7059            && (b & 1
7060                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7061                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7062            int size = 8 << ot;
7063            /* For lzcnt/tzcnt, the C bit is defined in terms of the input. */
7064            tcg_gen_mov_tl(cpu_cc_src, s->T0);
7065            if (b & 1) {
7066                /* For lzcnt, reduce the target_ulong result by the
7067                   number of zeros that we expect to find at the top.  */
7068                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7069                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7070            } else {
7071                /* For tzcnt, a zero input must return the operand size.  */
7072                tcg_gen_ctzi_tl(s->T0, s->T0, size);
7073            }
7074            /* For lzcnt/tzcnt, the Z bit is defined in terms of the result.  */
7075            gen_op_update1_cc(s);
7076            set_cc_op(s, CC_OP_BMILGB + ot);
7077        } else {
7078            /* For bsr/bsf, only the Z bit is defined, and it reflects
7079               the input rather than the result.  */
7080            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7081            set_cc_op(s, CC_OP_LOGICB + ot);
7082
7083            /* ??? The manual says that the output is undefined when the
7084               input is zero, but real hardware leaves it unchanged, and
7085               real programs appear to depend on that.  Accomplish this
7086               by passing the output as the value to return upon zero.  */
7087            if (b & 1) {
7088                /* For bsr, return the bit index of the first 1 bit,
7089                   not the count of leading zeros.  */
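                    /* Since TARGET_LONG_BITS - 1 is all-ones, x ^ (width - 1)
                       equals (width - 1) - x; pre-xoring the fallback register
                       value makes it come back unchanged for a zero input. */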
7090                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7091                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7092                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7093            } else {
7094                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7095            }
7096        }
7097        gen_op_mov_reg_v(s, ot, reg, s->T0);
7098        break;
7099        /************************/
7100        /* bcd */
7101    case 0x27: /* daa */
7102        if (CODE64(s))
7103            goto illegal_op;
7104        gen_update_cc_op(s);
7105        gen_helper_daa(cpu_env);
7106        set_cc_op(s, CC_OP_EFLAGS);
7107        break;
7108    case 0x2f: /* das */
7109        if (CODE64(s))
7110            goto illegal_op;
7111        gen_update_cc_op(s);
7112        gen_helper_das(cpu_env);
7113        set_cc_op(s, CC_OP_EFLAGS);
7114        break;
7115    case 0x37: /* aaa */
7116        if (CODE64(s))
7117            goto illegal_op;
7118        gen_update_cc_op(s);
7119        gen_helper_aaa(cpu_env);
7120        set_cc_op(s, CC_OP_EFLAGS);
7121        break;
7122    case 0x3f: /* aas */
7123        if (CODE64(s))
7124            goto illegal_op;
7125        gen_update_cc_op(s);
7126        gen_helper_aas(cpu_env);
7127        set_cc_op(s, CC_OP_EFLAGS);
7128        break;
7129    case 0xd4: /* aam */
7130        if (CODE64(s))
7131            goto illegal_op;
7132        val = x86_ldub_code(env, s);
7133        if (val == 0) {
7134            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7135        } else {
7136            gen_helper_aam(cpu_env, tcg_const_i32(val));
7137            set_cc_op(s, CC_OP_LOGICB);
7138        }
7139        break;
7140    case 0xd5: /* aad */
7141        if (CODE64(s))
7142            goto illegal_op;
7143        val = x86_ldub_code(env, s);
7144        gen_helper_aad(cpu_env, tcg_const_i32(val));
7145        set_cc_op(s, CC_OP_LOGICB);
7146        break;
7147        /************************/
7148        /* misc */
7149    case 0x90: /* nop */
7150        /* XXX: correct lock test needed for all insns */
7151        if (prefixes & PREFIX_LOCK) {
7152            goto illegal_op;
7153        }
7154        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7155        if (REX_B(s)) {
7156            goto do_xchg_reg_eax;
7157        }
7158        if (prefixes & PREFIX_REPZ) {
7159            gen_update_cc_op(s);
7160            gen_jmp_im(s, pc_start - s->cs_base);
7161            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7162            s->base.is_jmp = DISAS_NORETURN;
7163        }
7164        break;
7165    case 0x9b: /* fwait */
7166        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7167            (HF_MP_MASK | HF_TS_MASK)) {
7168            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7169        } else {
7170            gen_helper_fwait(cpu_env);
7171        }
7172        break;
7173    case 0xcc: /* int3 */
7174        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7175        break;
7176    case 0xcd: /* int N */
7177        val = x86_ldub_code(env, s);
7178        if (check_vm86_iopl(s)) {
7179            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7180        }
7181        break;
7182    case 0xce: /* into */
7183        if (CODE64(s))
7184            goto illegal_op;
7185        gen_update_cc_op(s);
7186        gen_jmp_im(s, pc_start - s->cs_base);
7187        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7188        break;
7189#ifdef WANT_ICEBP
7190    case 0xf1: /* icebp (undocumented, exits to external debugger) */
7191        gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7192        gen_debug(s);
7193        break;
7194#endif
7195    case 0xfa: /* cli */
7196        if (check_iopl(s)) {
7197            gen_helper_cli(cpu_env);
7198        }
7199        break;
7200    case 0xfb: /* sti */
7201        if (check_iopl(s)) {
7202            gen_helper_sti(cpu_env);
7203            /* interrupts are recognized only one insn after sti */
7204            gen_jmp_im(s, s->pc - s->cs_base);
7205            gen_eob_inhibit_irq(s, true);
7206        }
7207        break;
7208    case 0x62: /* bound */
7209        if (CODE64(s))
7210            goto illegal_op;
7211        ot = dflag;
7212        modrm = x86_ldub_code(env, s);
7213        reg = (modrm >> 3) & 7;
7214        mod = (modrm >> 6) & 3;
7215        if (mod == 3)
7216            goto illegal_op;
7217        gen_op_mov_v_reg(s, ot, s->T0, reg);
7218        gen_lea_modrm(env, s, modrm);
7219        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7220        if (ot == MO_16) {
7221            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7222        } else {
7223            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7224        }
7225        break;
7226    case 0x1c8 ... 0x1cf: /* bswap reg */
7227        reg = (b & 7) | REX_B(s);
7228#ifdef TARGET_X86_64
7229        if (dflag == MO_64) {
7230            tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7231            break;
7232        }
7233#endif
7234        tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7235        break;
7236    case 0xd6: /* salc */
7237        if (CODE64(s))
7238            goto illegal_op;
7239        gen_compute_eflags_c(s, s->T0);
7240        tcg_gen_neg_tl(s->T0, s->T0);
7241        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7242        break;
7243    case 0xe0: /* loopnz */
7244    case 0xe1: /* loopz */
7245    case 0xe2: /* loop */
7246    case 0xe3: /* jecxz */
7247        {
7248            TCGLabel *l1, *l2, *l3;
7249
7250            tval = (int8_t)insn_get(env, s, MO_8);
7251            next_eip = s->pc - s->cs_base;
7252            tval += next_eip;
7253            if (dflag == MO_16) {
7254                tval &= 0xffff;
7255            }
7256
7257            l1 = gen_new_label();
7258            l2 = gen_new_label();
7259            l3 = gen_new_label();
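                /* l1 = branch taken, l3 = branch not taken (fall through
                   to the next insn), l2 = common exit. */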
7260            gen_update_cc_op(s);
7261            b &= 3;
7262            switch(b) {
7263            case 0: /* loopnz */
7264            case 1: /* loopz */
7265                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7266                gen_op_jz_ecx(s, s->aflag, l3);
7267                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7268                break;
7269            case 2: /* loop */
7270                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7271                gen_op_jnz_ecx(s, s->aflag, l1);
7272                break;
7273            default:
7274            case 3: /* jcxz */
7275                gen_op_jz_ecx(s, s->aflag, l1);
7276                break;
7277            }
7278
7279            gen_set_label(l3);
7280            gen_jmp_im(s, next_eip);
7281            tcg_gen_br(l2);
7282
7283            gen_set_label(l1);
7284            gen_jmp_im(s, tval);
7285            gen_set_label(l2);
7286            gen_eob(s);
7287        }
7288        break;
7289    case 0x130: /* wrmsr */
7290    case 0x132: /* rdmsr */
7291        if (check_cpl0(s)) {
7292            gen_update_cc_op(s);
7293            gen_jmp_im(s, pc_start - s->cs_base);
7294            if (b & 2) {
7295                gen_helper_rdmsr(cpu_env);
7296            } else {
7297                gen_helper_wrmsr(cpu_env);
7298                gen_jmp_im(s, s->pc - s->cs_base);
7299                gen_eob(s);
7300            }
7301        }
7302        break;
7303    case 0x131: /* rdtsc */
7304        gen_update_cc_op(s);
7305        gen_jmp_im(s, pc_start - s->cs_base);
7306        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7307            gen_io_start();
7308        }
7309        gen_helper_rdtsc(cpu_env);
7310        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7311            gen_jmp(s, s->pc - s->cs_base);
7312        }
7313        break;
7314    case 0x133: /* rdpmc */
7315        gen_update_cc_op(s);
7316        gen_jmp_im(s, pc_start - s->cs_base);
7317        gen_helper_rdpmc(cpu_env);
7318        s->base.is_jmp = DISAS_NORETURN;
7319        break;
7320    case 0x134: /* sysenter */
7321        /* For Intel, SYSENTER is valid in 64-bit mode */
7322        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7323            goto illegal_op;
7324        if (!PE(s)) {
7325            gen_exception_gpf(s);
7326        } else {
7327            gen_helper_sysenter(cpu_env);
7328            gen_eob(s);
7329        }
7330        break;
7331    case 0x135: /* sysexit */
7332        /* For Intel, SYSEXIT is valid in 64-bit mode */
7333        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7334            goto illegal_op;
7335        if (!PE(s)) {
7336            gen_exception_gpf(s);
7337        } else {
7338            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7339            gen_eob(s);
7340        }
7341        break;
7342#ifdef TARGET_X86_64
7343    case 0x105: /* syscall */
7344        /* XXX: is it usable in real mode? */
7345        gen_update_cc_op(s);
7346        gen_jmp_im(s, pc_start - s->cs_base);
7347        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7348        /* TF handling for the syscall insn is different.  The TF bit is
7349           checked after the syscall insn completes.  This allows #DB not
7350           to be generated after one has entered CPL 0 if TF is set in FMASK.  */
7351        gen_eob_worker(s, false, true);
7352        break;
7353    case 0x107: /* sysret */
7354        if (!PE(s)) {
7355            gen_exception_gpf(s);
7356        } else {
7357            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7358            /* condition codes are modified only in long mode */
7359            if (LMA(s)) {
7360                set_cc_op(s, CC_OP_EFLAGS);
7361            }
7362            /* TF handling for the sysret insn is different. The TF bit is
7363               checked after the sysret insn completes. This allows #DB to be
7364               generated "as if" the syscall insn in userspace has just
7365               completed.  */
7366            gen_eob_worker(s, false, true);
7367        }
7368        break;
7369#endif
7370    case 0x1a2: /* cpuid */
7371        gen_update_cc_op(s);
7372        gen_jmp_im(s, pc_start - s->cs_base);
7373        gen_helper_cpuid(cpu_env);
7374        break;
7375    case 0xf4: /* hlt */
7376        if (check_cpl0(s)) {
7377            gen_update_cc_op(s);
7378            gen_jmp_im(s, pc_start - s->cs_base);
7379            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7380            s->base.is_jmp = DISAS_NORETURN;
7381        }
7382        break;
7383    case 0x100:
7384        modrm = x86_ldub_code(env, s);
7385        mod = (modrm >> 6) & 3;
7386        op = (modrm >> 3) & 7;
7387        switch(op) {
7388        case 0: /* sldt */
7389            if (!PE(s) || VM86(s))
7390                goto illegal_op;
7391            gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7392            tcg_gen_ld32u_tl(s->T0, cpu_env,
7393                             offsetof(CPUX86State, ldt.selector));
7394            ot = mod == 3 ? dflag : MO_16;
7395            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7396            break;
7397        case 2: /* lldt */
7398            if (!PE(s) || VM86(s))
7399                goto illegal_op;
7400            if (check_cpl0(s)) {
7401                gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7402                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7403                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7404                gen_helper_lldt(cpu_env, s->tmp2_i32);
7405            }
7406            break;
7407        case 1: /* str */
7408            if (!PE(s) || VM86(s))
7409                goto illegal_op;
7410            gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7411            tcg_gen_ld32u_tl(s->T0, cpu_env,
7412                             offsetof(CPUX86State, tr.selector));
7413            ot = mod == 3 ? dflag : MO_16;
7414            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7415            break;
7416        case 3: /* ltr */
7417            if (!PE(s) || VM86(s))
7418                goto illegal_op;
7419            if (check_cpl0(s)) {
7420                gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7421                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7422                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7423                gen_helper_ltr(cpu_env, s->tmp2_i32);
7424            }
7425            break;
7426        case 4: /* verr */
7427        case 5: /* verw */
7428            if (!PE(s) || VM86(s))
7429                goto illegal_op;
7430            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7431            gen_update_cc_op(s);
7432            if (op == 4) {
7433                gen_helper_verr(cpu_env, s->T0);
7434            } else {
7435                gen_helper_verw(cpu_env, s->T0);
7436            }
7437            set_cc_op(s, CC_OP_EFLAGS);
7438            break;
7439        default:
7440            goto unknown_op;
7441        }
7442        break;
7443
7444    case 0x101:
7445        modrm = x86_ldub_code(env, s);
7446        switch (modrm) {
7447        CASE_MODRM_MEM_OP(0): /* sgdt */
7448            gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7449            gen_lea_modrm(env, s, modrm);
7450            tcg_gen_ld32u_tl(s->T0,
7451                             cpu_env, offsetof(CPUX86State, gdt.limit));
7452            gen_op_st_v(s, MO_16, s->T0, s->A0);
7453            gen_add_A0_im(s, 2);
7454            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
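            /*
             * A 16-bit operand size stores only the low 24 bits of the
             * base; CODE64(s) + MO_32 yields an 8-byte store in long
             * mode and a 4-byte store otherwise.
             */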
7455            if (dflag == MO_16) {
7456                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7457            }
7458            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7459            break;
7460
7461        case 0xc8: /* monitor */
7462            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7463                goto illegal_op;
7464            }
7465            gen_update_cc_op(s);
7466            gen_jmp_im(s, pc_start - s->cs_base);
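            /* The monitored address comes from RAX/EAX, zero-extended to
               the address size, with the DS base (or override) applied. */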
7467            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7468            gen_extu(s->aflag, s->A0);
7469            gen_add_A0_ds_seg(s);
7470            gen_helper_monitor(cpu_env, s->A0);
7471            break;
7472
7473        case 0xc9: /* mwait */
7474            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7475                goto illegal_op;
7476            }
7477            gen_update_cc_op(s);
7478            gen_jmp_im(s, pc_start - s->cs_base);
7479            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7480            s->base.is_jmp = DISAS_NORETURN;
7481            break;
7482
7483        case 0xca: /* clac */
7484            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7485                || CPL(s) != 0) {
7486                goto illegal_op;
7487            }
7488            gen_helper_clac(cpu_env);
7489            gen_jmp_im(s, s->pc - s->cs_base);
7490            gen_eob(s);
7491            break;
7492
7493        case 0xcb: /* stac */
7494            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7495                || CPL(s) != 0) {
7496                goto illegal_op;
7497            }
7498            gen_helper_stac(cpu_env);
7499            gen_jmp_im(s, s->pc - s->cs_base);
7500            gen_eob(s);
7501            break;
7502
7503        CASE_MODRM_MEM_OP(1): /* sidt */
7504            gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7505            gen_lea_modrm(env, s, modrm);
7506            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7507            gen_op_st_v(s, MO_16, s->T0, s->A0);
7508            gen_add_A0_im(s, 2);
7509            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7510            if (dflag == MO_16) {
7511                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7512            }
7513            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7514            break;
7515
7516        case 0xd0: /* xgetbv */
7517            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7518                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7519                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7520                goto illegal_op;
7521            }
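            /* ECX selects the XCR; the 64-bit result is returned in EDX:EAX. */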
7522            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7523            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7524            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7525            break;
7526
7527        case 0xd1: /* xsetbv */
7528            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7529                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7530                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7531                goto illegal_op;
7532            }
7533            if (!check_cpl0(s)) {
7534                break;
7535            }
7536            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7537                                  cpu_regs[R_EDX]);
7538            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7539            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7540            /* End TB because translation flags may change.  */
7541            gen_jmp_im(s, s->pc - s->cs_base);
7542            gen_eob(s);
7543            break;
7544
7545        case 0xd8: /* VMRUN */
7546            if (!SVME(s) || !PE(s)) {
7547                goto illegal_op;
7548            }
7549            if (!check_cpl0(s)) {
7550                break;
7551            }
7552            gen_update_cc_op(s);
7553            gen_jmp_im(s, pc_start - s->cs_base);
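            /* aflag - 1 encodes the address size for the helper:
               0/1/2 for 16/32/64-bit. */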
7554            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7555                             tcg_const_i32(s->pc - pc_start));
7556            tcg_gen_exit_tb(NULL, 0);
7557            s->base.is_jmp = DISAS_NORETURN;
7558            break;
7559
7560        case 0xd9: /* VMMCALL */
7561            if (!SVME(s)) {
7562                goto illegal_op;
7563            }
7564            gen_update_cc_op(s);
7565            gen_jmp_im(s, pc_start - s->cs_base);
7566            gen_helper_vmmcall(cpu_env);
7567            break;
7568
7569        case 0xda: /* VMLOAD */
7570            if (!SVME(s) || !PE(s)) {
7571                goto illegal_op;
7572            }
7573            if (!check_cpl0(s)) {
7574                break;
7575            }
7576            gen_update_cc_op(s);
7577            gen_jmp_im(s, pc_start - s->cs_base);
7578            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7579            break;
7580
7581        case 0xdb: /* VMSAVE */
7582            if (!SVME(s) || !PE(s)) {
7583                goto illegal_op;
7584            }
7585            if (!check_cpl0(s)) {
7586                break;
7587            }
7588            gen_update_cc_op(s);
7589            gen_jmp_im(s, pc_start - s->cs_base);
7590            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7591            break;
7592
7593        case 0xdc: /* STGI */
7594            if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7595                || !PE(s)) {
7596                goto illegal_op;
7597            }
7598            if (!check_cpl0(s)) {
7599                break;
7600            }
7601            gen_update_cc_op(s);
7602            gen_helper_stgi(cpu_env);
7603            gen_jmp_im(s, s->pc - s->cs_base);
7604            gen_eob(s);
7605            break;
7606
7607        case 0xdd: /* CLGI */
7608            if (!SVME(s) || !PE(s)) {
7609                goto illegal_op;
7610            }
7611            if (!check_cpl0(s)) {
7612                break;
7613            }
7614            gen_update_cc_op(s);
7615            gen_jmp_im(s, pc_start - s->cs_base);
7616            gen_helper_clgi(cpu_env);
7617            break;
7618
7619        case 0xde: /* SKINIT */
7620            if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7621                || !PE(s)) {
7622                goto illegal_op;
7623            }
7624            gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7625            /* If not intercepted, not implemented -- raise #UD. */
7626            goto illegal_op;
7627
7628        case 0xdf: /* INVLPGA */
7629            if (!SVME(s) || !PE(s)) {
7630                goto illegal_op;
7631            }
7632            if (!check_cpl0(s)) {
7633                break;
7634            }
7635            gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
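            /* The linear address to invalidate is in RAX/EAX; the ASID
               in ECX is not used by this implementation. */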
7636            if (s->aflag == MO_64) {
7637                tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7638            } else {
7639                tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7640            }
7641            gen_helper_flush_page(cpu_env, s->A0);
7642            gen_jmp_im(s, s->pc - s->cs_base);
7643            gen_eob(s);
7644            break;
7645
7646        CASE_MODRM_MEM_OP(2): /* lgdt */
7647            if (!check_cpl0(s)) {
7648                break;
7649            }
7650            gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7651            gen_lea_modrm(env, s, modrm);
7652            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7653            gen_add_A0_im(s, 2);
7654            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7655            if (dflag == MO_16) {
7656                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7657            }
7658            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7659            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7660            break;
7661
7662        CASE_MODRM_MEM_OP(3): /* lidt */
7663            if (!check_cpl0(s)) {
7664                break;
7665            }
7666            gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7667            gen_lea_modrm(env, s, modrm);
7668            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7669            gen_add_A0_im(s, 2);
7670            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7671            if (dflag == MO_16) {
7672                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7673            }
7674            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7675            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7676            break;
7677
7678        CASE_MODRM_OP(4): /* smsw */
7679            gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7680            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7681            /*
7682             * In 32-bit mode, the higher 16 bits of the destination
7683             * register are undefined.  In practice CR0[31:0] is stored
7684             * just like in 64-bit mode.
7685             */
7686            mod = (modrm >> 6) & 3;
7687            ot = (mod != 3 ? MO_16 : s->dflag);
7688            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7689            break;
7690        case 0xee: /* rdpkru */
7691            if (prefixes & PREFIX_LOCK) {
7692                goto illegal_op;
7693            }
7694            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7695            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7696            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7697            break;
7698        case 0xef: /* wrpkru */
7699            if (prefixes & PREFIX_LOCK) {
7700                goto illegal_op;
7701            }
7702            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7703                                  cpu_regs[R_EDX]);
7704            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7705            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7706            break;
7707
7708        CASE_MODRM_OP(6): /* lmsw */
7709            if (!check_cpl0(s)) {
7710                break;
7711            }
7712            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7713            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            /*
             * Only the 4 lower bits of CR0 are modified.
             * PE cannot be cleared once set: bit 0 of the old CR0
             * is ORed back into the result below.
             */
7718            tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7719            tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7720            tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7721            tcg_gen_or_tl(s->T0, s->T0, s->T1);
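            /* e.g. old CR0 = ...0001b, operand = 0xfff0: (0xfff0 & 0xf)
               is 0, but bit 0 of the old CR0 is ORed back in, so PE
               remains set. */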
7722            gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7723            gen_jmp_im(s, s->pc - s->cs_base);
7724            gen_eob(s);
7725            break;
7726
7727        CASE_MODRM_MEM_OP(7): /* invlpg */
7728            if (!check_cpl0(s)) {
7729                break;
7730            }
7731            gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7732            gen_lea_modrm(env, s, modrm);
7733            gen_helper_flush_page(cpu_env, s->A0);
7734            gen_jmp_im(s, s->pc - s->cs_base);
7735            gen_eob(s);
7736            break;
7737
7738        case 0xf8: /* swapgs */
7739#ifdef TARGET_X86_64
7740            if (CODE64(s)) {
7741                if (check_cpl0(s)) {
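                    /* Exchange the GS base with kernelgsbase
                       (IA32_KERNEL_GS_BASE). */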
7742                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7743                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7744                                  offsetof(CPUX86State, kernelgsbase));
7745                    tcg_gen_st_tl(s->T0, cpu_env,
7746                                  offsetof(CPUX86State, kernelgsbase));
7747                }
7748                break;
7749            }
7750#endif
7751            goto illegal_op;
7752
7753        case 0xf9: /* rdtscp */
7754            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7755                goto illegal_op;
7756            }
7757            gen_update_cc_op(s);
7758            gen_jmp_im(s, pc_start - s->cs_base);
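            /* TSC reads are I/O-like under icount: bracket the helper
               with gen_io_start() and end the TB afterwards. */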
7759            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7760                gen_io_start();
7761            }
7762            gen_helper_rdtscp(cpu_env);
7763            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7764                gen_jmp(s, s->pc - s->cs_base);
7765            }
7766            break;
7767
7768        default:
7769            goto unknown_op;
7770        }
7771        break;
7772
7773    case 0x108: /* invd */
7774    case 0x109: /* wbinvd */
7775        if (check_cpl0(s)) {
7776            gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7777            /* nothing to do */
7778        }
7779        break;
    case 0x63: /* arpl, or movsxd (x86_64) */
7781#ifdef TARGET_X86_64
7782        if (CODE64(s)) {
7783            int d_ot;
            /* d_ot is the size of the destination */
7785            d_ot = dflag;
7786
7787            modrm = x86_ldub_code(env, s);
7788            reg = ((modrm >> 3) & 7) | REX_R(s);
7789            mod = (modrm >> 6) & 3;
7790            rm = (modrm & 7) | REX_B(s);
7791
7792            if (mod == 3) {
7793                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7794                /* sign extend */
7795                if (d_ot == MO_64) {
7796                    tcg_gen_ext32s_tl(s->T0, s->T0);
7797                }
7798                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7799            } else {
7800                gen_lea_modrm(env, s, modrm);
7801                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7802                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7803            }
7804        } else
7805#endif
7806        {
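            /*
             * ARPL: if the RPL (low two bits) of the destination
             * selector is lower than that of the source, raise it to
             * match and set ZF, else clear ZF.  t2 holds the new CC_Z.
             */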
7807            TCGLabel *label1;
7808            TCGv t0, t1, t2, a0;
7809
7810            if (!PE(s) || VM86(s))
7811                goto illegal_op;
7812            t0 = tcg_temp_local_new();
7813            t1 = tcg_temp_local_new();
7814            t2 = tcg_temp_local_new();
7815            ot = MO_16;
7816            modrm = x86_ldub_code(env, s);
7817            reg = (modrm >> 3) & 7;
7818            mod = (modrm >> 6) & 3;
7819            rm = modrm & 7;
7820            if (mod != 3) {
7821                gen_lea_modrm(env, s, modrm);
7822                gen_op_ld_v(s, ot, t0, s->A0);
7823                a0 = tcg_temp_local_new();
7824                tcg_gen_mov_tl(a0, s->A0);
7825            } else {
7826                gen_op_mov_v_reg(s, ot, t0, rm);
7827                a0 = NULL;
7828            }
7829            gen_op_mov_v_reg(s, ot, t1, reg);
7830            tcg_gen_andi_tl(s->tmp0, t0, 3);
7831            tcg_gen_andi_tl(t1, t1, 3);
7832            tcg_gen_movi_tl(t2, 0);
7833            label1 = gen_new_label();
7834            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7835            tcg_gen_andi_tl(t0, t0, ~3);
7836            tcg_gen_or_tl(t0, t0, t1);
7837            tcg_gen_movi_tl(t2, CC_Z);
7838            gen_set_label(label1);
7839            if (mod != 3) {
7840                gen_op_st_v(s, ot, t0, a0);
7841                tcg_temp_free(a0);
            } else {
7843                gen_op_mov_reg_v(s, ot, rm, t0);
7844            }
7845            gen_compute_eflags(s);
7846            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7847            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7848            tcg_temp_free(t0);
7849            tcg_temp_free(t1);
7850            tcg_temp_free(t2);
7851        }
7852        break;
7853    case 0x102: /* lar */
7854    case 0x103: /* lsl */
7855        {
7856            TCGLabel *label1;
7857            TCGv t0;
7858            if (!PE(s) || VM86(s))
7859                goto illegal_op;
7860            ot = dflag != MO_16 ? MO_32 : MO_16;
7861            modrm = x86_ldub_code(env, s);
7862            reg = ((modrm >> 3) & 7) | REX_R(s);
7863            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7864            t0 = tcg_temp_local_new();
7865            gen_update_cc_op(s);
7866            if (b == 0x102) {
7867                gen_helper_lar(t0, cpu_env, s->T0);
7868            } else {
7869                gen_helper_lsl(t0, cpu_env, s->T0);
7870            }
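            /* The helper sets CC_Z in cc_src on success; the result is
               written back only in that case. */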
7871            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7872            label1 = gen_new_label();
7873            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7874            gen_op_mov_reg_v(s, ot, reg, t0);
7875            gen_set_label(label1);
7876            set_cc_op(s, CC_OP_EFLAGS);
7877            tcg_temp_free(t0);
7878        }
7879        break;
7880    case 0x118:
7881        modrm = x86_ldub_code(env, s);
7882        mod = (modrm >> 6) & 3;
7883        op = (modrm >> 3) & 7;
7884        switch(op) {
7885        case 0: /* prefetchnta */
        case 1: /* prefetcht0 */
        case 2: /* prefetcht1 */
        case 3: /* prefetcht2 */
7889            if (mod == 3)
7890                goto illegal_op;
7891            gen_nop_modrm(env, s, modrm);
7892            /* nothing more to do */
7893            break;
7894        default: /* nop (multi byte) */
7895            gen_nop_modrm(env, s, modrm);
7896            break;
7897        }
7898        break;
7899    case 0x11a:
7900        modrm = x86_ldub_code(env, s);
7901        if (s->flags & HF_MPX_EN_MASK) {
7902            mod = (modrm >> 6) & 3;
7903            reg = ((modrm >> 3) & 7) | REX_R(s);
7904            if (prefixes & PREFIX_REPZ) {
7905                /* bndcl */
7906                if (reg >= 4
7907                    || (prefixes & PREFIX_LOCK)
7908                    || s->aflag == MO_16) {
7909                    goto illegal_op;
7910                }
7911                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7912            } else if (prefixes & PREFIX_REPNZ) {
7913                /* bndcu */
7914                if (reg >= 4
7915                    || (prefixes & PREFIX_LOCK)
7916                    || s->aflag == MO_16) {
7917                    goto illegal_op;
7918                }
7919                TCGv_i64 notu = tcg_temp_new_i64();
7920                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7921                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7922                tcg_temp_free_i64(notu);
7923            } else if (prefixes & PREFIX_DATA) {
7924                /* bndmov -- from reg/mem */
7925                if (reg >= 4 || s->aflag == MO_16) {
7926                    goto illegal_op;
7927                }
7928                if (mod == 3) {
7929                    int reg2 = (modrm & 7) | REX_B(s);
7930                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7931                        goto illegal_op;
7932                    }
7933                    if (s->flags & HF_MPX_IU_MASK) {
7934                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7935                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7936                    }
7937                } else {
7938                    gen_lea_modrm(env, s, modrm);
7939                    if (CODE64(s)) {
7940                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7941                                            s->mem_index, MO_LEQ);
7942                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7943                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7944                                            s->mem_index, MO_LEQ);
7945                    } else {
7946                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7947                                            s->mem_index, MO_LEUL);
7948                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7949                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7950                                            s->mem_index, MO_LEUL);
7951                    }
                    /* bnd registers are now in use */
7953                    gen_set_hflag(s, HF_MPX_IU_MASK);
7954                }
7955            } else if (mod != 3) {
7956                /* bndldx */
7957                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7958                if (reg >= 4
7959                    || (prefixes & PREFIX_LOCK)
7960                    || s->aflag == MO_16
7961                    || a.base < -1) {
7962                    goto illegal_op;
7963                }
7964                if (a.base >= 0) {
7965                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7966                } else {
7967                    tcg_gen_movi_tl(s->A0, 0);
7968                }
7969                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7970                if (a.index >= 0) {
7971                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7972                } else {
7973                    tcg_gen_movi_tl(s->T0, 0);
7974                }
7975                if (CODE64(s)) {
7976                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7977                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7978                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7979                } else {
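                    /* The 32-bit helper returns ub:lb packed in one
                       64-bit value; split it into the two registers. */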
7980                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7981                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7982                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7983                }
7984                gen_set_hflag(s, HF_MPX_IU_MASK);
7985            }
7986        }
7987        gen_nop_modrm(env, s, modrm);
7988        break;
7989    case 0x11b:
7990        modrm = x86_ldub_code(env, s);
7991        if (s->flags & HF_MPX_EN_MASK) {
7992            mod = (modrm >> 6) & 3;
7993            reg = ((modrm >> 3) & 7) | REX_R(s);
7994            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7995                /* bndmk */
7996                if (reg >= 4
7997                    || (prefixes & PREFIX_LOCK)
7998                    || s->aflag == MO_16) {
7999                    goto illegal_op;
8000                }
8001                AddressParts a = gen_lea_modrm_0(env, s, modrm);
8002                if (a.base >= 0) {
8003                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
8004                    if (!CODE64(s)) {
8005                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
8006                    }
8007                } else if (a.base == -1) {
                    /* with no base register, the lower bound is 0 */
8009                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
8010                } else {
                    /* RIP-relative addressing generates #UD */
8012                    goto illegal_op;
8013                }
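                /* As architected, BNDMK stores the upper bound in
                   one's complement form. */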
8014                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
8015                if (!CODE64(s)) {
8016                    tcg_gen_ext32u_tl(s->A0, s->A0);
8017                }
8018                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
                /* bnd registers are now in use */
8020                gen_set_hflag(s, HF_MPX_IU_MASK);
8021                break;
8022            } else if (prefixes & PREFIX_REPNZ) {
8023                /* bndcn */
8024                if (reg >= 4
8025                    || (prefixes & PREFIX_LOCK)
8026                    || s->aflag == MO_16) {
8027                    goto illegal_op;
8028                }
8029                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
8030            } else if (prefixes & PREFIX_DATA) {
8031                /* bndmov -- to reg/mem */
8032                if (reg >= 4 || s->aflag == MO_16) {
8033                    goto illegal_op;
8034                }
8035                if (mod == 3) {
8036                    int reg2 = (modrm & 7) | REX_B(s);
8037                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8038                        goto illegal_op;
8039                    }
8040                    if (s->flags & HF_MPX_IU_MASK) {
8041                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8042                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8043                    }
8044                } else {
8045                    gen_lea_modrm(env, s, modrm);
8046                    if (CODE64(s)) {
8047                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8048                                            s->mem_index, MO_LEQ);
8049                        tcg_gen_addi_tl(s->A0, s->A0, 8);
8050                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8051                                            s->mem_index, MO_LEQ);
8052                    } else {
8053                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8054                                            s->mem_index, MO_LEUL);
8055                        tcg_gen_addi_tl(s->A0, s->A0, 4);
8056                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8057                                            s->mem_index, MO_LEUL);
8058                    }
8059                }
8060            } else if (mod != 3) {
8061                /* bndstx */
8062                AddressParts a = gen_lea_modrm_0(env, s, modrm);
8063                if (reg >= 4
8064                    || (prefixes & PREFIX_LOCK)
8065                    || s->aflag == MO_16
8066                    || a.base < -1) {
8067                    goto illegal_op;
8068                }
8069                if (a.base >= 0) {
8070                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8071                } else {
8072                    tcg_gen_movi_tl(s->A0, 0);
8073                }
8074                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8075                if (a.index >= 0) {
8076                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8077                } else {
8078                    tcg_gen_movi_tl(s->T0, 0);
8079                }
8080                if (CODE64(s)) {
8081                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8082                                        cpu_bndl[reg], cpu_bndu[reg]);
8083                } else {
8084                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8085                                        cpu_bndl[reg], cpu_bndu[reg]);
8086                }
8087            }
8088        }
8089        gen_nop_modrm(env, s, modrm);
8090        break;
8091    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8092        modrm = x86_ldub_code(env, s);
8093        gen_nop_modrm(env, s, modrm);
8094        break;
8095
8096    case 0x120: /* mov reg, crN */
8097    case 0x122: /* mov crN, reg */
8098        if (!check_cpl0(s)) {
8099            break;
8100        }
8101        modrm = x86_ldub_code(env, s);
8102        /*
8103         * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8104         * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8105         * processors all show that the mod bits are assumed to be 1's,
8106         * regardless of actual values.
8107         */
8108        rm = (modrm & 7) | REX_B(s);
8109        reg = ((modrm >> 3) & 7) | REX_R(s);
8110        switch (reg) {
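        /* A LOCK prefix turns a CR0 access into a CR8 access (AMD's
           alternative CR8 encoding for legacy mode). */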
8111        case 0:
8112            if ((prefixes & PREFIX_LOCK) &&
8113                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8114                reg = 8;
8115            }
8116            break;
8117        case 2:
8118        case 3:
8119        case 4:
8120        case 8:
8121            break;
8122        default:
8123            goto unknown_op;
8124        }
8125        ot  = (CODE64(s) ? MO_64 : MO_32);
8126
8127        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8128            gen_io_start();
8129        }
8130        if (b & 2) {
8131            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8132            gen_op_mov_v_reg(s, ot, s->T0, rm);
8133            gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8134            gen_jmp_im(s, s->pc - s->cs_base);
8135            gen_eob(s);
8136        } else {
8137            gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8138            gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8139            gen_op_mov_reg_v(s, ot, rm, s->T0);
8140            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8141                gen_jmp(s, s->pc - s->cs_base);
8142            }
8143        }
8144        break;
8145
8146    case 0x121: /* mov reg, drN */
8147    case 0x123: /* mov drN, reg */
8148        if (check_cpl0(s)) {
8149            modrm = x86_ldub_code(env, s);
            /*
             * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
             * AMD documentation (24594.pdf) and testing of
             * Intel 386 and 486 processors all show that the mod bits
             * are assumed to be 1's, regardless of actual values.
             */
8155            rm = (modrm & 7) | REX_B(s);
8156            reg = ((modrm >> 3) & 7) | REX_R(s);
8157            if (CODE64(s))
8158                ot = MO_64;
8159            else
8160                ot = MO_32;
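            /* DR8-DR15 do not exist; REX.R-extended encodings raise #UD. */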
8161            if (reg >= 8) {
8162                goto illegal_op;
8163            }
8164            if (b & 2) {
8165                gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8166                gen_op_mov_v_reg(s, ot, s->T0, rm);
8167                tcg_gen_movi_i32(s->tmp2_i32, reg);
8168                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8169                gen_jmp_im(s, s->pc - s->cs_base);
8170                gen_eob(s);
8171            } else {
8172                gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8173                tcg_gen_movi_i32(s->tmp2_i32, reg);
8174                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8175                gen_op_mov_reg_v(s, ot, rm, s->T0);
8176            }
8177        }
8178        break;
8179    case 0x106: /* clts */
8180        if (check_cpl0(s)) {
8181            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8182            gen_helper_clts(cpu_env);
8183            /* abort block because static cpu state changed */
8184            gen_jmp_im(s, s->pc - s->cs_base);
8185            gen_eob(s);
8186        }
8187        break;
8188    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8189    case 0x1c3: /* MOVNTI reg, mem */
8190        if (!(s->cpuid_features & CPUID_SSE2))
8191            goto illegal_op;
8192        ot = mo_64_32(dflag);
8193        modrm = x86_ldub_code(env, s);
8194        mod = (modrm >> 6) & 3;
8195        if (mod == 3)
8196            goto illegal_op;
8197        reg = ((modrm >> 3) & 7) | REX_R(s);
8198        /* generate a generic store */
8199        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8200        break;
8201    case 0x1ae:
8202        modrm = x86_ldub_code(env, s);
8203        switch (modrm) {
8204        CASE_MODRM_MEM_OP(0): /* fxsave */
8205            if (!(s->cpuid_features & CPUID_FXSR)
8206                || (prefixes & PREFIX_LOCK)) {
8207                goto illegal_op;
8208            }
8209            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8210                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8211                break;
8212            }
8213            gen_lea_modrm(env, s, modrm);
8214            gen_helper_fxsave(cpu_env, s->A0);
8215            break;
8216
8217        CASE_MODRM_MEM_OP(1): /* fxrstor */
8218            if (!(s->cpuid_features & CPUID_FXSR)
8219                || (prefixes & PREFIX_LOCK)) {
8220                goto illegal_op;
8221            }
8222            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8223                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8224                break;
8225            }
8226            gen_lea_modrm(env, s, modrm);
8227            gen_helper_fxrstor(cpu_env, s->A0);
8228            break;
8229
8230        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8231            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8232                goto illegal_op;
8233            }
8234            if (s->flags & HF_TS_MASK) {
8235                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8236                break;
8237            }
8238            gen_lea_modrm(env, s, modrm);
8239            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8240            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8241            break;
8242
8243        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8244            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8245                goto illegal_op;
8246            }
8247            if (s->flags & HF_TS_MASK) {
8248                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8249                break;
8250            }
8251            gen_helper_update_mxcsr(cpu_env);
8252            gen_lea_modrm(env, s, modrm);
8253            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8254            gen_op_st_v(s, MO_32, s->T0, s->A0);
8255            break;
8256
8257        CASE_MODRM_MEM_OP(4): /* xsave */
8258            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8259                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8260                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8261                goto illegal_op;
8262            }
8263            gen_lea_modrm(env, s, modrm);
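            /* The requested-feature bitmap is passed in EDX:EAX. */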
8264            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8265                                  cpu_regs[R_EDX]);
8266            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8267            break;
8268
8269        CASE_MODRM_MEM_OP(5): /* xrstor */
8270            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8271                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8272                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8273                goto illegal_op;
8274            }
8275            gen_lea_modrm(env, s, modrm);
8276            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8277                                  cpu_regs[R_EDX]);
8278            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8279            /* XRSTOR is how MPX is enabled, which changes how
8280               we translate.  Thus we need to end the TB.  */
8281            gen_update_cc_op(s);
8282            gen_jmp_im(s, s->pc - s->cs_base);
8283            gen_eob(s);
8284            break;
8285
8286        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8287            if (prefixes & PREFIX_LOCK) {
8288                goto illegal_op;
8289            }
8290            if (prefixes & PREFIX_DATA) {
8291                /* clwb */
8292                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8293                    goto illegal_op;
8294                }
8295                gen_nop_modrm(env, s, modrm);
8296            } else {
8297                /* xsaveopt */
8298                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8299                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8300                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8301                    goto illegal_op;
8302                }
8303                gen_lea_modrm(env, s, modrm);
8304                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8305                                      cpu_regs[R_EDX]);
8306                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8307            }
8308            break;
8309
8310        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8311            if (prefixes & PREFIX_LOCK) {
8312                goto illegal_op;
8313            }
8314            if (prefixes & PREFIX_DATA) {
8315                /* clflushopt */
8316                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8317                    goto illegal_op;
8318                }
8319            } else {
8320                /* clflush */
8321                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8322                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8323                    goto illegal_op;
8324                }
8325            }
8326            gen_nop_modrm(env, s, modrm);
8327            break;
8328
8329        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8330        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8331        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8332        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8333            if (CODE64(s)
8334                && (prefixes & PREFIX_REPZ)
8335                && !(prefixes & PREFIX_LOCK)
8336                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8337                TCGv base, treg, src, dst;
8338
8339                /* Preserve hflags bits by testing CR4 at runtime.  */
8340                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8341                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8342
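                /* Bit 3 of modrm selects GS vs FS; bit 4 selects
                   write vs read. */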
8343                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8344                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8345
8346                if (modrm & 0x10) {
8347                    /* wr*base */
8348                    dst = base, src = treg;
8349                } else {
8350                    /* rd*base */
8351                    dst = treg, src = base;
8352                }
8353
8354                if (s->dflag == MO_32) {
8355                    tcg_gen_ext32u_tl(dst, src);
8356                } else {
8357                    tcg_gen_mov_tl(dst, src);
8358                }
8359                break;
8360            }
8361            goto unknown_op;
8362
8363        case 0xf8: /* sfence / pcommit */
8364            if (prefixes & PREFIX_DATA) {
8365                /* pcommit */
8366                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8367                    || (prefixes & PREFIX_LOCK)) {
8368                    goto illegal_op;
8369                }
8370                break;
8371            }
8372            /* fallthru */
8373        case 0xf9 ... 0xff: /* sfence */
8374            if (!(s->cpuid_features & CPUID_SSE)
8375                || (prefixes & PREFIX_LOCK)) {
8376                goto illegal_op;
8377            }
8378            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8379            break;
8380        case 0xe8 ... 0xef: /* lfence */
8381            if (!(s->cpuid_features & CPUID_SSE)
8382                || (prefixes & PREFIX_LOCK)) {
8383                goto illegal_op;
8384            }
8385            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8386            break;
8387        case 0xf0 ... 0xf7: /* mfence */
8388            if (!(s->cpuid_features & CPUID_SSE2)
8389                || (prefixes & PREFIX_LOCK)) {
8390                goto illegal_op;
8391            }
8392            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8393            break;
8394
8395        default:
8396            goto unknown_op;
8397        }
8398        break;
8399
8400    case 0x10d: /* 3DNow! prefetch(w) */
8401        modrm = x86_ldub_code(env, s);
8402        mod = (modrm >> 6) & 3;
8403        if (mod == 3)
8404            goto illegal_op;
8405        gen_nop_modrm(env, s, modrm);
8406        break;
8407    case 0x1aa: /* rsm */
8408        gen_svm_check_intercept(s, SVM_EXIT_RSM);
8409        if (!(s->flags & HF_SMM_MASK))
8410            goto illegal_op;
8411#ifdef CONFIG_USER_ONLY
8412        /* we should not be in SMM mode */
8413        g_assert_not_reached();
8414#else
8415        gen_update_cc_op(s);
8416        gen_jmp_im(s, s->pc - s->cs_base);
8417        gen_helper_rsm(cpu_env);
8418#endif /* CONFIG_USER_ONLY */
8419        gen_eob(s);
8420        break;
8421    case 0x1b8: /* SSE4.2 popcnt */
8422        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8423             PREFIX_REPZ)
8424            goto illegal_op;
8425        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8426            goto illegal_op;
8427
8428        modrm = x86_ldub_code(env, s);
8429        reg = ((modrm >> 3) & 7) | REX_R(s);
8430
8431        if (s->prefix & PREFIX_DATA) {
8432            ot = MO_16;
8433        } else {
8434            ot = mo_64_32(dflag);
8435        }
8436
8437        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8438        gen_extu(ot, s->T0);
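        /* CC_OP_POPCNT derives ZF from cc_src, so save the zero-extended
           source value. */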
8439        tcg_gen_mov_tl(cpu_cc_src, s->T0);
8440        tcg_gen_ctpop_tl(s->T0, s->T0);
8441        gen_op_mov_reg_v(s, ot, reg, s->T0);
8442
8443        set_cc_op(s, CC_OP_POPCNT);
8444        break;
8445    case 0x10e ... 0x10f:
8446        /* 3DNow! instructions, ignore prefixes */
8447        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8448        /* fall through */
8449    case 0x110 ... 0x117:
8450    case 0x128 ... 0x12f:
8451    case 0x138 ... 0x13a:
8452    case 0x150 ... 0x179:
8453    case 0x17c ... 0x17f:
8454    case 0x1c2:
8455    case 0x1c4 ... 0x1c6:
8456    case 0x1d0 ... 0x1fe:
8457        gen_sse(env, s, b, pc_start);
8458        break;
8459    default:
8460        goto unknown_op;
8461    }
8462    return s->pc;
8463 illegal_op:
8464    gen_illegal_opcode(s);
8465    return s->pc;
8466 unknown_op:
8467    gen_unknown_opcode(env, s);
8468    return s->pc;
8469}
8470
8471void tcg_x86_init(void)
8472{
8473    static const char reg_names[CPU_NB_REGS][4] = {
8474#ifdef TARGET_X86_64
8475        [R_EAX] = "rax",
8476        [R_EBX] = "rbx",
8477        [R_ECX] = "rcx",
8478        [R_EDX] = "rdx",
8479        [R_ESI] = "rsi",
8480        [R_EDI] = "rdi",
8481        [R_EBP] = "rbp",
8482        [R_ESP] = "rsp",
8483        [8]  = "r8",
8484        [9]  = "r9",
8485        [10] = "r10",
8486        [11] = "r11",
8487        [12] = "r12",
8488        [13] = "r13",
8489        [14] = "r14",
8490        [15] = "r15",
8491#else
8492        [R_EAX] = "eax",
8493        [R_EBX] = "ebx",
8494        [R_ECX] = "ecx",
8495        [R_EDX] = "edx",
8496        [R_ESI] = "esi",
8497        [R_EDI] = "edi",
8498        [R_EBP] = "ebp",
8499        [R_ESP] = "esp",
8500#endif
8501    };
8502    static const char seg_base_names[6][8] = {
8503        [R_CS] = "cs_base",
8504        [R_DS] = "ds_base",
8505        [R_ES] = "es_base",
8506        [R_FS] = "fs_base",
8507        [R_GS] = "gs_base",
8508        [R_SS] = "ss_base",
8509    };
8510    static const char bnd_regl_names[4][8] = {
8511        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8512    };
8513    static const char bnd_regu_names[4][8] = {
8514        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8515    };
8516    int i;
8517
8518    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8519                                       offsetof(CPUX86State, cc_op), "cc_op");
8520    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8521                                    "cc_dst");
8522    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8523                                    "cc_src");
8524    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8525                                     "cc_src2");
8526
8527    for (i = 0; i < CPU_NB_REGS; ++i) {
8528        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8529                                         offsetof(CPUX86State, regs[i]),
8530                                         reg_names[i]);
8531    }
8532
8533    for (i = 0; i < 6; ++i) {
8534        cpu_seg_base[i]
8535            = tcg_global_mem_new(cpu_env,
8536                                 offsetof(CPUX86State, segs[i].base),
8537                                 seg_base_names[i]);
8538    }
8539
8540    for (i = 0; i < 4; ++i) {
8541        cpu_bndl[i]
8542            = tcg_global_mem_new_i64(cpu_env,
8543                                     offsetof(CPUX86State, bnd_regs[i].lb),
8544                                     bnd_regl_names[i]);
8545        cpu_bndu[i]
8546            = tcg_global_mem_new_i64(cpu_env,
8547                                     offsetof(CPUX86State, bnd_regs[i].ub),
8548                                     bnd_regu_names[i]);
8549    }
8550}
8551
8552static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8553{
8554    DisasContext *dc = container_of(dcbase, DisasContext, base);
8555    CPUX86State *env = cpu->env_ptr;
8556    uint32_t flags = dc->base.tb->flags;
8557    uint32_t cflags = tb_cflags(dc->base.tb);
8558    int cpl = (flags >> HF_CPL_SHIFT) & 3;
8559    int iopl = (flags >> IOPL_SHIFT) & 3;
8560
8561    dc->cs_base = dc->base.tb->cs_base;
8562    dc->flags = flags;
8563#ifndef CONFIG_USER_ONLY
8564    dc->cpl = cpl;
8565    dc->iopl = iopl;
8566#endif
8567
8568    /* We make some simplifying assumptions; validate they're correct. */
8569    g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
8570    g_assert(CPL(dc) == cpl);
8571    g_assert(IOPL(dc) == iopl);
8572    g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
8573    g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
8574    g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
8575    g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
8576    g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
8577    g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
8578    g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
8579    g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));
8580
8581    dc->cc_op = CC_OP_DYNAMIC;
8582    dc->cc_op_dirty = false;
8583    dc->popl_esp_hack = 0;
8584    /* select memory access functions */
8585    dc->mem_index = 0;
8586#ifdef CONFIG_SOFTMMU
8587    dc->mem_index = cpu_mmu_index(env, false);
8588#endif
8589    dc->cpuid_features = env->features[FEAT_1_EDX];
8590    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8591    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8592    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8593    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8594    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8595    dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
8596                    (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
8597    /*
8598     * If jmp_opt, we want to handle each string instruction individually.
8599     * For icount also disable repz optimization so that each iteration
8600     * is accounted separately.
8601     */
8602    dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);
8603
8604    dc->T0 = tcg_temp_new();
8605    dc->T1 = tcg_temp_new();
8606    dc->A0 = tcg_temp_new();
8607
8608    dc->tmp0 = tcg_temp_new();
8609    dc->tmp1_i64 = tcg_temp_new_i64();
8610    dc->tmp2_i32 = tcg_temp_new_i32();
8611    dc->tmp3_i32 = tcg_temp_new_i32();
8612    dc->tmp4 = tcg_temp_new();
8613    dc->ptr0 = tcg_temp_new_ptr();
8614    dc->ptr1 = tcg_temp_new_ptr();
8615    dc->cc_srcT = tcg_temp_local_new();
8616}
8617
8618static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8619{
8620}
8621
8622static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8623{
8624    DisasContext *dc = container_of(dcbase, DisasContext, base);
8625
8626    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8627}
8628
8629static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8630{
8631    DisasContext *dc = container_of(dcbase, DisasContext, base);
8632    target_ulong pc_next;
8633
8634#ifdef TARGET_VSYSCALL_PAGE
8635    /*
8636     * Detect entry into the vsyscall page and invoke the syscall.
8637     */
8638    if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8639        gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8640        dc->base.pc_next = dc->pc + 1;
8641        return;
8642    }
8643#endif
8644
8645    pc_next = disas_insn(dc, cpu);
8646
8647    if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
        /* If in single-step mode, we generate only one instruction and
           then generate an exception.  */
        /* If irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
           the flag and abort the translation to give the irqs a
           chance to happen.  */
8653        dc->base.is_jmp = DISAS_TOO_MANY;
8654    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8655               && ((pc_next & TARGET_PAGE_MASK)
8656                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8657                       & TARGET_PAGE_MASK)
8658                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
        /* Do not cross a page boundary in icount mode, since doing so
           can cause an exception.  Allow the boundary to be crossed
           only by the first instruction in the block; if the current
           instruction has already crossed it, that is fine, because
           an exception has not stopped this code.
         */
8665        dc->base.is_jmp = DISAS_TOO_MANY;
8666    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8667        dc->base.is_jmp = DISAS_TOO_MANY;
8668    }
8669
8670    dc->base.pc_next = pc_next;
8671}
8672
8673static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8674{
8675    DisasContext *dc = container_of(dcbase, DisasContext, base);
8676
8677    if (dc->base.is_jmp == DISAS_TOO_MANY) {
8678        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8679        gen_eob(dc);
8680    }
8681}
8682
8683static void i386_tr_disas_log(const DisasContextBase *dcbase,
8684                              CPUState *cpu)
8685{
8686    DisasContext *dc = container_of(dcbase, DisasContext, base);
8687
8688    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8689    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8690}
8691
8692static const TranslatorOps i386_tr_ops = {
8693    .init_disas_context = i386_tr_init_disas_context,
8694    .tb_start           = i386_tr_tb_start,
8695    .insn_start         = i386_tr_insn_start,
8696    .translate_insn     = i386_tr_translate_insn,
8697    .tb_stop            = i386_tr_tb_stop,
8698    .disas_log          = i386_tr_disas_log,
8699};
8700
8701/* generate intermediate code for basic block 'tb'.  */
8702void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8703{
8704    DisasContext dc;
8705
8706    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8707}
8708
8709void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8710                          target_ulong *data)
8711{
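    /* data[0] is pc (eip + cs_base) and data[1] is cc_op, as emitted
       by tcg_gen_insn_start() above. */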
8712    int cc_op = data[1];
8713    env->eip = data[0] - tb->cs_base;
8714    if (cc_op != CC_OP_DYNAMIC) {
8715        env->cc_op = cc_op;
8716    }
8717}
8718