qemu/target/i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_A0;
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];
/* local temps */
static TCGv cpu_T0, cpu_T1;
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;

#include "exec/gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs;
#endif

typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    TCGMemOp aflag;
    TCGMemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* VEX.vvvv register, without one's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;
    sigjmp_buf jmpbuf;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

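/* Switch the translator's tracked CC_OP to OP, discarding the values of
   any flag-computation inputs that the new op no longer uses.  */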
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(cpu_cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

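/* Write the translator's current CC_OP back to the cpu_cc_op global
   if it is stale.  */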
static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N in 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4".  Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline TCGMemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline TCGMemOp mo_64_32(TCGMemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

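/* Store T0 into register REG, honouring the x86 partial-register rules
   for the operand size: byte and word stores merge into the existing
   value, while 32-bit stores zero the high half on x86_64.  */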
static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
{
    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_reg_v(idx, d, cpu_T0);
    }
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    gen_op_jmp_v(cpu_tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(cpu_A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(cpu_A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(cpu_A0, a0);
        a0 = cpu_A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(cpu_A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(cpu_A0, a0);
            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
        } else {
            tcg_gen_add_tl(cpu_A0, a0, seg);
            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
{
    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
}

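/* Sign- or zero-extend SRC to SIZE, placing the result in DST.
   Returns DST, or SRC itself when no extension is necessary.  */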
static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

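/* Generate the permission checks guarding an I/O instruction: the TSS
   I/O bitmap check when it applies (CPL > IOPL, or vm86 mode), plus the
   SVM I/O intercept check when running under a hypervisor.  */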
static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_SVMI_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static void gen_op_update1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static void gen_op_update3_cc(TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
    tcg_gen_movi_tl(cpu_cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    TCGV_UNUSED(zero);
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

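/* Describes how to evaluate a condition: compare REG, masked with MASK,
   against REG2 or IMM under COND.  With no_setcond, REG already holds
   the boolean result.  */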
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg;
        tcg_gen_mov_tl(t0, cpu_cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_extu(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_exts(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

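/* If I/O breakpoints are enabled, report the port access to the bpt_io
   helper so a matching debug exception can be raised.  */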
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}


static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(cpu_T0, 0);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);

    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

/* Same method as Valgrind: we generate jumps to the current or next
   instruction.  */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
            tcg_gen_neg_tl(cpu_T0, cpu_T0);
            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(cpu_T0, cpu_T1);
            tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
        } else {
            tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(ot, cpu_T0, d);
        } else {
            gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
        }
        tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

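/* Update the flags after a shift whose count may be zero, in which case
   the flags must be left untouched (hence the conditional moves).
   RESULT is the shifted value and SHM1 the value shifted by count-1,
   from which the final carry bit is extracted.  */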
static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
        oldop = cpu_tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

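/* SHL/SHR/SAR with a variable count in T1.  The operand is register
   OP1, or memory at A0 when OP1 == OR_TMP0.  */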
static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T0);
            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            gen_extu(ot, cpu_T0);
            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
        }
    } else {
        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T0);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
            } else {
                gen_extu(ot, cpu_T0);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

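/* ROL/ROR with a variable count in T1.  Byte and word operands are
   replicated so that a single 32-bit rotate gives the right result.  */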
static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
        if (is_right) {
            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        } else {
            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        }
        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, cpu_T1);
    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        cpu_tmp2_i32, cpu_tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
            if (is_right) {
                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
            } else {
                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, cpu_T0);
            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}

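/* RCL/RCR rotate through the carry flag.  These go through helpers,
   as a rotate across CF has no direct TCG equivalent.  */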
1671/* XXX: add faster immediate = 1 case */
1672static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1673                           int is_right)
1674{
1675    gen_compute_eflags(s);
1676    assert(s->cc_op == CC_OP_EFLAGS);
1677
1678    /* load */
1679    if (op1 == OR_TMP0)
1680        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1681    else
1682        gen_op_mov_v_reg(ot, cpu_T0, op1);
1683    
1684    if (is_right) {
1685        switch (ot) {
1686        case MO_8:
1687            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1688            break;
1689        case MO_16:
1690            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1691            break;
1692        case MO_32:
1693            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1694            break;
1695#ifdef TARGET_X86_64
1696        case MO_64:
1697            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1698            break;
1699#endif
1700        default:
1701            tcg_abort();
1702        }
1703    } else {
1704        switch (ot) {
1705        case MO_8:
1706            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1707            break;
1708        case MO_16:
1709            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1710            break;
1711        case MO_32:
1712            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1713            break;
1714#ifdef TARGET_X86_64
1715        case MO_64:
1716            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1717            break;
1718#endif
1719        default:
1720            tcg_abort();
1721        }
1722    }
1723    /* store */
1724    gen_op_st_rm_T0_A0(s, ot, op1);
1725}
1726
1727/* XXX: add faster immediate case */
1728static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1729                             bool is_right, TCGv count_in)
1730{
1731    target_ulong mask = (ot == MO_64 ? 63 : 31);
1732    TCGv count;
1733
1734    /* load */
1735    if (op1 == OR_TMP0) {
1736        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1737    } else {
1738        gen_op_mov_v_reg(ot, cpu_T0, op1);
1739    }
1740
1741    count = tcg_temp_new();
1742    tcg_gen_andi_tl(count, count_in, mask);
1743
1744    switch (ot) {
1745    case MO_16:
1746        /* Note: we implement the Intel behaviour for shift count > 16.
1747           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1748           portion by constructing it as a 32-bit value.  */
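        /* Worked example (illustrative): "shrdw $20, %bx, %ax" with
           AX = 0x1234 and BX = 0x5678 shifts the 48-bit value
           0x123456781234 right by 20, leaving AX = 0x4567.  */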
1749        if (is_right) {
1750            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
1751            tcg_gen_mov_tl(cpu_T1, cpu_T0);
1752            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
1753        } else {
1754            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
1755        }
1756        /* FALLTHRU */
1757#ifdef TARGET_X86_64
1758    case MO_32:
1759        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1760        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1761        if (is_right) {
1762            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
1763            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1764            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
1765        } else {
1766            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
1767            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1768            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
1769            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1770            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
1771        }
1772        break;
1773#endif
1774    default:
1775        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1776        if (is_right) {
1777            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1778
1779            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1780            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
1781            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
1782        } else {
1783            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1784            if (ot == MO_16) {
1785                /* Only needed if count > 16, for Intel behaviour.  */
1786                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1787                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
1788                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1789            }
1790
1791            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1792            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
1793            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
1794        }
1795        tcg_gen_movi_tl(cpu_tmp4, 0);
1796        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
1797                           cpu_tmp4, cpu_T1);
1798        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1799        break;
1800    }
1801
1802    /* store */
1803    gen_op_st_rm_T0_A0(s, ot, op1);
1804
1805    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
1806    tcg_temp_free(count);
1807}
1808
1809static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1810{
    if (s != OR_TMP1) {
        gen_op_mov_v_reg(ot, cpu_T1, s);
    }
    switch (op) {
1814    case OP_ROL:
1815        gen_rot_rm_T1(s1, ot, d, 0);
1816        break;
1817    case OP_ROR:
1818        gen_rot_rm_T1(s1, ot, d, 1);
1819        break;
1820    case OP_SHL:
1821    case OP_SHL1:
1822        gen_shift_rm_T1(s1, ot, d, 0, 0);
1823        break;
1824    case OP_SHR:
1825        gen_shift_rm_T1(s1, ot, d, 1, 0);
1826        break;
1827    case OP_SAR:
1828        gen_shift_rm_T1(s1, ot, d, 1, 1);
1829        break;
1830    case OP_RCL:
1831        gen_rotc_rm_T1(s1, ot, d, 0);
1832        break;
1833    case OP_RCR:
1834        gen_rotc_rm_T1(s1, ot, d, 1);
1835        break;
1836    }
1837}
1838
1839static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1840{
    switch (op) {
1842    case OP_ROL:
1843        gen_rot_rm_im(s1, ot, d, c, 0);
1844        break;
1845    case OP_ROR:
1846        gen_rot_rm_im(s1, ot, d, c, 1);
1847        break;
1848    case OP_SHL:
1849    case OP_SHL1:
1850        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1851        break;
1852    case OP_SHR:
1853        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1854        break;
1855    case OP_SAR:
1856        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1857        break;
1858    default:
1859        /* currently not optimized */
1860        tcg_gen_movi_tl(cpu_T1, c);
1861        gen_shift(s1, op, ot, d, OR_TMP1);
1862        break;
1863    }
1864}
1865
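/* The architecture limits an instruction, including all of its
   prefixes, to 15 bytes; anything longer raises #GP.  */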
1866#define X86_MAX_INSN_LENGTH 15
1867
1868static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1869{
1870    uint64_t pc = s->pc;
1871
1872    s->pc += num_bytes;
1873    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1874        /* If the instruction's 16th byte is on a different page than the 1st, a
1875         * page fault on the second page wins over the general protection fault
1876         * caused by the instruction being too long.
1877         * This can happen even if the operand is only one byte long!
1878         */
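        /* Illustrative case: 15 prefix bytes at the end of one page and
         * a 16th byte on the next, unmapped page.  The probe below
         * touches that second page, so the #PF wins over the #GP for
         * the over-long instruction.
         */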
1879        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1880            volatile uint8_t unused =
1881                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1882            (void) unused;
1883        }
1884        siglongjmp(s->jmpbuf, 1);
1885    }
1886
1887    return pc;
1888}
1889
1890static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1891{
1892    return cpu_ldub_code(env, advance_pc(env, s, 1));
1893}
1894
1895static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1896{
1897    return cpu_ldsw_code(env, advance_pc(env, s, 2));
1898}
1899
1900static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1901{
1902    return cpu_lduw_code(env, advance_pc(env, s, 2));
1903}
1904
1905static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1906{
1907    return cpu_ldl_code(env, advance_pc(env, s, 4));
1908}
1909
1910#ifdef TARGET_X86_64
1911static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1912{
1913    return cpu_ldq_code(env, advance_pc(env, s, 8));
1914}
1915#endif
1916
1917/* Decompose an address.  */
1918
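/* base and index are cpu_regs[] indices, or -1 when absent.  base == -2
   marks a RIP-relative address whose displacement already includes the
   address of the next instruction.  scale is the SIB shift count (0-3).  */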
1919typedef struct AddressParts {
1920    int def_seg;
1921    int base;
1922    int index;
1923    int scale;
1924    target_long disp;
1925} AddressParts;
1926
1927static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1928                                    int modrm)
1929{
1930    int def_seg, base, index, scale, mod, rm;
1931    target_long disp;
1932    bool havesib;
1933
1934    def_seg = R_DS;
1935    index = -1;
1936    scale = 0;
1937    disp = 0;
1938
1939    mod = (modrm >> 6) & 3;
1940    rm = modrm & 7;
1941    base = rm | REX_B(s);
1942
1943    if (mod == 3) {
1944        /* Normally filtered out earlier, but including this path
1945           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1946        goto done;
1947    }
1948
1949    switch (s->aflag) {
1950    case MO_64:
1951    case MO_32:
1952        havesib = 0;
1953        if (rm == 4) {
1954            int code = x86_ldub_code(env, s);
1955            scale = (code >> 6) & 3;
1956            index = ((code >> 3) & 7) | REX_X(s);
1957            if (index == 4) {
1958                index = -1;  /* no index */
1959            }
1960            base = (code & 7) | REX_B(s);
1961            havesib = 1;
1962        }
1963
1964        switch (mod) {
1965        case 0:
1966            if ((base & 7) == 5) {
1967                base = -1;
1968                disp = (int32_t)x86_ldl_code(env, s);
1969                if (CODE64(s) && !havesib) {
1970                    base = -2;
1971                    disp += s->pc + s->rip_offset;
1972                }
1973            }
1974            break;
1975        case 1:
1976            disp = (int8_t)x86_ldub_code(env, s);
1977            break;
1978        default:
1979        case 2:
1980            disp = (int32_t)x86_ldl_code(env, s);
1981            break;
1982        }
1983
1984        /* For correct popl handling with esp.  */
1985        if (base == R_ESP && s->popl_esp_hack) {
1986            disp += s->popl_esp_hack;
1987        }
1988        if (base == R_EBP || base == R_ESP) {
1989            def_seg = R_SS;
1990        }
1991        break;
1992
1993    case MO_16:
1994        if (mod == 0) {
1995            if (rm == 6) {
1996                base = -1;
1997                disp = x86_lduw_code(env, s);
1998                break;
1999            }
2000        } else if (mod == 1) {
2001            disp = (int8_t)x86_ldub_code(env, s);
2002        } else {
2003            disp = (int16_t)x86_lduw_code(env, s);
2004        }
2005
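        /* The canonical eight 16-bit modes: BX+SI, BX+DI, BP+SI, BP+DI,
           SI, DI, BP (or disp16 when mod == 0), and BX.  */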
2006        switch (rm) {
2007        case 0:
2008            base = R_EBX;
2009            index = R_ESI;
2010            break;
2011        case 1:
2012            base = R_EBX;
2013            index = R_EDI;
2014            break;
2015        case 2:
2016            base = R_EBP;
2017            index = R_ESI;
2018            def_seg = R_SS;
2019            break;
2020        case 3:
2021            base = R_EBP;
2022            index = R_EDI;
2023            def_seg = R_SS;
2024            break;
2025        case 4:
2026            base = R_ESI;
2027            break;
2028        case 5:
2029            base = R_EDI;
2030            break;
2031        case 6:
2032            base = R_EBP;
2033            def_seg = R_SS;
2034            break;
2035        default:
2036        case 7:
2037            base = R_EBX;
2038            break;
2039        }
2040        break;
2041
2042    default:
2043        tcg_abort();
2044    }
2045
2046 done:
2047    return (AddressParts){ def_seg, base, index, scale, disp };
2048}
2049
2050/* Compute the address, with a minimum number of TCG ops.  */
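/* For example, "lea 0x10(%ebx,%esi,4)" decomposes into base = R_EBX,
   index = R_ESI, scale = 2, disp = 0x10 and lowers to one shift, one
   add and one add-immediate; when a component is absent, cpu_regs[]
   values are used directly rather than being copied into cpu_A0.  */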
2051static TCGv gen_lea_modrm_1(AddressParts a)
2052{
2053    TCGv ea;
2054
2055    TCGV_UNUSED(ea);
2056    if (a.index >= 0) {
2057        if (a.scale == 0) {
2058            ea = cpu_regs[a.index];
2059        } else {
2060            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
2061            ea = cpu_A0;
2062        }
2063        if (a.base >= 0) {
2064            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
2065            ea = cpu_A0;
2066        }
2067    } else if (a.base >= 0) {
2068        ea = cpu_regs[a.base];
2069    }
2070    if (TCGV_IS_UNUSED(ea)) {
2071        tcg_gen_movi_tl(cpu_A0, a.disp);
2072        ea = cpu_A0;
2073    } else if (a.disp != 0) {
2074        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
2075        ea = cpu_A0;
2076    }
2077
2078    return ea;
2079}
2080
2081static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2082{
2083    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2084    TCGv ea = gen_lea_modrm_1(a);
2085    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2086}
2087
2088static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2089{
2090    (void)gen_lea_modrm_0(env, s, modrm);
2091}
2092
2093/* Used for BNDCL, BNDCU, BNDCN.  */
2094static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2095                      TCGCond cond, TCGv_i64 bndv)
2096{
2097    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
2098
2099    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
2100    if (!CODE64(s)) {
2101        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
2102    }
2103    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
2104    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
2105    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
2106}
2107
2108/* used for LEA and MOV AX, mem */
2109static void gen_add_A0_ds_seg(DisasContext *s)
2110{
2111    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2112}
2113
/* Generate a modrm memory load or store of 'reg'.  cpu_T0 is used
   when reg == OR_TMP0.  */
2116static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2117                           TCGMemOp ot, int reg, int is_store)
2118{
2119    int mod, rm;
2120
2121    mod = (modrm >> 6) & 3;
2122    rm = (modrm & 7) | REX_B(s);
    if (mod == 3) {
        if (is_store) {
            if (reg != OR_TMP0) {
                gen_op_mov_v_reg(ot, cpu_T0, reg);
            }
            gen_op_mov_reg_v(ot, rm, cpu_T0);
        } else {
            gen_op_mov_v_reg(ot, cpu_T0, rm);
            if (reg != OR_TMP0) {
                gen_op_mov_reg_v(ot, reg, cpu_T0);
            }
        }
    } else {
        gen_lea_modrm(env, s, modrm);
        if (is_store) {
            if (reg != OR_TMP0) {
                gen_op_mov_v_reg(ot, cpu_T0, reg);
            }
            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
        } else {
            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
            if (reg != OR_TMP0) {
                gen_op_mov_reg_v(ot, reg, cpu_T0);
            }
        }
    }
2145}
2146
2147static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2148{
2149    uint32_t ret;
2150
2151    switch (ot) {
2152    case MO_8:
2153        ret = x86_ldub_code(env, s);
2154        break;
2155    case MO_16:
2156        ret = x86_lduw_code(env, s);
2157        break;
2158    case MO_32:
2159#ifdef TARGET_X86_64
2160    case MO_64:
2161#endif
2162        ret = x86_ldl_code(env, s);
2163        break;
2164    default:
2165        tcg_abort();
2166    }
2167    return ret;
2168}
2169
2170static inline int insn_const_size(TCGMemOp ot)
2171{
2172    if (ot <= MO_32) {
2173        return 1 << ot;
2174    } else {
2175        return 4;
2176    }
2177}
2178
2179static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2180{
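    /* Direct block chaining is only safe when the target stays on a
       page this TB is already associated with: its first page or the
       page of the current instruction.  */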
2181#ifndef CONFIG_USER_ONLY
2182    return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2183           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2184#else
2185    return true;
2186#endif
2187}
2188
2189static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2190{
2191    target_ulong pc = s->cs_base + eip;
2192
    if (use_goto_tb(s, pc)) {
2194        /* jump to same page: we can use a direct jump */
2195        tcg_gen_goto_tb(tb_num);
2196        gen_jmp_im(eip);
2197        tcg_gen_exit_tb((uintptr_t)s->base.tb + tb_num);
2198        s->base.is_jmp = DISAS_NORETURN;
2199    } else {
2200        /* jump to another page */
2201        gen_jmp_im(eip);
2202        gen_jr(s, cpu_tmp0);
2203    }
2204}
2205
2206static inline void gen_jcc(DisasContext *s, int b,
2207                           target_ulong val, target_ulong next_eip)
2208{
2209    TCGLabel *l1, *l2;
2210
2211    if (s->jmp_opt) {
2212        l1 = gen_new_label();
2213        gen_jcc1(s, b, l1);
2214
2215        gen_goto_tb(s, 0, next_eip);
2216
2217        gen_set_label(l1);
2218        gen_goto_tb(s, 1, val);
2219    } else {
2220        l1 = gen_new_label();
2221        l2 = gen_new_label();
2222        gen_jcc1(s, b, l1);
2223
2224        gen_jmp_im(next_eip);
2225        tcg_gen_br(l2);
2226
2227        gen_set_label(l1);
2228        gen_jmp_im(val);
2229        gen_set_label(l2);
2230        gen_eob(s);
2231    }
2232}
2233
2234static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2235                        int modrm, int reg)
2236{
2237    CCPrepare cc;
2238
2239    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
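    /* Per the x86 semantics, CMOV always reads its source operand,
       even when the condition is false; hence the unconditional load
       above.  */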
2240
2241    cc = gen_prepare_cc(s, b, cpu_T1);
2242    if (cc.mask != -1) {
2243        TCGv t0 = tcg_temp_new();
2244        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2245        cc.reg = t0;
2246    }
2247    if (!cc.use_reg2) {
2248        cc.reg2 = tcg_const_tl(cc.imm);
2249    }
2250
2251    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2252                       cpu_T0, cpu_regs[reg]);
2253    gen_op_mov_reg_v(ot, reg, cpu_T0);
2254
2255    if (cc.mask != -1) {
2256        tcg_temp_free(cc.reg);
2257    }
2258    if (!cc.use_reg2) {
2259        tcg_temp_free(cc.reg2);
2260    }
2261}
2262
2263static inline void gen_op_movl_T0_seg(int seg_reg)
2264{
2265    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2266                     offsetof(CPUX86State,segs[seg_reg].selector));
2267}
2268
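/* Real mode / VM86 mode segment load: the base is simply the selector
   shifted left by 4.  */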
2269static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2270{
2271    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2272    tcg_gen_st32_tl(cpu_T0, cpu_env,
2273                    offsetof(CPUX86State,segs[seg_reg].selector));
2274    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2275}
2276
/* Move T0 to seg_reg, and stop translation if the CPU state may
   change as a result.  Never call this function with seg_reg == R_CS.  */
2279static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2280{
2281    if (s->pe && !s->vm86) {
2282        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2283        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
        /* Abort translation because the addseg value may change or
           because ss32 may change.  For R_SS, translation must always
           stop: special handling is needed to inhibit hardware
           interrupts for the next instruction.  */
2288        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2289            s->base.is_jmp = DISAS_TOO_MANY;
2290        }
2291    } else {
2292        gen_op_movl_seg_T0_vm(seg_reg);
2293        if (seg_reg == R_SS) {
2294            s->base.is_jmp = DISAS_TOO_MANY;
2295        }
2296    }
2297}
2298
2299static inline int svm_is_rep(int prefixes)
2300{
2301    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2302}
2303
2304static inline void
2305gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2306                              uint32_t type, uint64_t param)
2307{
    /* No SVM active: fast path.  */
    if (likely(!(s->flags & HF_SVMI_MASK))) {
        return;
    }
2311    gen_update_cc_op(s);
2312    gen_jmp_im(pc_start - s->cs_base);
2313    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2314                                         tcg_const_i64(param));
2315}
2316
2317static inline void
2318gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2319{
2320    gen_svm_check_intercept_param(s, pc_start, type, 0);
2321}
2322
2323static inline void gen_stack_update(DisasContext *s, int addend)
2324{
2325    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2326}
2327
2328/* Generate a push. It depends on ss32, addseg and dflag.  */
2329static void gen_push_v(DisasContext *s, TCGv val)
2330{
2331    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2332    TCGMemOp a_ot = mo_stacksize(s);
2333    int size = 1 << d_ot;
2334    TCGv new_esp = cpu_A0;
2335
2336    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2337
2338    if (!CODE64(s)) {
2339        if (s->addseg) {
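            /* gen_lea_v_seg below may rewrite cpu_A0 into a linear
               address, so preserve the raw stack offset for the final
               ESP write-back.  */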
2340            new_esp = cpu_tmp4;
2341            tcg_gen_mov_tl(new_esp, cpu_A0);
2342        }
2343        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2344    }
2345
2346    gen_op_st_v(s, d_ot, val, cpu_A0);
2347    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2348}
2349
/* A two-step pop is necessary for precise exceptions: the load may
   fault, so ESP is only updated (by gen_pop_update) once the popped
   value has been safely consumed.  */
2351static TCGMemOp gen_pop_T0(DisasContext *s)
2352{
2353    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2354
2355    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2356    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2357
2358    return d_ot;
2359}
2360
2361static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2362{
2363    gen_stack_update(s, 1 << ot);
2364}
2365
2366static inline void gen_stack_A0(DisasContext *s)
2367{
2368    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2369}
2370
2371static void gen_pusha(DisasContext *s)
2372{
2373    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2374    TCGMemOp d_ot = s->dflag;
2375    int size = 1 << d_ot;
2376    int i;
2377
2378    for (i = 0; i < 8; i++) {
2379        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2380        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2381        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2382    }
2383
2384    gen_stack_update(s, -8 * size);
2385}
2386
2387static void gen_popa(DisasContext *s)
2388{
2389    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2390    TCGMemOp d_ot = s->dflag;
2391    int size = 1 << d_ot;
2392    int i;
2393
2394    for (i = 0; i < 8; i++) {
2395        /* ESP is not reloaded */
2396        if (7 - i == R_ESP) {
2397            continue;
2398        }
2399        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2400        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2401        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2402        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2403    }
2404
2405    gen_stack_update(s, 8 * size);
2406}
2407
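/* For example, "enter $8, $0" pushes EBP, points EBP at the saved EBP
   (FrameTemp) and reserves 8 further bytes of stack, i.e.
   ESP = FrameTemp - 8.  A non-zero level additionally copies
   level - 1 frame pointers from the old frame and pushes FrameTemp
   itself.  */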
2408static void gen_enter(DisasContext *s, int esp_addend, int level)
2409{
2410    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2411    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2412    int size = 1 << d_ot;
2413
2414    /* Push BP; compute FrameTemp into T1.  */
2415    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2416    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2417    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2418
2419    level &= 31;
2420    if (level != 0) {
2421        int i;
2422
2423        /* Copy level-1 pointers from the previous frame.  */
2424        for (i = 1; i < level; ++i) {
2425            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2426            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2427            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2428
2429            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2430            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2431            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2432        }
2433
2434        /* Push the current FrameTemp as the last level.  */
2435        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2436        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2437        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2438    }
2439
2440    /* Copy the FrameTemp value to EBP.  */
2441    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2442
2443    /* Compute the final value of ESP.  */
2444    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2445    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2446}
2447
2448static void gen_leave(DisasContext *s)
2449{
2450    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2451    TCGMemOp a_ot = mo_stacksize(s);
2452
2453    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2454    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2455
2456    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2457
2458    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2459    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2460}
2461
2462static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2463{
2464    gen_update_cc_op(s);
2465    gen_jmp_im(cur_eip);
2466    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2467    s->base.is_jmp = DISAS_NORETURN;
2468}
2469
2470/* Generate #UD for the current instruction.  The assumption here is that
2471   the instruction is known, but it isn't allowed in the current cpu mode.  */
2472static void gen_illegal_opcode(DisasContext *s)
2473{
2474    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
2475}
2476
2477/* Similarly, except that the assumption here is that we don't decode
2478   the instruction at all -- either a missing opcode, an unimplemented
2479   feature, or just a bogus instruction stream.  */
2480static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2481{
2482    gen_illegal_opcode(s);
2483
2484    if (qemu_loglevel_mask(LOG_UNIMP)) {
2485        target_ulong pc = s->pc_start, end = s->pc;
2486        qemu_log_lock();
2487        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2488        for (; pc < end; ++pc) {
2489            qemu_log(" %02x", cpu_ldub_code(env, pc));
2490        }
2491        qemu_log("\n");
2492        qemu_log_unlock();
2493    }
2494}
2495
/* An interrupt differs from an exception in the privilege checks
   that must be performed.  */
2498static void gen_interrupt(DisasContext *s, int intno,
2499                          target_ulong cur_eip, target_ulong next_eip)
2500{
2501    gen_update_cc_op(s);
2502    gen_jmp_im(cur_eip);
2503    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2504                               tcg_const_i32(next_eip - cur_eip));
2505    s->base.is_jmp = DISAS_NORETURN;
2506}
2507
2508static void gen_debug(DisasContext *s, target_ulong cur_eip)
2509{
2510    gen_update_cc_op(s);
2511    gen_jmp_im(cur_eip);
2512    gen_helper_debug(cpu_env);
2513    s->base.is_jmp = DISAS_NORETURN;
2514}
2515
2516static void gen_set_hflag(DisasContext *s, uint32_t mask)
2517{
2518    if ((s->flags & mask) == 0) {
2519        TCGv_i32 t = tcg_temp_new_i32();
2520        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2521        tcg_gen_ori_i32(t, t, mask);
2522        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2523        tcg_temp_free_i32(t);
2524        s->flags |= mask;
2525    }
2526}
2527
2528static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2529{
2530    if (s->flags & mask) {
2531        TCGv_i32 t = tcg_temp_new_i32();
2532        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2533        tcg_gen_andi_i32(t, t, ~mask);
2534        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2535        tcg_temp_free_i32(t);
2536        s->flags &= ~mask;
2537    }
2538}
2539
2540/* Clear BND registers during legacy branches.  */
2541static void gen_bnd_jmp(DisasContext *s)
2542{
2543    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2544       and if the BNDREGs are known to be in use (non-zero) already.
2545       The helper itself will check BNDPRESERVE at runtime.  */
2546    if ((s->prefix & PREFIX_REPNZ) == 0
2547        && (s->flags & HF_MPX_EN_MASK) != 0
2548        && (s->flags & HF_MPX_IU_MASK) != 0) {
2549        gen_helper_bnd_jmp(cpu_env);
2550    }
2551}
2552
2553/* Generate an end of block. Trace exception is also generated if needed.
2554   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2555   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2556   S->TF.  This is used by the syscall/sysret insns.  */
2557static void
2558do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2559{
2560    gen_update_cc_op(s);
2561
2562    /* If several instructions disable interrupts, only the first does it.  */
2563    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2564        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2565    } else {
2566        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2567    }
2568
2569    if (s->base.tb->flags & HF_RF_MASK) {
2570        gen_helper_reset_rf(cpu_env);
2571    }
2572    if (s->base.singlestep_enabled) {
2573        gen_helper_debug(cpu_env);
2574    } else if (recheck_tf) {
2575        gen_helper_rechecking_single_step(cpu_env);
2576        tcg_gen_exit_tb(0);
2577    } else if (s->tf) {
2578        gen_helper_single_step(cpu_env);
2579    } else if (jr) {
2580        tcg_gen_lookup_and_goto_ptr();
2581    } else {
2582        tcg_gen_exit_tb(0);
2583    }
2584    s->base.is_jmp = DISAS_NORETURN;
2585}
2586
2587static inline void
2588gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2589{
2590    do_gen_eob_worker(s, inhibit, recheck_tf, false);
2591}
2592
2593/* End of block.
2594   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2595static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2596{
2597    gen_eob_worker(s, inhibit, false);
2598}
2599
2600/* End of block, resetting the inhibit irq flag.  */
2601static void gen_eob(DisasContext *s)
2602{
2603    gen_eob_worker(s, false, false);
2604}
2605
2606/* Jump to register */
2607static void gen_jr(DisasContext *s, TCGv dest)
2608{
2609    do_gen_eob_worker(s, false, false, true);
2610}
2611
/* Generate a jump to eip.  No segment change may happen before this,
   since a direct jump to the next block may occur.  */
2614static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2615{
2616    gen_update_cc_op(s);
2617    set_cc_op(s, CC_OP_DYNAMIC);
2618    if (s->jmp_opt) {
2619        gen_goto_tb(s, tb_num, eip);
2620    } else {
2621        gen_jmp_im(eip);
2622        gen_eob(s);
2623    }
2624}
2625
2626static void gen_jmp(DisasContext *s, target_ulong eip)
2627{
2628    gen_jmp_tb(s, eip, 0);
2629}
2630
2631static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2632{
2633    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2634    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2635}
2636
2637static inline void gen_stq_env_A0(DisasContext *s, int offset)
2638{
2639    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2640    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2641}
2642
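/* 128-bit XMM load/store, implemented as two little-endian 64-bit
   accesses.  */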
2643static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2644{
2645    int mem_index = s->mem_index;
2646    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2647    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2648    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2649    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2650    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2651}
2652
2653static inline void gen_sto_env_A0(DisasContext *s, int offset)
2654{
2655    int mem_index = s->mem_index;
2656    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2657    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2658    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2659    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2660    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2661}
2662
2663static inline void gen_op_movo(int d_offset, int s_offset)
2664{
2665    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2666    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2667    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2668    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2669}
2670
2671static inline void gen_op_movq(int d_offset, int s_offset)
2672{
2673    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2674    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2675}
2676
2677static inline void gen_op_movl(int d_offset, int s_offset)
2678{
2679    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2680    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2681}
2682
2683static inline void gen_op_movq_env_0(int d_offset)
2684{
2685    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2686    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2687}
2688
2689typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2690typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2691typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2692typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2693typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2694typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2695                               TCGv_i32 val);
2696typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2697typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2698                               TCGv val);
2699
2700#define SSE_SPECIAL ((void *)1)
2701#define SSE_DUMMY ((void *)2)
2702
2703#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2704#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2705                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2706
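/* Each row of sse_op_table1 is indexed by b1, i.e. by mandatory prefix:
   0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2 (see the prefix decode at the
   top of gen_sse).  */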
2707static const SSEFunc_0_epp sse_op_table1[256][4] = {
2708    /* 3DNow! extensions */
2709    [0x0e] = { SSE_DUMMY }, /* femms */
2710    [0x0f] = { SSE_DUMMY }, /* pf... */
2711    /* pure SSE operations */
2712    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2713    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2714    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2715    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2716    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2717    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2718    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2719    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2720
2721    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2722    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2723    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2724    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2725    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2726    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2727    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2728    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2729    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2730    [0x51] = SSE_FOP(sqrt),
2731    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2732    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2733    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2734    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2735    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2736    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2737    [0x58] = SSE_FOP(add),
2738    [0x59] = SSE_FOP(mul),
2739    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2740               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2741    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2742    [0x5c] = SSE_FOP(sub),
2743    [0x5d] = SSE_FOP(min),
2744    [0x5e] = SSE_FOP(div),
2745    [0x5f] = SSE_FOP(max),
2746
2747    [0xc2] = SSE_FOP(cmpeq),
2748    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2749               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2750
2751    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2752    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2753    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2754
2755    /* MMX ops and their SSE extensions */
2756    [0x60] = MMX_OP2(punpcklbw),
2757    [0x61] = MMX_OP2(punpcklwd),
2758    [0x62] = MMX_OP2(punpckldq),
2759    [0x63] = MMX_OP2(packsswb),
2760    [0x64] = MMX_OP2(pcmpgtb),
2761    [0x65] = MMX_OP2(pcmpgtw),
2762    [0x66] = MMX_OP2(pcmpgtl),
2763    [0x67] = MMX_OP2(packuswb),
2764    [0x68] = MMX_OP2(punpckhbw),
2765    [0x69] = MMX_OP2(punpckhwd),
2766    [0x6a] = MMX_OP2(punpckhdq),
2767    [0x6b] = MMX_OP2(packssdw),
2768    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2769    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2770    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2772    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2773               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2774               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2775               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2776    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2777    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2778    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2779    [0x74] = MMX_OP2(pcmpeqb),
2780    [0x75] = MMX_OP2(pcmpeqw),
2781    [0x76] = MMX_OP2(pcmpeql),
2782    [0x77] = { SSE_DUMMY }, /* emms */
2783    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2784    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2785    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2786    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2788    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2789    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2790    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2791    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2792    [0xd1] = MMX_OP2(psrlw),
2793    [0xd2] = MMX_OP2(psrld),
2794    [0xd3] = MMX_OP2(psrlq),
2795    [0xd4] = MMX_OP2(paddq),
2796    [0xd5] = MMX_OP2(pmullw),
    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2798    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2799    [0xd8] = MMX_OP2(psubusb),
2800    [0xd9] = MMX_OP2(psubusw),
2801    [0xda] = MMX_OP2(pminub),
2802    [0xdb] = MMX_OP2(pand),
2803    [0xdc] = MMX_OP2(paddusb),
2804    [0xdd] = MMX_OP2(paddusw),
2805    [0xde] = MMX_OP2(pmaxub),
2806    [0xdf] = MMX_OP2(pandn),
2807    [0xe0] = MMX_OP2(pavgb),
2808    [0xe1] = MMX_OP2(psraw),
2809    [0xe2] = MMX_OP2(psrad),
2810    [0xe3] = MMX_OP2(pavgw),
2811    [0xe4] = MMX_OP2(pmulhuw),
2812    [0xe5] = MMX_OP2(pmulhw),
2813    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2815    [0xe8] = MMX_OP2(psubsb),
2816    [0xe9] = MMX_OP2(psubsw),
2817    [0xea] = MMX_OP2(pminsw),
2818    [0xeb] = MMX_OP2(por),
2819    [0xec] = MMX_OP2(paddsb),
2820    [0xed] = MMX_OP2(paddsw),
2821    [0xee] = MMX_OP2(pmaxsw),
2822    [0xef] = MMX_OP2(pxor),
2823    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2824    [0xf1] = MMX_OP2(psllw),
2825    [0xf2] = MMX_OP2(pslld),
2826    [0xf3] = MMX_OP2(psllq),
2827    [0xf4] = MMX_OP2(pmuludq),
2828    [0xf5] = MMX_OP2(pmaddwd),
2829    [0xf6] = MMX_OP2(psadbw),
2830    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2831               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2832    [0xf8] = MMX_OP2(psubb),
2833    [0xf9] = MMX_OP2(psubw),
2834    [0xfa] = MMX_OP2(psubl),
2835    [0xfb] = MMX_OP2(psubq),
2836    [0xfc] = MMX_OP2(paddb),
2837    [0xfd] = MMX_OP2(paddw),
2838    [0xfe] = MMX_OP2(paddl),
2839};
2840
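/* Immediate-count MMX/SSE shifts (opcodes 0F 71/72/73): rows 0-7 are
   the 0F 71 word shifts, 8-15 the 0F 72 dword shifts, 16-23 the 0F 73
   qword shifts, each indexed by ModRM.reg; the second index selects
   the MMX or XMM form.  */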
2841static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2842    [0 + 2] = MMX_OP2(psrlw),
2843    [0 + 4] = MMX_OP2(psraw),
2844    [0 + 6] = MMX_OP2(psllw),
2845    [8 + 2] = MMX_OP2(psrld),
2846    [8 + 4] = MMX_OP2(psrad),
2847    [8 + 6] = MMX_OP2(pslld),
2848    [16 + 2] = MMX_OP2(psrlq),
2849    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2850    [16 + 6] = MMX_OP2(psllq),
2851    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2852};
2853
2854static const SSEFunc_0_epi sse_op_table3ai[] = {
2855    gen_helper_cvtsi2ss,
2856    gen_helper_cvtsi2sd
2857};
2858
2859#ifdef TARGET_X86_64
2860static const SSEFunc_0_epl sse_op_table3aq[] = {
2861    gen_helper_cvtsq2ss,
2862    gen_helper_cvtsq2sd
2863};
2864#endif
2865
2866static const SSEFunc_i_ep sse_op_table3bi[] = {
2867    gen_helper_cvttss2si,
2868    gen_helper_cvtss2si,
2869    gen_helper_cvttsd2si,
2870    gen_helper_cvtsd2si
2871};
2872
2873#ifdef TARGET_X86_64
2874static const SSEFunc_l_ep sse_op_table3bq[] = {
2875    gen_helper_cvttss2sq,
2876    gen_helper_cvtss2sq,
2877    gen_helper_cvttsd2sq,
2878    gen_helper_cvtsd2sq
2879};
2880#endif
2881
2882static const SSEFunc_0_epp sse_op_table4[8][4] = {
2883    SSE_FOP(cmpeq),
2884    SSE_FOP(cmplt),
2885    SSE_FOP(cmple),
2886    SSE_FOP(cmpunord),
2887    SSE_FOP(cmpneq),
2888    SSE_FOP(cmpnlt),
2889    SSE_FOP(cmpnle),
2890    SSE_FOP(cmpord),
2891};
2892
2893static const SSEFunc_0_epp sse_op_table5[256] = {
2894    [0x0c] = gen_helper_pi2fw,
2895    [0x0d] = gen_helper_pi2fd,
2896    [0x1c] = gen_helper_pf2iw,
2897    [0x1d] = gen_helper_pf2id,
2898    [0x8a] = gen_helper_pfnacc,
2899    [0x8e] = gen_helper_pfpnacc,
2900    [0x90] = gen_helper_pfcmpge,
2901    [0x94] = gen_helper_pfmin,
2902    [0x96] = gen_helper_pfrcp,
2903    [0x97] = gen_helper_pfrsqrt,
2904    [0x9a] = gen_helper_pfsub,
2905    [0x9e] = gen_helper_pfadd,
2906    [0xa0] = gen_helper_pfcmpgt,
2907    [0xa4] = gen_helper_pfmax,
2908    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2909    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2910    [0xaa] = gen_helper_pfsubr,
2911    [0xae] = gen_helper_pfacc,
2912    [0xb0] = gen_helper_pfcmpeq,
2913    [0xb4] = gen_helper_pfmul,
2914    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2915    [0xb7] = gen_helper_pmulhrw_mmx,
2916    [0xbb] = gen_helper_pswapd,
2917    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2918};
2919
2920struct SSEOpHelper_epp {
2921    SSEFunc_0_epp op[2];
2922    uint32_t ext_mask;
2923};
2924
2925struct SSEOpHelper_eppi {
2926    SSEFunc_0_eppi op[2];
2927    uint32_t ext_mask;
2928};
2929
2930#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2931#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2932#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2933#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2934#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2935        CPUID_EXT_PCLMULQDQ }
2936#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2937
2938static const struct SSEOpHelper_epp sse_op_table6[256] = {
2939    [0x00] = SSSE3_OP(pshufb),
2940    [0x01] = SSSE3_OP(phaddw),
2941    [0x02] = SSSE3_OP(phaddd),
2942    [0x03] = SSSE3_OP(phaddsw),
2943    [0x04] = SSSE3_OP(pmaddubsw),
2944    [0x05] = SSSE3_OP(phsubw),
2945    [0x06] = SSSE3_OP(phsubd),
2946    [0x07] = SSSE3_OP(phsubsw),
2947    [0x08] = SSSE3_OP(psignb),
2948    [0x09] = SSSE3_OP(psignw),
2949    [0x0a] = SSSE3_OP(psignd),
2950    [0x0b] = SSSE3_OP(pmulhrsw),
2951    [0x10] = SSE41_OP(pblendvb),
2952    [0x14] = SSE41_OP(blendvps),
2953    [0x15] = SSE41_OP(blendvpd),
2954    [0x17] = SSE41_OP(ptest),
2955    [0x1c] = SSSE3_OP(pabsb),
2956    [0x1d] = SSSE3_OP(pabsw),
2957    [0x1e] = SSSE3_OP(pabsd),
2958    [0x20] = SSE41_OP(pmovsxbw),
2959    [0x21] = SSE41_OP(pmovsxbd),
2960    [0x22] = SSE41_OP(pmovsxbq),
2961    [0x23] = SSE41_OP(pmovsxwd),
2962    [0x24] = SSE41_OP(pmovsxwq),
2963    [0x25] = SSE41_OP(pmovsxdq),
2964    [0x28] = SSE41_OP(pmuldq),
2965    [0x29] = SSE41_OP(pcmpeqq),
2966    [0x2a] = SSE41_SPECIAL, /* movntqda */
2967    [0x2b] = SSE41_OP(packusdw),
2968    [0x30] = SSE41_OP(pmovzxbw),
2969    [0x31] = SSE41_OP(pmovzxbd),
2970    [0x32] = SSE41_OP(pmovzxbq),
2971    [0x33] = SSE41_OP(pmovzxwd),
2972    [0x34] = SSE41_OP(pmovzxwq),
2973    [0x35] = SSE41_OP(pmovzxdq),
2974    [0x37] = SSE42_OP(pcmpgtq),
2975    [0x38] = SSE41_OP(pminsb),
2976    [0x39] = SSE41_OP(pminsd),
2977    [0x3a] = SSE41_OP(pminuw),
2978    [0x3b] = SSE41_OP(pminud),
2979    [0x3c] = SSE41_OP(pmaxsb),
2980    [0x3d] = SSE41_OP(pmaxsd),
2981    [0x3e] = SSE41_OP(pmaxuw),
2982    [0x3f] = SSE41_OP(pmaxud),
2983    [0x40] = SSE41_OP(pmulld),
2984    [0x41] = SSE41_OP(phminposuw),
2985    [0xdb] = AESNI_OP(aesimc),
2986    [0xdc] = AESNI_OP(aesenc),
2987    [0xdd] = AESNI_OP(aesenclast),
2988    [0xde] = AESNI_OP(aesdec),
2989    [0xdf] = AESNI_OP(aesdeclast),
2990};
2991
2992static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2993    [0x08] = SSE41_OP(roundps),
2994    [0x09] = SSE41_OP(roundpd),
2995    [0x0a] = SSE41_OP(roundss),
2996    [0x0b] = SSE41_OP(roundsd),
2997    [0x0c] = SSE41_OP(blendps),
2998    [0x0d] = SSE41_OP(blendpd),
2999    [0x0e] = SSE41_OP(pblendw),
3000    [0x0f] = SSSE3_OP(palignr),
3001    [0x14] = SSE41_SPECIAL, /* pextrb */
3002    [0x15] = SSE41_SPECIAL, /* pextrw */
3003    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3004    [0x17] = SSE41_SPECIAL, /* extractps */
3005    [0x20] = SSE41_SPECIAL, /* pinsrb */
3006    [0x21] = SSE41_SPECIAL, /* insertps */
3007    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3008    [0x40] = SSE41_OP(dpps),
3009    [0x41] = SSE41_OP(dppd),
3010    [0x42] = SSE41_OP(mpsadbw),
3011    [0x44] = PCLMULQDQ_OP(pclmulqdq),
3012    [0x60] = SSE42_OP(pcmpestrm),
3013    [0x61] = SSE42_OP(pcmpestri),
3014    [0x62] = SSE42_OP(pcmpistrm),
3015    [0x63] = SSE42_OP(pcmpistri),
3016    [0xdf] = AESNI_OP(aeskeygenassist),
3017};
3018
3019static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3020                    target_ulong pc_start, int rex_r)
3021{
3022    int b1, op1_offset, op2_offset, is_xmm, val;
3023    int modrm, mod, rm, reg;
3024    SSEFunc_0_epp sse_fn_epp;
3025    SSEFunc_0_eppi sse_fn_eppi;
3026    SSEFunc_0_ppi sse_fn_ppi;
3027    SSEFunc_0_eppt sse_fn_eppt;
3028    TCGMemOp ot;
3029
    b &= 0xff;
    if (s->prefix & PREFIX_DATA) {
        b1 = 1;
    } else if (s->prefix & PREFIX_REPZ) {
        b1 = 2;
    } else if (s->prefix & PREFIX_REPNZ) {
        b1 = 3;
    } else {
        b1 = 0;
    }
3039    sse_fn_epp = sse_op_table1[b][b1];
3040    if (!sse_fn_epp) {
3041        goto unknown_op;
3042    }
    if ((b >= 0x10 && b <= 0x5f) || b == 0xc6 || b == 0xc2) {
        is_xmm = 1;
    } else {
        /* MMX, unless a mandatory prefix selected an SSE form */
        is_xmm = (b1 != 0);
    }
3053    /* simple MMX/SSE operation */
3054    if (s->flags & HF_TS_MASK) {
3055        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3056        return;
3057    }
3058    if (s->flags & HF_EM_MASK) {
3059    illegal_op:
3060        gen_illegal_opcode(s);
3061        return;
3062    }
3063    if (is_xmm
3064        && !(s->flags & HF_OSFXSR_MASK)
3065        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3066        goto unknown_op;
3067    }
3068    if (b == 0x0e) {
3069        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3070            /* If we were fully decoding this we might use illegal_op.  */
3071            goto unknown_op;
3072        }
3073        /* femms */
3074        gen_helper_emms(cpu_env);
3075        return;
3076    }
3077    if (b == 0x77) {
3078        /* emms */
3079        gen_helper_emms(cpu_env);
3080        return;
3081    }
3082    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3083       the static cpu state) */
3084    if (!is_xmm) {
3085        gen_helper_enter_mmx(cpu_env);
3086    }
3087
3088    modrm = x86_ldub_code(env, s);
3089    reg = ((modrm >> 3) & 7);
    if (is_xmm) {
        reg |= rex_r;
    }
3092    mod = (modrm >> 6) & 3;
3093    if (sse_fn_epp == SSE_SPECIAL) {
3094        b |= (b1 << 8);
        switch (b) {
3096        case 0x0e7: /* movntq */
3097            if (mod == 3) {
3098                goto illegal_op;
3099            }
3100            gen_lea_modrm(env, s, modrm);
3101            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3102            break;
        case 0x1e7: /* movntdq */
        case 0x02b: /* movntps */
        case 0x12b: /* movntpd */
            if (mod == 3) {
                goto illegal_op;
            }
            gen_lea_modrm(env, s, modrm);
            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
            break;
        case 0x3f0: /* lddqu */
            if (mod == 3) {
                goto illegal_op;
            }
            gen_lea_modrm(env, s, modrm);
            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
            break;
        case 0x22b: /* movntss */
        case 0x32b: /* movntsd */
            if (mod == 3) {
                goto illegal_op;
            }
            gen_lea_modrm(env, s, modrm);
3122            if (b1 & 1) {
3123                gen_stq_env_A0(s, offsetof(CPUX86State,
3124                                           xmm_regs[reg].ZMM_Q(0)));
3125            } else {
3126                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3127                    xmm_regs[reg].ZMM_L(0)));
3128                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3129            }
3130            break;
3131        case 0x6e: /* movd mm, ea */
3132#ifdef TARGET_X86_64
3133            if (s->dflag == MO_64) {
3134                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3135                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3136            } else
3137#endif
3138            {
3139                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3141                                 offsetof(CPUX86State,fpregs[reg].mmx));
3142                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3143                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3144            }
3145            break;
3146        case 0x16e: /* movd xmm, ea */
3147#ifdef TARGET_X86_64
3148            if (s->dflag == MO_64) {
3149                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3151                                 offsetof(CPUX86State,xmm_regs[reg]));
3152                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3153            } else
3154#endif
3155            {
3156                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3158                                 offsetof(CPUX86State,xmm_regs[reg]));
3159                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3160                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3161            }
3162            break;
3163        case 0x6f: /* movq mm, ea */
3164            if (mod != 3) {
3165                gen_lea_modrm(env, s, modrm);
3166                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3167            } else {
3168                rm = (modrm & 7);
3169                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3170                               offsetof(CPUX86State,fpregs[rm].mmx));
3171                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3172                               offsetof(CPUX86State,fpregs[reg].mmx));
3173            }
3174            break;
3175        case 0x010: /* movups */
3176        case 0x110: /* movupd */
3177        case 0x028: /* movaps */
3178        case 0x128: /* movapd */
3179        case 0x16f: /* movdqa xmm, ea */
3180        case 0x26f: /* movdqu xmm, ea */
3181            if (mod != 3) {
3182                gen_lea_modrm(env, s, modrm);
3183                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3184            } else {
3185                rm = (modrm & 7) | REX_B(s);
3186                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3187                            offsetof(CPUX86State,xmm_regs[rm]));
3188            }
3189            break;
3190        case 0x210: /* movss xmm, ea */
3191            if (mod != 3) {
3192                gen_lea_modrm(env, s, modrm);
3193                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3194                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3195                tcg_gen_movi_tl(cpu_T0, 0);
3196                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3197                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3198                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3199            } else {
3200                rm = (modrm & 7) | REX_B(s);
3201                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3202                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3203            }
3204            break;
3205        case 0x310: /* movsd xmm, ea */
3206            if (mod != 3) {
3207                gen_lea_modrm(env, s, modrm);
3208                gen_ldq_env_A0(s, offsetof(CPUX86State,
3209                                           xmm_regs[reg].ZMM_Q(0)));
3210                tcg_gen_movi_tl(cpu_T0, 0);
3211                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3212                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3213            } else {
3214                rm = (modrm & 7) | REX_B(s);
3215                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3216                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3217            }
3218            break;
3219        case 0x012: /* movlps */
3220        case 0x112: /* movlpd */
3221            if (mod != 3) {
3222                gen_lea_modrm(env, s, modrm);
3223                gen_ldq_env_A0(s, offsetof(CPUX86State,
3224                                           xmm_regs[reg].ZMM_Q(0)));
3225            } else {
3226                /* movhlps */
3227                rm = (modrm & 7) | REX_B(s);
3228                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3229                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3230            }
3231            break;
3232        case 0x212: /* movsldup */
3233            if (mod != 3) {
3234                gen_lea_modrm(env, s, modrm);
3235                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3236            } else {
3237                rm = (modrm & 7) | REX_B(s);
3238                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3239                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3240                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3241                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3242            }
3243            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3244                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3245            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3246                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3247            break;
3248        case 0x312: /* movddup */
3249            if (mod != 3) {
3250                gen_lea_modrm(env, s, modrm);
3251                gen_ldq_env_A0(s, offsetof(CPUX86State,
3252                                           xmm_regs[reg].ZMM_Q(0)));
3253            } else {
3254                rm = (modrm & 7) | REX_B(s);
3255                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3256                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3257            }
3258            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3259                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3260            break;
3261        case 0x016: /* movhps */
3262        case 0x116: /* movhpd */
3263            if (mod != 3) {
3264                gen_lea_modrm(env, s, modrm);
3265                gen_ldq_env_A0(s, offsetof(CPUX86State,
3266                                           xmm_regs[reg].ZMM_Q(1)));
3267            } else {
3268                /* movlhps */
3269                rm = (modrm & 7) | REX_B(s);
3270                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3271                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3272            }
3273            break;
3274        case 0x216: /* movshdup */
3275            if (mod != 3) {
3276                gen_lea_modrm(env, s, modrm);
3277                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3278            } else {
3279                rm = (modrm & 7) | REX_B(s);
3280                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3281                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3282                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3283                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3284            }
3285            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3286                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3287            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3288                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3289            break;
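        /*
         * The dup forms above (movsldup, movddup, movshdup) replicate
         * elements in place; e.g. for movshdup the result is
         * { src[1], src[1], src[3], src[3] }, which is why the
         * cross-copies run after the load/move.
         */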
3290        case 0x178: /* extrq $imm (SSE4a) */
3291        case 0x378: /* insertq $imm (SSE4a) */
3292            {
3293                int bit_index, field_length;
3294
3295                if (b1 == 1 && reg != 0)
3296                    goto illegal_op;
3297                field_length = x86_ldub_code(env, s) & 0x3F;
3298                bit_index = x86_ldub_code(env, s) & 0x3F;
3299                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3300                    offsetof(CPUX86State,xmm_regs[reg]));
3301                if (b1 == 1)
3302                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
3303                                       tcg_const_i32(bit_index),
3304                                       tcg_const_i32(field_length));
3305                else
3306                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
3307                                         tcg_const_i32(bit_index),
3308                                         tcg_const_i32(field_length));
3309            }
3310            break;
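            /*
             * EXTRQ/INSERTQ operate on bit fields within the low quadword.
             * A hedged C sketch of the EXTRQ result computed by the helper
             * call above (a length of 0 selects all 64 bits):
             *
             *   uint64_t extrq(uint64_t src, unsigned idx, unsigned len)
             *   {
             *       uint64_t mask = len ? (1ULL << len) - 1 : ~0ULL;
             *       return (src >> idx) & mask;
             *   }
             */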
3311        case 0x7e: /* movd ea, mm */
3312#ifdef TARGET_X86_64
3313            if (s->dflag == MO_64) {
3314                tcg_gen_ld_i64(cpu_T0, cpu_env,
3315                               offsetof(CPUX86State,fpregs[reg].mmx));
3316                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3317            } else
3318#endif
3319            {
3320                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3321                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3322                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3323            }
3324            break;
3325        case 0x17e: /* movd ea, xmm */
3326#ifdef TARGET_X86_64
3327            if (s->dflag == MO_64) {
3328                tcg_gen_ld_i64(cpu_T0, cpu_env,
3329                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3330                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3331            } else
3332#endif
3333            {
3334                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3335                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3336                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3337            }
3338            break;
3339        case 0x27e: /* movq xmm, ea */
3340            if (mod != 3) {
3341                gen_lea_modrm(env, s, modrm);
3342                gen_ldq_env_A0(s, offsetof(CPUX86State,
3343                                           xmm_regs[reg].ZMM_Q(0)));
3344            } else {
3345                rm = (modrm & 7) | REX_B(s);
3346                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3347                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3348            }
3349            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3350            break;
3351        case 0x7f: /* movq ea, mm */
3352            if (mod != 3) {
3353                gen_lea_modrm(env, s, modrm);
3354                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3355            } else {
3356                rm = (modrm & 7);
3357                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3358                            offsetof(CPUX86State,fpregs[reg].mmx));
3359            }
3360            break;
3361        case 0x011: /* movups */
3362        case 0x111: /* movupd */
3363        case 0x029: /* movaps */
3364        case 0x129: /* movapd */
3365        case 0x17f: /* movdqa ea, xmm */
3366        case 0x27f: /* movdqu ea, xmm */
3367            if (mod != 3) {
3368                gen_lea_modrm(env, s, modrm);
3369                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3370            } else {
3371                rm = (modrm & 7) | REX_B(s);
3372                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3373                            offsetof(CPUX86State,xmm_regs[reg]));
3374            }
3375            break;
3376        case 0x211: /* movss ea, xmm */
3377            if (mod != 3) {
3378                gen_lea_modrm(env, s, modrm);
3379                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3380                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3381            } else {
3382                rm = (modrm & 7) | REX_B(s);
3383                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
3384                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3385            }
3386            break;
3387        case 0x311: /* movsd ea, xmm */
3388            if (mod != 3) {
3389                gen_lea_modrm(env, s, modrm);
3390                gen_stq_env_A0(s, offsetof(CPUX86State,
3391                                           xmm_regs[reg].ZMM_Q(0)));
3392            } else {
3393                rm = (modrm & 7) | REX_B(s);
3394                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3395                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3396            }
3397            break;
3398        case 0x013: /* movlps */
3399        case 0x113: /* movlpd */
3400            if (mod != 3) {
3401                gen_lea_modrm(env, s, modrm);
3402                gen_stq_env_A0(s, offsetof(CPUX86State,
3403                                           xmm_regs[reg].ZMM_Q(0)));
3404            } else {
3405                goto illegal_op;
3406            }
3407            break;
3408        case 0x017: /* movhps */
3409        case 0x117: /* movhpd */
3410            if (mod != 3) {
3411                gen_lea_modrm(env, s, modrm);
3412                gen_stq_env_A0(s, offsetof(CPUX86State,
3413                                           xmm_regs[reg].ZMM_Q(1)));
3414            } else {
3415                goto illegal_op;
3416            }
3417            break;
3418        case 0x71: /* shift mm, im */
3419        case 0x72:
3420        case 0x73:
3421        case 0x171: /* shift xmm, im */
3422        case 0x172:
3423        case 0x173:
3424            if (b1 >= 2) {
3425                goto unknown_op;
3426            }
3427            val = x86_ldub_code(env, s);
3428            if (is_xmm) {
3429                tcg_gen_movi_tl(cpu_T0, val);
3430                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3431                tcg_gen_movi_tl(cpu_T0, 0);
3432                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
3433                op1_offset = offsetof(CPUX86State,xmm_t0);
3434            } else {
3435                tcg_gen_movi_tl(cpu_T0, val);
3436                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3437                tcg_gen_movi_tl(cpu_T0, 0);
3438                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3439                op1_offset = offsetof(CPUX86State,mmx_t0);
3440            }
3441            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3442                                       ((modrm >> 3) & 7)][b1];
3443            if (!sse_fn_epp) {
3444                goto unknown_op;
3445            }
3446            if (is_xmm) {
3447                rm = (modrm & 7) | REX_B(s);
3448                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3449            } else {
3450                rm = (modrm & 7);
3451                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3452            }
3453            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3454            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3455            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3456            break;
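            /*
             * The immediate form reuses the register-operand helpers by
             * first materialising the shift count into {xmm,mmx}_t0; the
             * table-2 helpers shift their first operand by their second,
             * hence ptr0 points at the register and ptr1 at the count.
             */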
3457        case 0x050: /* movmskps */
3458            rm = (modrm & 7) | REX_B(s);
3459            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3460                             offsetof(CPUX86State,xmm_regs[rm]));
3461            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3462            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3463            break;
3464        case 0x150: /* movmskpd */
3465            rm = (modrm & 7) | REX_B(s);
3466            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3467                             offsetof(CPUX86State,xmm_regs[rm]));
3468            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3469            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3470            break;
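            /*
             * Both movmsk helpers gather the sign bit of each packed
             * element into the low bits of a GP register.  A minimal C
             * sketch of the movmskps case (illustrative, not the helper):
             *
             *   unsigned movmskps(const uint32_t e[4])
             *   {
             *       unsigned r = 0, i;
             *       for (i = 0; i < 4; i++) {
             *           r |= (e[i] >> 31) << i;
             *       }
             *       return r;
             *   }
             */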
3471        case 0x02a: /* cvtpi2ps */
3472        case 0x12a: /* cvtpi2pd */
3473            gen_helper_enter_mmx(cpu_env);
3474            if (mod != 3) {
3475                gen_lea_modrm(env, s, modrm);
3476                op2_offset = offsetof(CPUX86State,mmx_t0);
3477                gen_ldq_env_A0(s, op2_offset);
3478            } else {
3479                rm = (modrm & 7);
3480                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3481            }
3482            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3483            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3484            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3485            switch(b >> 8) {
3486            case 0x0:
3487                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3488                break;
3489            default:
3490            case 0x1:
3491                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3492                break;
3493            }
3494            break;
3495        case 0x22a: /* cvtsi2ss */
3496        case 0x32a: /* cvtsi2sd */
3497            ot = mo_64_32(s->dflag);
3498            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3499            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3500            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3501            if (ot == MO_32) {
3502                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3503                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3504                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3505            } else {
3506#ifdef TARGET_X86_64
3507                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3508                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
3509#else
3510                goto illegal_op;
3511#endif
3512            }
3513            break;
3514        case 0x02c: /* cvttps2pi */
3515        case 0x12c: /* cvttpd2pi */
3516        case 0x02d: /* cvtps2pi */
3517        case 0x12d: /* cvtpd2pi */
3518            gen_helper_enter_mmx(cpu_env);
3519            if (mod != 3) {
3520                gen_lea_modrm(env, s, modrm);
3521                op2_offset = offsetof(CPUX86State,xmm_t0);
3522                gen_ldo_env_A0(s, op2_offset);
3523            } else {
3524                rm = (modrm & 7) | REX_B(s);
3525                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3526            }
3527            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3528            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3529            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3530            switch(b) {
3531            case 0x02c:
3532                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3533                break;
3534            case 0x12c:
3535                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3536                break;
3537            case 0x02d:
3538                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3539                break;
3540            case 0x12d:
3541                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3542                break;
3543            }
3544            break;
3545        case 0x22c: /* cvttss2si */
3546        case 0x32c: /* cvttsd2si */
3547        case 0x22d: /* cvtss2si */
3548        case 0x32d: /* cvtsd2si */
3549            ot = mo_64_32(s->dflag);
3550            if (mod != 3) {
3551                gen_lea_modrm(env, s, modrm);
3552                if ((b >> 8) & 1) {
3553                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3554                } else {
3555                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3556                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3557                }
3558                op2_offset = offsetof(CPUX86State,xmm_t0);
3559            } else {
3560                rm = (modrm & 7) | REX_B(s);
3561                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3562            }
3563            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3564            if (ot == MO_32) {
3565                SSEFunc_i_ep sse_fn_i_ep =
3566                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3567                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3568                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
3569            } else {
3570#ifdef TARGET_X86_64
3571                SSEFunc_l_ep sse_fn_l_ep =
3572                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3573                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
3574#else
3575                goto illegal_op;
3576#endif
3577            }
3578            gen_op_mov_reg_v(ot, reg, cpu_T0);
3579            break;
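            /*
             * The 't' variants truncate toward zero while the plain ones
             * round according to MXCSR.RC; e.g. cvtss2si(2.5f) == 2 under
             * round-to-nearest-even, and cvttss2si(2.9f) == 2 always.
             */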
3580        case 0xc4: /* pinsrw */
3581        case 0x1c4:
3582            s->rip_offset = 1;
3583            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3584            val = x86_ldub_code(env, s);
3585            if (b1) {
3586                val &= 7;
3587                tcg_gen_st16_tl(cpu_T0, cpu_env,
3588                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3589            } else {
3590                val &= 3;
3591                tcg_gen_st16_tl(cpu_T0, cpu_env,
3592                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3593            }
3594            break;
3595        case 0xc5: /* pextrw */
3596        case 0x1c5:
3597            if (mod != 3)
3598                goto illegal_op;
3599            ot = mo_64_32(s->dflag);
3600            val = x86_ldub_code(env, s);
3601            if (b1) {
3602                val &= 7;
3603                rm = (modrm & 7) | REX_B(s);
3604                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3605                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3606            } else {
3607                val &= 3;
3608                rm = (modrm & 7);
3609                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3610                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3611            }
3612            reg = ((modrm >> 3) & 7) | rex_r;
3613            gen_op_mov_reg_v(ot, reg, cpu_T0);
3614            break;
3615        case 0x1d6: /* movq ea, xmm */
3616            if (mod != 3) {
3617                gen_lea_modrm(env, s, modrm);
3618                gen_stq_env_A0(s, offsetof(CPUX86State,
3619                                           xmm_regs[reg].ZMM_Q(0)));
3620            } else {
3621                rm = (modrm & 7) | REX_B(s);
3622                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3623                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3624                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3625            }
3626            break;
3627        case 0x2d6: /* movq2dq */
3628            gen_helper_enter_mmx(cpu_env);
3629            rm = (modrm & 7);
3630            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3631                        offsetof(CPUX86State,fpregs[rm].mmx));
3632            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3633            break;
3634        case 0x3d6: /* movdq2q */
3635            gen_helper_enter_mmx(cpu_env);
3636            rm = (modrm & 7) | REX_B(s);
3637            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3638                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3639            break;
3640        case 0xd7: /* pmovmskb */
3641        case 0x1d7:
3642            if (mod != 3)
3643                goto illegal_op;
3644            if (b1) {
3645                rm = (modrm & 7) | REX_B(s);
3646                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3647                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3648            } else {
3649                rm = (modrm & 7);
3650                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3651                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3652            }
3653            reg = ((modrm >> 3) & 7) | rex_r;
3654            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3655            break;
3656
3657        case 0x138:
3658        case 0x038:
3659            b = modrm;
3660            if ((b & 0xf0) == 0xf0) {
3661                goto do_0f_38_fx;
3662            }
3663            modrm = x86_ldub_code(env, s);
3664            rm = modrm & 7;
3665            reg = ((modrm >> 3) & 7) | rex_r;
3666            mod = (modrm >> 6) & 3;
3667            if (b1 >= 2) {
3668                goto unknown_op;
3669            }
3670
3671            sse_fn_epp = sse_op_table6[b].op[b1];
3672            if (!sse_fn_epp) {
3673                goto unknown_op;
3674            }
3675            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3676                goto illegal_op;
3677
3678            if (b1) {
3679                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3680                if (mod == 3) {
3681                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3682                } else {
3683                    op2_offset = offsetof(CPUX86State,xmm_t0);
3684                    gen_lea_modrm(env, s, modrm);
3685                    switch (b) {
3686                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3687                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3688                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3689                        gen_ldq_env_A0(s, op2_offset +
3690                                        offsetof(ZMMReg, ZMM_Q(0)));
3691                        break;
3692                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3693                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3694                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3695                                            s->mem_index, MO_LEUL);
3696                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3697                                        offsetof(ZMMReg, ZMM_L(0)));
3698                        break;
3699                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3700                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3701                                           s->mem_index, MO_LEUW);
3702                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3703                                        offsetof(ZMMReg, ZMM_W(0)));
3704                        break;
3705                    case 0x2a:            /* movntdqa */
3706                        gen_ldo_env_A0(s, op1_offset);
3707                        return;
3708                    default:
3709                        gen_ldo_env_A0(s, op2_offset);
3710                    }
3711                }
3712            } else {
3713                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3714                if (mod == 3) {
3715                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3716                } else {
3717                    op2_offset = offsetof(CPUX86State,mmx_t0);
3718                    gen_lea_modrm(env, s, modrm);
3719                    gen_ldq_env_A0(s, op2_offset);
3720                }
3721            }
3722            if (sse_fn_epp == SSE_SPECIAL) {
3723                goto unknown_op;
3724            }
3725
3726            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3727            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3728            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3729
3730            if (b == 0x17) {
3731                set_cc_op(s, CC_OP_EFLAGS);
3732            }
3733            break;
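        /*
         * The pmovsx/pmovzx cases above load only as many bytes as the
         * widening consumes (8, 4 or 2), matching the architectural memory
         * access size.  E.g. pmovzxbw reads one quadword and produces
         * eight words (sketch):
         *
         *   for (i = 0; i < 8; i++) {
         *       dst.w[i] = (uint16_t)src.b[i];   // zero-extend each byte
         *   }
         */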
3734
3735        case 0x238:
3736        case 0x338:
3737        do_0f_38_fx:
3738            /* Various integer extensions at 0f 38 f[0-f].  */
3739            b = modrm | (b1 << 8);
3740            modrm = x86_ldub_code(env, s);
3741            reg = ((modrm >> 3) & 7) | rex_r;
3742
3743            switch (b) {
3744            case 0x3f0: /* crc32 Gd,Eb */
3745            case 0x3f1: /* crc32 Gd,Ey */
3746            do_crc32:
3747                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3748                    goto illegal_op;
3749                }
3750                if ((b & 0xff) == 0xf0) {
3751                    ot = MO_8;
3752                } else if (s->dflag != MO_64) {
3753                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3754                } else {
3755                    ot = MO_64;
3756                }
3757
3758                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3759                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3760                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
3761                                 cpu_T0, tcg_const_i32(8 << ot));
3762
3763                ot = mo_64_32(s->dflag);
3764                gen_op_mov_reg_v(ot, reg, cpu_T0);
3765                break;
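                /*
                 * gen_helper_crc32 folds the low 8 << ot bits of the
                 * operand into the running CRC.  The x86 insn uses the
                 * Castagnoli polynomial; a hedged bitwise sketch of one
                 * byte step, assuming the reflected constant 0x82F63B78
                 * (the insn applies no initial/final XOR itself):
                 *
                 *   uint32_t crc32c_byte(uint32_t crc, uint8_t b)
                 *   {
                 *       int i;
                 *       crc ^= b;
                 *       for (i = 0; i < 8; i++) {
                 *           crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
                 *       }
                 *       return crc;
                 *   }
                 */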
3766
3767            case 0x1f0: /* crc32 or movbe */
3768            case 0x1f1:
3769                /* For these insns, the f3 prefix is supposed to take
3770                   priority over the 66 prefix, but that is not how b1
3771                   was computed above.  */
3772                if (s->prefix & PREFIX_REPNZ) {
3773                    goto do_crc32;
3774                }
3775                /* FALLTHRU */
3776            case 0x0f0: /* movbe Gy,My */
3777            case 0x0f1: /* movbe My,Gy */
3778                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3779                    goto illegal_op;
3780                }
3781                if (s->dflag != MO_64) {
3782                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3783                } else {
3784                    ot = MO_64;
3785                }
3786
3787                gen_lea_modrm(env, s, modrm);
3788                if ((b & 1) == 0) {
3789                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
3790                                       s->mem_index, ot | MO_BE);
3791                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3792                } else {
3793                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3794                                       s->mem_index, ot | MO_BE);
3795                }
3796                break;
3797
3798            case 0x0f2: /* andn Gy, By, Ey */
3799                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3800                    || !(s->prefix & PREFIX_VEX)
3801                    || s->vex_l != 0) {
3802                    goto illegal_op;
3803                }
3804                ot = mo_64_32(s->dflag);
3805                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3806                tcg_gen_andc_tl(cpu_T0, cpu_regs[s->vex_v], cpu_T0);
3807                gen_op_mov_reg_v(ot, reg, cpu_T0);
3808                gen_op_update1_cc();
3809                set_cc_op(s, CC_OP_LOGICB + ot);
3810                break;
3811
3812            case 0x0f7: /* bextr Gy, Ey, By */
3813                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3814                    || !(s->prefix & PREFIX_VEX)
3815                    || s->vex_l != 0) {
3816                    goto illegal_op;
3817                }
3818                ot = mo_64_32(s->dflag);
3819                {
3820                    TCGv bound, zero;
3821
3822                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3823                    /* Extract START, and shift the operand.
3824                       Shifts larger than operand size get zeros.  */
3825                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3826                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
3827
3828                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3829                    zero = tcg_const_tl(0);
3830                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
3831                                       cpu_T0, zero);
3832                    tcg_temp_free(zero);
3833
3834                    /* Extract the LEN into a mask.  Lengths larger than
3835                       operand size get all ones.  */
3836                    tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
3837                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3838                                       cpu_A0, bound);
3839                    tcg_temp_free(bound);
3840                    tcg_gen_movi_tl(cpu_T1, 1);
3841                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
3842                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
3843                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3844
3845                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3846                    gen_op_update1_cc();
3847                    set_cc_op(s, CC_OP_LOGICB + ot);
3848                }
3849                break;
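                /*
                 * For reference, the intended BEXTR result (hedged sketch;
                 * START is ctrl[7:0], LEN is ctrl[15:8], with out-of-range
                 * values saturating as the movconds above arrange):
                 *
                 *   uint64_t bextr64(uint64_t src, unsigned ctrl)
                 *   {
                 *       unsigned start = ctrl & 0xff;
                 *       unsigned len = (ctrl >> 8) & 0xff;
                 *       if (start > 63) {
                 *           return 0;
                 *       }
                 *       src >>= start;
                 *       return len > 63 ? src : src & ((1ULL << len) - 1);
                 *   }
                 */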
3850
3851            case 0x0f5: /* bzhi Gy, Ey, By */
3852                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3853                    || !(s->prefix & PREFIX_VEX)
3854                    || s->vex_l != 0) {
3855                    goto illegal_op;
3856                }
3857                ot = mo_64_32(s->dflag);
3858                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3859                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
3860                {
3861                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3862                    /* Note that since we're using BMILG (in order to get O
3863                       cleared) we need to store the inverse into C.  */
3864                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3865                                       cpu_T1, bound);
3866                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
3867                                       bound, bound, cpu_T1);
3868                    tcg_temp_free(bound);
3869                }
3870                tcg_gen_movi_tl(cpu_A0, -1);
3871                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
3872                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
3873                gen_op_mov_reg_v(ot, reg, cpu_T0);
3874                gen_op_update1_cc();
3875                set_cc_op(s, CC_OP_BMILGB + ot);
3876                break;
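                /*
                 * Architecturally BZHI clears every bit at or above INDEX
                 * (the low byte of the second source) and sets CF when the
                 * index is out of range.  Hedged 64-bit sketch:
                 *
                 *   uint64_t bzhi64(uint64_t src, unsigned index, int *cf)
                 *   {
                 *       *cf = index > 63;
                 *       return *cf ? src : src & ((1ULL << index) - 1);
                 *   }
                 */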
3877
3878            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3879                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3880                    || !(s->prefix & PREFIX_VEX)
3881                    || s->vex_l != 0) {
3882                    goto illegal_op;
3883                }
3884                ot = mo_64_32(s->dflag);
3885                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3886                switch (ot) {
3887                default:
3888                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3889                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3890                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3891                                      cpu_tmp2_i32, cpu_tmp3_i32);
3892                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3893                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3894                    break;
3895#ifdef TARGET_X86_64
3896                case MO_64:
3897                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
3898                                      cpu_T0, cpu_regs[R_EDX]);
3899                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
3900                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
3901                    break;
3902#endif
3903                }
3904                break;
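                /*
                 * MULX is a flag-preserving widening multiply by rDX: the
                 * low half goes to the VEX.vvvv register and the high half
                 * to the ModRM reg field.  32-bit sketch (illustrative):
                 *
                 *   void mulx32(uint32_t a, uint32_t b,
                 *               uint32_t *lo, uint32_t *hi)
                 *   {
                 *       uint64_t p = (uint64_t)a * b;
                 *       *lo = (uint32_t)p;
                 *       *hi = (uint32_t)(p >> 32);
                 *   }
                 */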
3905
3906            case 0x3f5: /* pdep Gy, By, Ey */
3907                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3908                    || !(s->prefix & PREFIX_VEX)
3909                    || s->vex_l != 0) {
3910                    goto illegal_op;
3911                }
3912                ot = mo_64_32(s->dflag);
3913                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3914                /* Note that by zero-extending the mask operand, we
3915                   automatically handle zero-extending the result.  */
3916                if (ot == MO_64) {
3917                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3918                } else {
3919                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3920                }
3921                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
3922                break;
3923
3924            case 0x2f5: /* pext Gy, By, Ey */
3925                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3926                    || !(s->prefix & PREFIX_VEX)
3927                    || s->vex_l != 0) {
3928                    goto illegal_op;
3929                }
3930                ot = mo_64_32(s->dflag);
3931                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3932                /* Note that by zero-extending the mask operand, we
3933                   automatically handle zero-extending the result.  */
3934                if (ot == MO_64) {
3935                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3936                } else {
3937                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3938                }
3939                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
3940                break;
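                /*
                 * PDEP scatters the low bits of the source to the set-bit
                 * positions of the mask; PEXT is the inverse gather.  A
                 * hedged loop-based reference for PDEP:
                 *
                 *   uint64_t pdep64(uint64_t src, uint64_t mask)
                 *   {
                 *       uint64_t dst = 0;
                 *       int i = 0;
                 *       for (; mask; mask &= mask - 1, i++) {
                 *           if (src & (1ULL << i)) {
                 *               dst |= mask & -mask;  // lowest set bit
                 *           }
                 *       }
                 *       return dst;
                 *   }
                 */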
3941
3942            case 0x1f6: /* adcx Gy, Ey */
3943            case 0x2f6: /* adox Gy, Ey */
3944                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3945                    goto illegal_op;
3946                } else {
3947                    TCGv carry_in, carry_out, zero;
3948                    int end_op;
3949
3950                    ot = mo_64_32(s->dflag);
3951                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3952
3953                    /* Re-use the carry-out from a previous round.  */
3954                    TCGV_UNUSED(carry_in);
3955                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3956                    switch (s->cc_op) {
3957                    case CC_OP_ADCX:
3958                        if (b == 0x1f6) {
3959                            carry_in = cpu_cc_dst;
3960                            end_op = CC_OP_ADCX;
3961                        } else {
3962                            end_op = CC_OP_ADCOX;
3963                        }
3964                        break;
3965                    case CC_OP_ADOX:
3966                        if (b == 0x1f6) {
3967                            end_op = CC_OP_ADCOX;
3968                        } else {
3969                            carry_in = cpu_cc_src2;
3970                            end_op = CC_OP_ADOX;
3971                        }
3972                        break;
3973                    case CC_OP_ADCOX:
3974                        end_op = CC_OP_ADCOX;
3975                        carry_in = carry_out;
3976                        break;
3977                    default:
3978                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3979                        break;
3980                    }
3981                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
3982                    if (TCGV_IS_UNUSED(carry_in)) {
3983                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3984                            gen_compute_eflags(s);
3985                        }
3986                        carry_in = cpu_tmp0;
3987                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
3988                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
3989                    }
3990
3991                    switch (ot) {
3992#ifdef TARGET_X86_64
3993                    case MO_32:
3994                        /* If we know TL is 64-bit, and we want a 32-bit
3995                           result, just do everything in 64-bit arithmetic.  */
3996                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3997                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
3998                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
3999                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
4000                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
4001                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
4002                        break;
4003#endif
4004                    default:
4005                        /* Otherwise compute the carry-out in two steps.  */
4006                        zero = tcg_const_tl(0);
4007                        tcg_gen_add2_tl(cpu_T0, carry_out,
4008                                        cpu_T0, zero,
4009                                        carry_in, zero);
4010                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4011                                        cpu_regs[reg], carry_out,
4012                                        cpu_T0, zero);
4013                        tcg_temp_free(zero);
4014                        break;
4015                    }
4016                    set_cc_op(s, end_op);
4017                }
4018                break;
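                /*
                 * ADCX and ADOX run two independent carry chains (through
                 * CF and OF respectively) so interleaved multi-precision
                 * additions need no flag spills.  Each step is just an
                 * add-with-carry (sketch):
                 *
                 *   int adcx64(uint64_t *dst, uint64_t src, int cf)
                 *   {
                 *       uint64_t r = *dst + src;
                 *       int out = r < src;       // carry from the add
                 *       r += cf;
                 *       out |= cf && r == 0;     // carry from adding cf
                 *       *dst = r;
                 *       return out;
                 *   }
                 */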
4019
4020            case 0x1f7: /* shlx Gy, Ey, By */
4021            case 0x2f7: /* sarx Gy, Ey, By */
4022            case 0x3f7: /* shrx Gy, Ey, By */
4023                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4024                    || !(s->prefix & PREFIX_VEX)
4025                    || s->vex_l != 0) {
4026                    goto illegal_op;
4027                }
4028                ot = mo_64_32(s->dflag);
4029                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4030                if (ot == MO_64) {
4031                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
4032                } else {
4033                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
4034                }
4035                if (b == 0x1f7) {
4036                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
4037                } else if (b == 0x2f7) {
4038                    if (ot != MO_64) {
4039                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
4040                    }
4041                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
4042                } else {
4043                    if (ot != MO_64) {
4044                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
4045                    }
4046                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
4047                }
4048                gen_op_mov_reg_v(ot, reg, cpu_T0);
4049                break;
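                /*
                 * The 32-bit paths extend cpu_T0 first so the TL-wide
                 * shift matches the 32-bit result: sarx replicates the
                 * sign bit, shrx shifts in zeros; e.g. on a 64-bit TL,
                 * sarx32(0x80000000, 4) == 0xf8000000.
                 */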
4050
4051            case 0x0f3:
4052            case 0x1f3:
4053            case 0x2f3:
4054            case 0x3f3: /* Group 17 */
4055                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4056                    || !(s->prefix & PREFIX_VEX)
4057                    || s->vex_l != 0) {
4058                    goto illegal_op;
4059                }
4060                ot = mo_64_32(s->dflag);
4061                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4062
4063                switch (reg & 7) {
4064                case 1: /* blsr By,Ey */
4065                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
4066                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
4067                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
4068                    gen_op_update2_cc();
4069                    set_cc_op(s, CC_OP_BMILGB + ot);
4070                    break;
4071
4072                case 2: /* blsmsk By,Ey */
4073                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4074                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
4075                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
4076                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4077                    set_cc_op(s, CC_OP_BMILGB + ot);
4078                    break;
4079
4080                case 3: /* blsi By, Ey */
4081                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4082                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
4083                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
4084                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4085                    set_cc_op(s, CC_OP_BMILGB + ot);
4086                    break;
4087
4088                default:
4089                    goto unknown_op;
4090                }
4091                break;
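                /*
                 * These are the classic two's-complement bit tricks
                 * (sketch, on the loaded operand x):
                 *
                 *   blsr(x)   = x & (x - 1);   // clear lowest set bit
                 *   blsmsk(x) = x ^ (x - 1);   // mask through lowest set bit
                 *   blsi(x)   = x & -x;        // isolate lowest set bit
                 */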
4092
4093            default:
4094                goto unknown_op;
4095            }
4096            break;
4097
4098        case 0x03a:
4099        case 0x13a:
4100            b = modrm;
4101            modrm = x86_ldub_code(env, s);
4102            rm = modrm & 7;
4103            reg = ((modrm >> 3) & 7) | rex_r;
4104            mod = (modrm >> 6) & 3;
4105            if (b1 >= 2) {
4106                goto unknown_op;
4107            }
4108
4109            sse_fn_eppi = sse_op_table7[b].op[b1];
4110            if (!sse_fn_eppi) {
4111                goto unknown_op;
4112            }
4113            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4114                goto illegal_op;
4115
4116            s->rip_offset = 1;
4117
4118            if (sse_fn_eppi == SSE_SPECIAL) {
4119                ot = mo_64_32(s->dflag);
4120                rm = (modrm & 7) | REX_B(s);
4121                if (mod != 3)
4122                    gen_lea_modrm(env, s, modrm);
4123                reg = ((modrm >> 3) & 7) | rex_r;
4124                val = x86_ldub_code(env, s);
4125                switch (b) {
4126                case 0x14: /* pextrb */
4127                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4128                                            xmm_regs[reg].ZMM_B(val & 15)));
4129                    if (mod == 3) {
4130                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4131                    } else {
4132                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4133                                           s->mem_index, MO_UB);
4134                    }
4135                    break;
4136                case 0x15: /* pextrw */
4137                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4138                                            xmm_regs[reg].ZMM_W(val & 7)));
4139                    if (mod == 3) {
4140                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4141                    } else {
4142                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4143                                           s->mem_index, MO_LEUW);
4144                    }
4145                    break;
4146                case 0x16:
4147                    if (ot == MO_32) { /* pextrd */
4148                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4149                                        offsetof(CPUX86State,
4150                                                xmm_regs[reg].ZMM_L(val & 3)));
4151                        if (mod == 3) {
4152                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4153                        } else {
4154                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4155                                                s->mem_index, MO_LEUL);
4156                        }
4157                    } else { /* pextrq */
4158#ifdef TARGET_X86_64
4159                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4160                                        offsetof(CPUX86State,
4161                                                xmm_regs[reg].ZMM_Q(val & 1)));
4162                        if (mod == 3) {
4163                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4164                        } else {
4165                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4166                                                s->mem_index, MO_LEQ);
4167                        }
4168#else
4169                        goto illegal_op;
4170#endif
4171                    }
4172                    break;
4173                case 0x17: /* extractps */
4174                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4175                                            xmm_regs[reg].ZMM_L(val & 3)));
4176                    if (mod == 3) {
4177                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4178                    } else {
4179                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4180                                           s->mem_index, MO_LEUL);
4181                    }
4182                    break;
4183                case 0x20: /* pinsrb */
4184                    if (mod == 3) {
4185                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
4186                    } else {
4187                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
4188                                           s->mem_index, MO_UB);
4189                    }
4190                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4191                                            xmm_regs[reg].ZMM_B(val & 15)));
4192                    break;
4193                case 0x21: /* insertps */
4194                    if (mod == 3) {
4195                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4196                                        offsetof(CPUX86State,xmm_regs[rm]
4197                                                .ZMM_L((val >> 6) & 3)));
4198                    } else {
4199                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4200                                            s->mem_index, MO_LEUL);
4201                    }
4202                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4203                                    offsetof(CPUX86State,xmm_regs[reg]
4204                                            .ZMM_L((val >> 4) & 3)));
4205                    if ((val >> 0) & 1)
4206                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4207                                        cpu_env, offsetof(CPUX86State,
4208                                                xmm_regs[reg].ZMM_L(0)));
4209                    if ((val >> 1) & 1)
4210                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4211                                        cpu_env, offsetof(CPUX86State,
4212                                                xmm_regs[reg].ZMM_L(1)));
4213                    if ((val >> 2) & 1)
4214                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4215                                        cpu_env, offsetof(CPUX86State,
4216                                                xmm_regs[reg].ZMM_L(2)));
4217                    if ((val >> 3) & 1)
4218                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4219                                        cpu_env, offsetof(CPUX86State,
4220                                                xmm_regs[reg].ZMM_L(3)));
4221                    break;
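                    /*
                     * INSERTPS imm8 layout, as used above: bits [7:6]
                     * select the source dword (register form only), bits
                     * [5:4] the destination dword, bits [3:0] a zero mask.
                     * E.g. imm8 = 0x1c copies source dword 0 into
                     * destination dword 1 and zeroes dwords 2 and 3.
                     */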
4222                case 0x22:
4223                    if (ot == MO_32) { /* pinsrd */
4224                        if (mod == 3) {
4225                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4226                        } else {
4227                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4228                                                s->mem_index, MO_LEUL);
4229                        }
4230                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4231                                        offsetof(CPUX86State,
4232                                                xmm_regs[reg].ZMM_L(val & 3)));
4233                    } else { /* pinsrq */
4234#ifdef TARGET_X86_64
4235                        if (mod == 3) {
4236                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4237                        } else {
4238                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4239                                                s->mem_index, MO_LEQ);
4240                        }
4241                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4242                                        offsetof(CPUX86State,
4243                                                xmm_regs[reg].ZMM_Q(val & 1)));
4244#else
4245                        goto illegal_op;
4246#endif
4247                    }
4248                    break;
4249                }
4250                return;
4251            }
4252
4253            if (b1) {
4254                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4255                if (mod == 3) {
4256                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4257                } else {
4258                    op2_offset = offsetof(CPUX86State,xmm_t0);
4259                    gen_lea_modrm(env, s, modrm);
4260                    gen_ldo_env_A0(s, op2_offset);
4261                }
4262            } else {
4263                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4264                if (mod == 3) {
4265                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4266                } else {
4267                    op2_offset = offsetof(CPUX86State,mmx_t0);
4268                    gen_lea_modrm(env, s, modrm);
4269                    gen_ldq_env_A0(s, op2_offset);
4270                }
4271            }
4272            val = x86_ldub_code(env, s);
4273
4274            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4275                set_cc_op(s, CC_OP_EFLAGS);
4276
4277                if (s->dflag == MO_64) {
4278                    /* The helper must use the entire 64-bit GP registers */
4279                    val |= 1 << 8;
4280                }
4281            }
4282
4283            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4284            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4285            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4286            break;
4287
4288        case 0x33a:
4289            /* Various integer extensions at 0f 3a f[0-f].  */
4290            b = modrm | (b1 << 8);
4291            modrm = x86_ldub_code(env, s);
4292            reg = ((modrm >> 3) & 7) | rex_r;
4293
4294            switch (b) {
4295            case 0x3f0: /* rorx Gy,Ey, Ib */
4296                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4297                    || !(s->prefix & PREFIX_VEX)
4298                    || s->vex_l != 0) {
4299                    goto illegal_op;
4300                }
4301                ot = mo_64_32(s->dflag);
4302                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4303                b = x86_ldub_code(env, s);
4304                if (ot == MO_64) {
4305                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
4306                } else {
4307                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4308                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4309                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
4310                }
4311                gen_op_mov_reg_v(ot, reg, cpu_T0);
4312                break;
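                /*
                 * Sketch of the two paths above: the 64-bit form uses a
                 * native rotate, while the 32-bit form rotates in i32 and
                 * zero-extends; e.g. rorx32(0x00000001, 1) == 0x80000000.
                 */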
4313
4314            default:
4315                goto unknown_op;
4316            }
4317            break;
4318
4319        default:
4320        unknown_op:
4321            gen_unknown_opcode(env, s);
4322            return;
4323        }
4324    } else {
4325        /* generic MMX or SSE operation */
4326        switch(b) {
4327        case 0x70: /* pshufx insn */
4328        case 0xc6: /* shufp[sd] insn */
4329        case 0xc2: /* compare insns */
4330            s->rip_offset = 1;
4331            break;
4332        default:
4333            break;
4334        }
4335        if (is_xmm) {
4336            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4337            if (mod != 3) {
4338                int sz = 4;
4339
4340                gen_lea_modrm(env, s, modrm);
4341                op2_offset = offsetof(CPUX86State,xmm_t0);
4342
4343                switch (b) {
4344                case 0x50 ... 0x5a:
4345                case 0x5c ... 0x5f:
4346                case 0xc2:
4347                    /* Most sse scalar operations.  */
4348                    if (b1 == 2) {
4349                        sz = 2;
4350                    } else if (b1 == 3) {
4351                        sz = 3;
4352                    }
4353                    break;
4354
4355                case 0x2e:  /* ucomis[sd] */
4356                case 0x2f:  /* comis[sd] */
4357                    if (b1 == 0) {
4358                        sz = 2;
4359                    } else {
4360                        sz = 3;
4361                    }
4362                    break;
4363                }
4364
4365                switch (sz) {
4366                case 2:
4367                    /* 32 bit access */
4368                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
4369                    tcg_gen_st32_tl(cpu_T0, cpu_env,
4370                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4371                    break;
4372                case 3:
4373                    /* 64 bit access */
4374                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4375                    break;
4376                default:
4377                    /* 128 bit access */
4378                    gen_ldo_env_A0(s, op2_offset);
4379                    break;
4380                }
4381            } else {
4382                rm = (modrm & 7) | REX_B(s);
4383                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4384            }
4385        } else {
4386            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4387            if (mod != 3) {
4388                gen_lea_modrm(env, s, modrm);
4389                op2_offset = offsetof(CPUX86State,mmx_t0);
4390                gen_ldq_env_A0(s, op2_offset);
4391            } else {
4392                rm = (modrm & 7);
4393                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4394            }
4395        }
4396        switch(b) {
4397        case 0x0f: /* 3DNow! data insns */
4398            val = x86_ldub_code(env, s);
4399            sse_fn_epp = sse_op_table5[val];
4400            if (!sse_fn_epp) {
4401                goto unknown_op;
4402            }
4403            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4404                goto illegal_op;
4405            }
4406            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4407            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4408            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4409            break;
4410        case 0x70: /* pshufx insn */
4411        case 0xc6: /* shufp[sd] insn */
4412            val = x86_ldub_code(env, s);
4413            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4414            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4415            /* XXX: introduce a new table? */
4416            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4417            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4418            break;
4419        case 0xc2:
4420            /* compare insns */
4421            val = x86_ldub_code(env, s);
4422            if (val >= 8)
4423                goto unknown_op;
4424            sse_fn_epp = sse_op_table4[val][b1];
4425
4426            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4427            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4428            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4429            break;
4430        case 0xf7:
4431            /* maskmov: we must prepare A0 */
4432            if (mod != 3)
4433                goto illegal_op;
4434            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4435            gen_extu(s->aflag, cpu_A0);
4436            gen_add_A0_ds_seg(s);
4437
4438            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4439            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4440            /* XXX: introduce a new table? */
4441            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4442            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4443            break;
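        /*
         * MASKMOVQ/MASKMOVDQU store selected bytes to [DS:rDI], which is
         * why A0 is built by hand above.  Per-byte semantics (sketch):
         *
         *   for (i = 0; i < width; i++) {
         *       if (mask[i] & 0x80) {
         *           memory[a0 + i] = src[i];
         *       }
         *   }
         */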
4444        default:
4445            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4446            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4447            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4448            break;
4449        }
4450        if (b == 0x2e || b == 0x2f) {
4451            set_cc_op(s, CC_OP_EFLAGS);
4452        }
4453    }
4454}
4455
4456/* Convert one instruction.  s->base.is_jmp is set if the translation must
4457   be stopped.  Return the next pc value.  */
4458static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4459{
4460    CPUX86State *env = cpu->env_ptr;
4461    int b, prefixes;
4462    int shift;
4463    TCGMemOp ot, aflag, dflag;
4464    int modrm, reg, rm, mod, op, opreg, val;
4465    target_ulong next_eip, tval;
4466    int rex_w, rex_r;
4467    target_ulong pc_start = s->base.pc_next;
4468
4469    s->pc_start = s->pc = pc_start;
4470    prefixes = 0;
4471    s->override = -1;
4472    rex_w = -1;
4473    rex_r = 0;
4474#ifdef TARGET_X86_64
4475    s->rex_x = 0;
4476    s->rex_b = 0;
4477    x86_64_hregs = 0;
4478#endif
4479    s->rip_offset = 0; /* for relative ip address */
4480    s->vex_l = 0;
4481    s->vex_v = 0;
4482    if (sigsetjmp(s->jmpbuf, 0) != 0) {
4483        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4484        return s->pc;
4485    }
4486
4487 next_byte:
4488    b = x86_ldub_code(env, s);
4489    /* Collect prefixes.  */
4490    switch (b) {
4491    case 0xf3:
4492        prefixes |= PREFIX_REPZ;
4493        goto next_byte;
4494    case 0xf2:
4495        prefixes |= PREFIX_REPNZ;
4496        goto next_byte;
4497    case 0xf0:
4498        prefixes |= PREFIX_LOCK;
4499        goto next_byte;
4500    case 0x2e:
4501        s->override = R_CS;
4502        goto next_byte;
4503    case 0x36:
4504        s->override = R_SS;
4505        goto next_byte;
4506    case 0x3e:
4507        s->override = R_DS;
4508        goto next_byte;
4509    case 0x26:
4510        s->override = R_ES;
4511        goto next_byte;
4512    case 0x64:
4513        s->override = R_FS;
4514        goto next_byte;
4515    case 0x65:
4516        s->override = R_GS;
4517        goto next_byte;
4518    case 0x66:
4519        prefixes |= PREFIX_DATA;
4520        goto next_byte;
4521    case 0x67:
4522        prefixes |= PREFIX_ADR;
4523        goto next_byte;
4524#ifdef TARGET_X86_64
4525    case 0x40 ... 0x4f:
4526        if (CODE64(s)) {
4527            /* REX prefix */
4528            rex_w = (b >> 3) & 1;
4529            rex_r = (b & 0x4) << 1;
4530            s->rex_x = (b & 0x2) << 2;
4531            REX_B(s) = (b & 0x1) << 3;
4532            x86_64_hregs = 1; /* select uniform byte register addressing */
4533            goto next_byte;
4534        }
4535        break;
4536#endif
4537    case 0xc5: /* 2-byte VEX */
4538    case 0xc4: /* 3-byte VEX */
4539        /* VEX prefixes are only recognized in 32-bit and 64-bit code;
4540           otherwise the c4/c5 byte is the LES or LDS opcode.  */
4541        if (s->code32 && !s->vm86) {
4542            static const int pp_prefix[4] = {
4543                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4544            };
4545            int vex3, vex2 = x86_ldub_code(env, s);
4546
4547            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4548                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4549                   otherwise the instruction is LES or LDS.  */
4550                break;
4551            }
4552            s->pc++;
4553
4554            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4555            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4556                            | PREFIX_LOCK | PREFIX_DATA)) {
4557                goto illegal_op;
4558            }
4559#ifdef TARGET_X86_64
4560            if (x86_64_hregs) {
4561                goto illegal_op;
4562            }
4563#endif
4564            rex_r = (~vex2 >> 4) & 8;
4565            if (b == 0xc5) {
4566                vex3 = vex2;
4567                b = x86_ldub_code(env, s);
4568            } else {
4569#ifdef TARGET_X86_64
4570                s->rex_x = (~vex2 >> 3) & 8;
4571                s->rex_b = (~vex2 >> 2) & 8;
4572#endif
4573                vex3 = x86_ldub_code(env, s);
4574                rex_w = (vex3 >> 7) & 1;
4575                switch (vex2 & 0x1f) {
4576                case 0x01: /* Implied 0f leading opcode bytes.  */
4577                    b = x86_ldub_code(env, s) | 0x100;
4578                    break;
4579                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4580                    b = 0x138;
4581                    break;
4582                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4583                    b = 0x13a;
4584                    break;
4585                default:   /* Reserved for future use.  */
4586                    goto unknown_op;
4587                }
4588            }
4589            s->vex_v = (~vex3 >> 3) & 0xf;
4590            s->vex_l = (vex3 >> 2) & 1;
4591            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4592        }
4593        break;
4594    }
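    /* Worked example (editorial, illustrative): c4 e2 79 18 /r is the
       3-byte VEX form of vbroadcastss.  vex2 = 0xe2 inverts to R=X=B=0
       with map 00010b, so b becomes 0x138; vex3 = 0x79 gives rex_w=0,
       vex_v=0 (vvvv=1111b, unused), vex_l=0, and pp=01b, i.e. an
       implied 66 prefix.  */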
4595
4596    /* Post-process prefixes.  */
4597    if (CODE64(s)) {
4598        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4599           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4600           over 0x66 if both are present.  */
4601        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4602        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4603        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4604    } else {
4605        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4606        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4607            dflag = MO_32;
4608        } else {
4609            dflag = MO_16;
4610        }
4611        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4612        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4613            aflag = MO_32;
4614        } else {
4615            aflag = MO_16;
4616        }
4617    }
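    /* Example (editorial, illustrative): in 32-bit code a 0x66 prefix
       flips dflag to MO_16, so 66 b8 34 12 is mov ax, 0x1234, while
       b8 34 12 00 00 without the prefix is mov eax, 0x1234.  */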
4618
4619    s->prefix = prefixes;
4620    s->aflag = aflag;
4621    s->dflag = dflag;
4622
4623    /* now check op code */
4624 reswitch:
4625    switch(b) {
4626    case 0x0f:
4627        /**************************/
4628        /* extended op code */
4629        b = x86_ldub_code(env, s) | 0x100;
4630        goto reswitch;
4631
4632        /**************************/
4633        /* arith & logic */
4634    case 0x00 ... 0x05:
4635    case 0x08 ... 0x0d:
4636    case 0x10 ... 0x15:
4637    case 0x18 ... 0x1d:
4638    case 0x20 ... 0x25:
4639    case 0x28 ... 0x2d:
4640    case 0x30 ... 0x35:
4641    case 0x38 ... 0x3d:
4642        {
4643            int op, f, val;
4644            op = (b >> 3) & 7;
4645            f = (b >> 1) & 3;
4646
4647            ot = mo_b_d(b, dflag);
4648
4649            switch(f) {
4650            case 0: /* OP Ev, Gv */
4651                modrm = x86_ldub_code(env, s);
4652                reg = ((modrm >> 3) & 7) | rex_r;
4653                mod = (modrm >> 6) & 3;
4654                rm = (modrm & 7) | REX_B(s);
4655                if (mod != 3) {
4656                    gen_lea_modrm(env, s, modrm);
4657                    opreg = OR_TMP0;
4658                } else if (op == OP_XORL && rm == reg) {
4659                xor_zero:
4660                    /* xor reg, reg optimisation */
4661                    set_cc_op(s, CC_OP_CLR);
4662                    tcg_gen_movi_tl(cpu_T0, 0);
4663                    gen_op_mov_reg_v(ot, reg, cpu_T0);
4664                    break;
4665                } else {
4666                    opreg = rm;
4667                }
4668                gen_op_mov_v_reg(ot, cpu_T1, reg);
4669                gen_op(s, op, ot, opreg);
4670                break;
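            /* Editorial note: the xor_zero path above recognizes
               xor reg, reg (e.g. 31 c0 for eax) and emits a plain store
               of zero with CC_OP_CLR, since the result and the flags
               are statically known.  */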
4671            case 1: /* OP Gv, Ev */
4672                modrm = x86_ldub_code(env, s);
4673                mod = (modrm >> 6) & 3;
4674                reg = ((modrm >> 3) & 7) | rex_r;
4675                rm = (modrm & 7) | REX_B(s);
4676                if (mod != 3) {
4677                    gen_lea_modrm(env, s, modrm);
4678                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4679                } else if (op == OP_XORL && rm == reg) {
4680                    goto xor_zero;
4681                } else {
4682                    gen_op_mov_v_reg(ot, cpu_T1, rm);
4683                }
4684                gen_op(s, op, ot, reg);
4685                break;
4686            case 2: /* OP A, Iv */
4687                val = insn_get(env, s, ot);
4688                tcg_gen_movi_tl(cpu_T1, val);
4689                gen_op(s, op, ot, OR_EAX);
4690                break;
4691            }
4692        }
4693        break;
4694
4695    case 0x82:
4696        if (CODE64(s))
4697            goto illegal_op;
4698    case 0x80: /* GRP1 */
4699    case 0x81:
4700    case 0x83:
4701        {
4702            int val;
4703
4704            ot = mo_b_d(b, dflag);
4705
4706            modrm = x86_ldub_code(env, s);
4707            mod = (modrm >> 6) & 3;
4708            rm = (modrm & 7) | REX_B(s);
4709            op = (modrm >> 3) & 7;
4710
4711            if (mod != 3) {
4712                if (b == 0x83)
4713                    s->rip_offset = 1;
4714                else
4715                    s->rip_offset = insn_const_size(ot);
4716                gen_lea_modrm(env, s, modrm);
4717                opreg = OR_TMP0;
4718            } else {
4719                opreg = rm;
4720            }
4721
4722            switch(b) {
4723            default:
4724            case 0x80:
4725            case 0x81:
4726            case 0x82:
4727                val = insn_get(env, s, ot);
4728                break;
4729            case 0x83:
4730                val = (int8_t)insn_get(env, s, MO_8);
4731                break;
4732            }
4733            tcg_gen_movi_tl(cpu_T1, val);
4734            gen_op(s, op, ot, opreg);
4735        }
4736        break;
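    /* Editorial note: s->rip_offset records how many immediate bytes
       still follow the ModRM/SIB/displacement, so that RIP-relative
       addressing in 64-bit mode is computed from the end of the whole
       instruction rather than the end of the displacement.  */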
4737
4738        /**************************/
4739        /* inc, dec, and other misc arith */
4740    case 0x40 ... 0x47: /* inc Gv */
4741        ot = dflag;
4742        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4743        break;
4744    case 0x48 ... 0x4f: /* dec Gv */
4745        ot = dflag;
4746        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4747        break;
4748    case 0xf6: /* GRP3 */
4749    case 0xf7:
4750        ot = mo_b_d(b, dflag);
4751
4752        modrm = x86_ldub_code(env, s);
4753        mod = (modrm >> 6) & 3;
4754        rm = (modrm & 7) | REX_B(s);
4755        op = (modrm >> 3) & 7;
4756        if (mod != 3) {
4757            if (op == 0) {
4758                s->rip_offset = insn_const_size(ot);
4759            }
4760            gen_lea_modrm(env, s, modrm);
4761            /* For those below that handle locked memory, don't load here.  */
4762            if (!(s->prefix & PREFIX_LOCK)
4763                || op != 2) {
4764                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4765            }
4766        } else {
4767            gen_op_mov_v_reg(ot, cpu_T0, rm);
4768        }
4769
4770        switch(op) {
4771        case 0: /* test */
4772            val = insn_get(env, s, ot);
4773            tcg_gen_movi_tl(cpu_T1, val);
4774            gen_op_testl_T0_T1_cc();
4775            set_cc_op(s, CC_OP_LOGICB + ot);
4776            break;
4777        case 2: /* not */
4778            if (s->prefix & PREFIX_LOCK) {
4779                if (mod == 3) {
4780                    goto illegal_op;
4781                }
4782                tcg_gen_movi_tl(cpu_T0, ~0);
4783                tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
4784                                            s->mem_index, ot | MO_LE);
4785            } else {
4786                tcg_gen_not_tl(cpu_T0, cpu_T0);
4787                if (mod != 3) {
4788                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4789                } else {
4790                    gen_op_mov_reg_v(ot, rm, cpu_T0);
4791                }
4792            }
4793            break;
4794        case 3: /* neg */
4795            if (s->prefix & PREFIX_LOCK) {
4796                TCGLabel *label1;
4797                TCGv a0, t0, t1, t2;
4798
4799                if (mod == 3) {
4800                    goto illegal_op;
4801                }
4802                a0 = tcg_temp_local_new();
4803                t0 = tcg_temp_local_new();
4804                label1 = gen_new_label();
4805
4806                tcg_gen_mov_tl(a0, cpu_A0);
4807                tcg_gen_mov_tl(t0, cpu_T0);
4808
4809                gen_set_label(label1);
4810                t1 = tcg_temp_new();
4811                t2 = tcg_temp_new();
4812                tcg_gen_mov_tl(t2, t0);
4813                tcg_gen_neg_tl(t1, t0);
4814                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4815                                          s->mem_index, ot | MO_LE);
4816                tcg_temp_free(t1);
4817                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4818
4819                tcg_temp_free(t2);
4820                tcg_temp_free(a0);
4821                tcg_gen_mov_tl(cpu_T0, t0);
4822                tcg_temp_free(t0);
4823            } else {
4824                tcg_gen_neg_tl(cpu_T0, cpu_T0);
4825                if (mod != 3) {
4826                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4827                } else {
4828                    gen_op_mov_reg_v(ot, rm, cpu_T0);
4829                }
4830            }
4831            gen_op_update_neg_cc();
4832            set_cc_op(s, CC_OP_SUBB + ot);
4833            break;
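        /* Editorial note: TCG has no single atomic negate op, so the
           locked NEG above is emulated with a compare-and-swap loop:
           fetch t0, attempt to store -t0 with cmpxchg, and retry from
           label1 if the memory value changed in the meantime.  */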
4834        case 4: /* mul */
4835            switch(ot) {
4836            case MO_8:
4837                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4838                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
4839                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
4840                /* XXX: use a 32-bit mul, which could be faster */
4841                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4842                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4843                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4844                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
4845                set_cc_op(s, CC_OP_MULB);
4846                break;
4847            case MO_16:
4848                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4849                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4850                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
4851                /* XXX: use a 32-bit mul, which could be faster */
4852                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4853                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4854                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4855                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4856                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4857                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4858                set_cc_op(s, CC_OP_MULW);
4859                break;
4860            default:
4861            case MO_32:
4862                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4863                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4864                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4865                                  cpu_tmp2_i32, cpu_tmp3_i32);
4866                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4867                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4868                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4869                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4870                set_cc_op(s, CC_OP_MULL);
4871                break;
4872#ifdef TARGET_X86_64
4873            case MO_64:
4874                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4875                                  cpu_T0, cpu_regs[R_EAX]);
4876                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4877                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4878                set_cc_op(s, CC_OP_MULQ);
4879                break;
4880#endif
4881            }
4882            break;
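        /* Example (editorial, illustrative): 8-bit MUL with AL=0x80 and
           operand 0x02 leaves AX=0x0100; cc_src holds T0 & 0xff00 =
           0x0100, and the CC_OP_MULB machinery later derives CF=OF=1
           exactly when cc_src is nonzero.  */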
4883        case 5: /* imul */
4884            switch(ot) {
4885            case MO_8:
4886                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4887                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4888                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
4889                /* XXX: use a 32-bit mul, which could be faster */
4890                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4891                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4892                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4893                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
4894                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4895                set_cc_op(s, CC_OP_MULB);
4896                break;
4897            case MO_16:
4898                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4899                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4900                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
4901                /* XXX: use a 32-bit mul, which could be faster */
4902                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4903                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4904                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4905                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
4906                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4907                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4908                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4909                set_cc_op(s, CC_OP_MULW);
4910                break;
4911            default:
4912            case MO_32:
4913                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4914                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4915                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4916                                  cpu_tmp2_i32, cpu_tmp3_i32);
4917                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4918                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4919                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4920                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4921                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4922                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4923                set_cc_op(s, CC_OP_MULL);
4924                break;
4925#ifdef TARGET_X86_64
4926            case MO_64:
4927                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4928                                  cpu_T0, cpu_regs[R_EAX]);
4929                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4930                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4931                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4932                set_cc_op(s, CC_OP_MULQ);
4933                break;
4934#endif
4935            }
4936            break;
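        /* Editorial note: for IMUL, cc_src is set to the result minus
           the sign-extended truncated result; that is zero exactly when
           the signed product fits in the destination half, so CF=OF can
           later be derived from cc_src != 0.  */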
4937        case 6: /* div */
4938            switch(ot) {
4939            case MO_8:
4940                gen_helper_divb_AL(cpu_env, cpu_T0);
4941                break;
4942            case MO_16:
4943                gen_helper_divw_AX(cpu_env, cpu_T0);
4944                break;
4945            default:
4946            case MO_32:
4947                gen_helper_divl_EAX(cpu_env, cpu_T0);
4948                break;
4949#ifdef TARGET_X86_64
4950            case MO_64:
4951                gen_helper_divq_EAX(cpu_env, cpu_T0);
4952                break;
4953#endif
4954            }
4955            break;
4956        case 7: /* idiv */
4957            switch(ot) {
4958            case MO_8:
4959                gen_helper_idivb_AL(cpu_env, cpu_T0);
4960                break;
4961            case MO_16:
4962                gen_helper_idivw_AX(cpu_env, cpu_T0);
4963                break;
4964            default:
4965            case MO_32:
4966                gen_helper_idivl_EAX(cpu_env, cpu_T0);
4967                break;
4968#ifdef TARGET_X86_64
4969            case MO_64:
4970                gen_helper_idivq_EAX(cpu_env, cpu_T0);
4971                break;
4972#endif
4973            }
4974            break;
4975        default:
4976            goto unknown_op;
4977        }
4978        break;
4979
4980    case 0xfe: /* GRP4 */
4981    case 0xff: /* GRP5 */
4982        ot = mo_b_d(b, dflag);
4983
4984        modrm = x86_ldub_code(env, s);
4985        mod = (modrm >> 6) & 3;
4986        rm = (modrm & 7) | REX_B(s);
4987        op = (modrm >> 3) & 7;
4988        if (op >= 2 && b == 0xfe) {
4989            goto unknown_op;
4990        }
4991        if (CODE64(s)) {
4992            if (op == 2 || op == 4) {
4993                /* operand size for jumps is 64 bit */
4994                ot = MO_64;
4995            } else if (op == 3 || op == 5) {
4996                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4997            } else if (op == 6) {
4998                /* default push size is 64 bit */
4999                ot = mo_pushpop(s, dflag);
5000            }
5001        }
5002        if (mod != 3) {
5003            gen_lea_modrm(env, s, modrm);
5004            if (op >= 2 && op != 3 && op != 5)
5005                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
5006        } else {
5007            gen_op_mov_v_reg(ot, cpu_T0, rm);
5008        }
5009
5010        switch(op) {
5011        case 0: /* inc Ev */
5012            if (mod != 3)
5013                opreg = OR_TMP0;
5014            else
5015                opreg = rm;
5016            gen_inc(s, ot, opreg, 1);
5017            break;
5018        case 1: /* dec Ev */
5019            if (mod != 3)
5020                opreg = OR_TMP0;
5021            else
5022                opreg = rm;
5023            gen_inc(s, ot, opreg, -1);
5024            break;
5025        case 2: /* call Ev */
5026            /* XXX: optimize the memory case (the 'and' is not necessary) */
5027            if (dflag == MO_16) {
5028                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5029            }
5030            next_eip = s->pc - s->cs_base;
5031            tcg_gen_movi_tl(cpu_T1, next_eip);
5032            gen_push_v(s, cpu_T1);
5033            gen_op_jmp_v(cpu_T0);
5034            gen_bnd_jmp(s);
5035            gen_jr(s, cpu_T0);
5036            break;
5037        case 3: /* lcall Ev */
5038            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5039            gen_add_A0_im(s, 1 << ot);
5040            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5041        do_lcall:
5042            if (s->pe && !s->vm86) {
5043                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5044                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
5045                                           tcg_const_i32(dflag - 1),
5046                                           tcg_const_tl(s->pc - s->cs_base));
5047            } else {
5048                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5049                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
5050                                      tcg_const_i32(dflag - 1),
5051                                      tcg_const_i32(s->pc - s->cs_base));
5052            }
5053            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
5054            gen_jr(s, cpu_tmp4);
5055            break;
5056        case 4: /* jmp Ev */
5057            if (dflag == MO_16) {
5058                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5059            }
5060            gen_op_jmp_v(cpu_T0);
5061            gen_bnd_jmp(s);
5062            gen_jr(s, cpu_T0);
5063            break;
5064        case 5: /* ljmp Ev */
5065            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5066            gen_add_A0_im(s, 1 << ot);
5067            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5068        do_ljmp:
5069            if (s->pe && !s->vm86) {
5070                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5071                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
5072                                          tcg_const_tl(s->pc - s->cs_base));
5073            } else {
5074                gen_op_movl_seg_T0_vm(R_CS);
5075                gen_op_jmp_v(cpu_T1);
5076            }
5077            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
5078            gen_jr(s, cpu_tmp4);
5079            break;
5080        case 6: /* push Ev */
5081            gen_push_v(s, cpu_T0);
5082            break;
5083        default:
5084            goto unknown_op;
5085        }
5086        break;
5087
5088    case 0x84: /* test Ev, Gv */
5089    case 0x85:
5090        ot = mo_b_d(b, dflag);
5091
5092        modrm = x86_ldub_code(env, s);
5093        reg = ((modrm >> 3) & 7) | rex_r;
5094
5095        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5096        gen_op_mov_v_reg(ot, cpu_T1, reg);
5097        gen_op_testl_T0_T1_cc();
5098        set_cc_op(s, CC_OP_LOGICB + ot);
5099        break;
5100
5101    case 0xa8: /* test eAX, Iv */
5102    case 0xa9:
5103        ot = mo_b_d(b, dflag);
5104        val = insn_get(env, s, ot);
5105
5106        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
5107        tcg_gen_movi_tl(cpu_T1, val);
5108        gen_op_testl_T0_T1_cc();
5109        set_cc_op(s, CC_OP_LOGICB + ot);
5110        break;
5111
5112    case 0x98: /* CWDE/CBW */
5113        switch (dflag) {
5114#ifdef TARGET_X86_64
5115        case MO_64:
5116            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5117            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5118            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
5119            break;
5120#endif
5121        case MO_32:
5122            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5123            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5124            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
5125            break;
5126        case MO_16:
5127            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
5128            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5129            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
5130            break;
5131        default:
5132            tcg_abort();
5133        }
5134        break;
5135    case 0x99: /* CDQ/CWD */
5136        switch (dflag) {
5137#ifdef TARGET_X86_64
5138        case MO_64:
5139            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
5140            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
5141            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
5142            break;
5143#endif
5144        case MO_32:
5145            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5146            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5147            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
5148            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
5149            break;
5150        case MO_16:
5151            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5152            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5153            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
5154            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
5155            break;
5156        default:
5157            tcg_abort();
5158        }
5159        break;
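    /* Example (editorial, illustrative): CWD with AX=0x8000 sets
       DX=0xFFFF, and CDQ with EAX=0x80000000 sets EDX=0xFFFFFFFF; the
       sari by 15/31/63 above replicates the sign bit across the high
       half.  */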
5160    case 0x1af: /* imul Gv, Ev */
5161    case 0x69: /* imul Gv, Ev, I */
5162    case 0x6b:
5163        ot = dflag;
5164        modrm = x86_ldub_code(env, s);
5165        reg = ((modrm >> 3) & 7) | rex_r;
5166        if (b == 0x69)
5167            s->rip_offset = insn_const_size(ot);
5168        else if (b == 0x6b)
5169            s->rip_offset = 1;
5170        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5171        if (b == 0x69) {
5172            val = insn_get(env, s, ot);
5173            tcg_gen_movi_tl(cpu_T1, val);
5174        } else if (b == 0x6b) {
5175            val = (int8_t)insn_get(env, s, MO_8);
5176            tcg_gen_movi_tl(cpu_T1, val);
5177        } else {
5178            gen_op_mov_v_reg(ot, cpu_T1, reg);
5179        }
5180        switch (ot) {
5181#ifdef TARGET_X86_64
5182        case MO_64:
5183            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
5184            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5185            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5186            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
5187            break;
5188#endif
5189        case MO_32:
5190            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5191            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
5192            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5193                              cpu_tmp2_i32, cpu_tmp3_i32);
5194            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5195            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5196            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5197            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5198            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5199            break;
5200        default:
5201            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5202            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
5203            /* XXX: use a 32-bit mul, which could be faster */
5204            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
5205            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
5206            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
5207            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
5208            gen_op_mov_reg_v(ot, reg, cpu_T0);
5209            break;
5210        }
5211        set_cc_op(s, CC_OP_MULB + ot);
5212        break;
5213    case 0x1c0:
5214    case 0x1c1: /* xadd Ev, Gv */
5215        ot = mo_b_d(b, dflag);
5216        modrm = x86_ldub_code(env, s);
5217        reg = ((modrm >> 3) & 7) | rex_r;
5218        mod = (modrm >> 6) & 3;
5219        gen_op_mov_v_reg(ot, cpu_T0, reg);
5220        if (mod == 3) {
5221            rm = (modrm & 7) | REX_B(s);
5222            gen_op_mov_v_reg(ot, cpu_T1, rm);
5223            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5224            gen_op_mov_reg_v(ot, reg, cpu_T1);
5225            gen_op_mov_reg_v(ot, rm, cpu_T0);
5226        } else {
5227            gen_lea_modrm(env, s, modrm);
5228            if (s->prefix & PREFIX_LOCK) {
5229                tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
5230                                            s->mem_index, ot | MO_LE);
5231                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5232            } else {
5233                gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5234                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5235                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5236            }
5237            gen_op_mov_reg_v(ot, reg, cpu_T1);
5238        }
5239        gen_op_update2_cc();
5240        set_cc_op(s, CC_OP_ADDB + ot);
5241        break;
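    /* Editorial note: LOCK XADD maps directly onto
       tcg_gen_atomic_fetch_add_tl above: cpu_T1 receives the old memory
       value while the sum is stored atomically, and the register
       operand is then overwritten with the old value, as XADD
       requires.  */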
5242    case 0x1b0:
5243    case 0x1b1: /* cmpxchg Ev, Gv */
5244        {
5245            TCGv oldv, newv, cmpv;
5246
5247            ot = mo_b_d(b, dflag);
5248            modrm = x86_ldub_code(env, s);
5249            reg = ((modrm >> 3) & 7) | rex_r;
5250            mod = (modrm >> 6) & 3;
5251            oldv = tcg_temp_new();
5252            newv = tcg_temp_new();
5253            cmpv = tcg_temp_new();
5254            gen_op_mov_v_reg(ot, newv, reg);
5255            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5256
5257            if (s->prefix & PREFIX_LOCK) {
5258                if (mod == 3) {
5259                    goto illegal_op;
5260                }
5261                gen_lea_modrm(env, s, modrm);
5262                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
5263                                          s->mem_index, ot | MO_LE);
5264                gen_op_mov_reg_v(ot, R_EAX, oldv);
5265            } else {
5266                if (mod == 3) {
5267                    rm = (modrm & 7) | REX_B(s);
5268                    gen_op_mov_v_reg(ot, oldv, rm);
5269                } else {
5270                    gen_lea_modrm(env, s, modrm);
5271                    gen_op_ld_v(s, ot, oldv, cpu_A0);
5272                    rm = 0; /* avoid warning */
5273                }
5274                gen_extu(ot, oldv);
5275                gen_extu(ot, cmpv);
5276                /* store value = (old == cmp ? new : old);  */
5277                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5278                if (mod == 3) {
5279                    gen_op_mov_reg_v(ot, R_EAX, oldv);
5280                    gen_op_mov_reg_v(ot, rm, newv);
5281                } else {
5282                    /* Perform an unconditional store cycle like physical cpu;
5283                       must be before changing accumulator to ensure
5284                       idempotency if the store faults and the instruction
5285                       is restarted */
5286                    gen_op_st_v(s, ot, newv, cpu_A0);
5287                    gen_op_mov_reg_v(ot, R_EAX, oldv);
5288                }
5289            }
5290            tcg_gen_mov_tl(cpu_cc_src, oldv);
5291            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
5292            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5293            set_cc_op(s, CC_OP_SUBB + ot);
5294            tcg_temp_free(oldv);
5295            tcg_temp_free(newv);
5296            tcg_temp_free(cmpv);
5297        }
5298        break;
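    /* Example (editorial, illustrative): cmpxchg [mem], ebx with EAX=5
       and [mem]=5 stores EBX and yields ZF=1; with [mem]=7 it loads 7
       into EAX and yields ZF=0.  ZF comes from the cmpv - oldv
       subtraction recorded in cc_dst above.  */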
5299    case 0x1c7: /* cmpxchg8b */
5300        modrm = x86_ldub_code(env, s);
5301        mod = (modrm >> 6) & 3;
5302        if ((mod == 3) || ((modrm & 0x38) != 0x8))
5303            goto illegal_op;
5304#ifdef TARGET_X86_64
5305        if (dflag == MO_64) {
5306            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5307                goto illegal_op;
5308            gen_lea_modrm(env, s, modrm);
5309            if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5310                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
5311            } else {
5312                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
5313            }
5314        } else
5315#endif
5316        {
5317            if (!(s->cpuid_features & CPUID_CX8))
5318                goto illegal_op;
5319            gen_lea_modrm(env, s, modrm);
5320            if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5321                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
5322            } else {
5323                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
5324            }
5325        }
5326        set_cc_op(s, CC_OP_EFLAGS);
5327        break;
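    /* Editorial note: the locked cmpxchg8b/16b helpers are only used
       when the TB executes with CF_PARALLEL (i.e. MTTCG); otherwise no
       other vCPU can race the access, and the _unlocked variants give
       the same architectural result.  */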
5328
5329        /**************************/
5330        /* push/pop */
5331    case 0x50 ... 0x57: /* push */
5332        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
5333        gen_push_v(s, cpu_T0);
5334        break;
5335    case 0x58 ... 0x5f: /* pop */
5336        ot = gen_pop_T0(s);
5337        /* NOTE: order is important for pop %sp */
5338        gen_pop_update(s, ot);
5339        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
5340        break;
5341    case 0x60: /* pusha */
5342        if (CODE64(s))
5343            goto illegal_op;
5344        gen_pusha(s);
5345        break;
5346    case 0x61: /* popa */
5347        if (CODE64(s))
5348            goto illegal_op;
5349        gen_popa(s);
5350        break;
5351    case 0x68: /* push Iv */
5352    case 0x6a:
5353        ot = mo_pushpop(s, dflag);
5354        if (b == 0x68)
5355            val = insn_get(env, s, ot);
5356        else
5357            val = (int8_t)insn_get(env, s, MO_8);
5358        tcg_gen_movi_tl(cpu_T0, val);
5359        gen_push_v(s, cpu_T0);
5360        break;
5361    case 0x8f: /* pop Ev */
5362        modrm = x86_ldub_code(env, s);
5363        mod = (modrm >> 6) & 3;
5364        ot = gen_pop_T0(s);
5365        if (mod == 3) {
5366            /* NOTE: order is important for pop %sp */
5367            gen_pop_update(s, ot);
5368            rm = (modrm & 7) | REX_B(s);
5369            gen_op_mov_reg_v(ot, rm, cpu_T0);
5370        } else {
5371            /* NOTE: order is important too for MMU exceptions */
5372            s->popl_esp_hack = 1 << ot;
5373            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5374            s->popl_esp_hack = 0;
5375            gen_pop_update(s, ot);
5376        }
5377        break;
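    /* Editorial note: popl_esp_hack compensates for POP with a memory
       destination: the effective address must be computed as if ESP had
       already been incremented, so ESP-relative addressing is biased by
       the operand size while gen_ldst_modrm runs.  */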
5378    case 0xc8: /* enter */
5379        {
5380            int level;
5381            val = x86_lduw_code(env, s);
5382            level = x86_ldub_code(env, s);
5383            gen_enter(s, val, level);
5384        }
5385        break;
5386    case 0xc9: /* leave */
5387        gen_leave(s);
5388        break;
5389    case 0x06: /* push es */
5390    case 0x0e: /* push cs */
5391    case 0x16: /* push ss */
5392    case 0x1e: /* push ds */
5393        if (CODE64(s))
5394            goto illegal_op;
5395        gen_op_movl_T0_seg(b >> 3);
5396        gen_push_v(s, cpu_T0);
5397        break;
5398    case 0x1a0: /* push fs */
5399    case 0x1a8: /* push gs */
5400        gen_op_movl_T0_seg((b >> 3) & 7);
5401        gen_push_v(s, cpu_T0);
5402        break;
5403    case 0x07: /* pop es */
5404    case 0x17: /* pop ss */
5405    case 0x1f: /* pop ds */
5406        if (CODE64(s))
5407            goto illegal_op;
5408        reg = b >> 3;
5409        ot = gen_pop_T0(s);
5410        gen_movl_seg_T0(s, reg);
5411        gen_pop_update(s, ot);
5412        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5413        if (s->base.is_jmp) {
5414            gen_jmp_im(s->pc - s->cs_base);
5415            if (reg == R_SS) {
5416                s->tf = 0;
5417                gen_eob_inhibit_irq(s, true);
5418            } else {
5419                gen_eob(s);
5420            }
5421        }
5422        break;
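    /* Editorial note: a pop into SS inhibits interrupts and traps until
       after the next instruction, so that SS:ESP can be loaded as a
       pair; gen_eob_inhibit_irq(s, true) above models that
       one-instruction window.  */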
5423    case 0x1a1: /* pop fs */
5424    case 0x1a9: /* pop gs */
5425        ot = gen_pop_T0(s);
5426        gen_movl_seg_T0(s, (b >> 3) & 7);
5427        gen_pop_update(s, ot);
5428        if (s->base.is_jmp) {
5429            gen_jmp_im(s->pc - s->cs_base);
5430            gen_eob(s);
5431        }
5432        break;
5433
5434        /**************************/
5435        /* mov */
5436    case 0x88:
5437    case 0x89: /* mov Gv, Ev */
5438        ot = mo_b_d(b, dflag);
5439        modrm = x86_ldub_code(env, s);
5440        reg = ((modrm >> 3) & 7) | rex_r;
5441
5442        /* generate a generic store */
5443        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5444        break;
5445    case 0xc6:
5446    case 0xc7: /* mov Ev, Iv */
5447        ot = mo_b_d(b, dflag);
5448        modrm = x86_ldub_code(env, s);
5449        mod = (modrm >> 6) & 3;
5450        if (mod != 3) {
5451            s->rip_offset = insn_const_size(ot);
5452            gen_lea_modrm(env, s, modrm);
5453        }
5454        val = insn_get(env, s, ot);
5455        tcg_gen_movi_tl(cpu_T0, val);
5456        if (mod != 3) {
5457            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5458        } else {
5459            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
5460        }
5461        break;
5462    case 0x8a:
5463    case 0x8b: /* mov Ev, Gv */
5464        ot = mo_b_d(b, dflag);
5465        modrm = x86_ldub_code(env, s);
5466        reg = ((modrm >> 3) & 7) | rex_r;
5467
5468        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5469        gen_op_mov_reg_v(ot, reg, cpu_T0);
5470        break;
5471    case 0x8e: /* mov seg, Gv */
5472        modrm = x86_ldub_code(env, s);
5473        reg = (modrm >> 3) & 7;
5474        if (reg >= 6 || reg == R_CS)
5475            goto illegal_op;
5476        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5477        gen_movl_seg_T0(s, reg);
5478        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5479        if (s->base.is_jmp) {
5480            gen_jmp_im(s->pc - s->cs_base);
5481            if (reg == R_SS) {
5482                s->tf = 0;
5483                gen_eob_inhibit_irq(s, true);
5484            } else {
5485                gen_eob(s);
5486            }
5487        }
5488        break;
5489    case 0x8c: /* mov Gv, seg */
5490        modrm = x86_ldub_code(env, s);
5491        reg = (modrm >> 3) & 7;
5492        mod = (modrm >> 6) & 3;
5493        if (reg >= 6)
5494            goto illegal_op;
5495        gen_op_movl_T0_seg(reg);
5496        ot = mod == 3 ? dflag : MO_16;
5497        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5498        break;
5499
5500    case 0x1b6: /* movzbS Gv, Eb */
5501    case 0x1b7: /* movzwS Gv, Ew */
5502    case 0x1be: /* movsbS Gv, Eb */
5503    case 0x1bf: /* movswS Gv, Ew */
5504        {
5505            TCGMemOp d_ot;
5506            TCGMemOp s_ot;
5507
5508            /* d_ot is the size of destination */
5509            d_ot = dflag;
5510            /* ot is the size of source */
5511            ot = (b & 1) + MO_8;
5512            /* s_ot is the sign+size of source */
5513            s_ot = b & 8 ? MO_SIGN | ot : ot;
5514
5515            modrm = x86_ldub_code(env, s);
5516            reg = ((modrm >> 3) & 7) | rex_r;
5517            mod = (modrm >> 6) & 3;
5518            rm = (modrm & 7) | REX_B(s);
5519
5520            if (mod == 3) {
5521                if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
5522                    tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
5523                } else {
5524                    gen_op_mov_v_reg(ot, cpu_T0, rm);
5525                    switch (s_ot) {
5526                    case MO_UB:
5527                        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
5528                        break;
5529                    case MO_SB:
5530                        tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5531                        break;
5532                    case MO_UW:
5533                        tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5534                        break;
5535                    default:
5536                    case MO_SW:
5537                        tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5538                        break;
5539                    }
5540                }
5541                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5542            } else {
5543                gen_lea_modrm(env, s, modrm);
5544                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
5545                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5546            }
5547        }
5548        break;
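    /* Example (editorial, illustrative): movsx eax, byte ptr [mem] with
       [mem]=0x80 loads through MO_SB and leaves EAX=0xFFFFFF80, whereas
       the movzx form (MO_UB) would leave EAX=0x00000080.  */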
5549
5550    case 0x8d: /* lea */
5551        modrm = x86_ldub_code(env, s);
5552        mod = (modrm >> 6) & 3;
5553        if (mod == 3)
5554            goto illegal_op;
5555        reg = ((modrm >> 3) & 7) | rex_r;
5556        {
5557            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5558            TCGv ea = gen_lea_modrm_1(a);
5559            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5560            gen_op_mov_reg_v(dflag, reg, cpu_A0);
5561        }
5562        break;
5563
5564    case 0xa0: /* mov EAX, Ov */
5565    case 0xa1:
5566    case 0xa2: /* mov Ov, EAX */
5567    case 0xa3:
5568        {
5569            target_ulong offset_addr;
5570
5571            ot = mo_b_d(b, dflag);
5572            switch (s->aflag) {
5573#ifdef TARGET_X86_64
5574            case MO_64:
5575                offset_addr = x86_ldq_code(env, s);
5576                break;
5577#endif
5578            default:
5579                offset_addr = insn_get(env, s, s->aflag);
5580                break;
5581            }
5582            tcg_gen_movi_tl(cpu_A0, offset_addr);
5583            gen_add_A0_ds_seg(s);
5584            if ((b & 2) == 0) {
5585                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
5586                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
5587            } else {
5588                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
5589                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5590            }
5591        }
5592        break;
5593    case 0xd7: /* xlat */
5594        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
5595        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
5596        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
5597        gen_extu(s->aflag, cpu_A0);
5598        gen_add_A0_ds_seg(s);
5599        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
5600        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
5601        break;
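    /* Example (editorial, illustrative): XLAT with EBX=0x1000 and
       AL=0x05 reloads AL from DS:0x1005; only the low byte of EAX
       participates, and the sum is truncated to the current address
       size by gen_extu above.  */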
5602    case 0xb0 ... 0xb7: /* mov R, Ib */
5603        val = insn_get(env, s, MO_8);
5604        tcg_gen_movi_tl(cpu_T0, val);
5605        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
5606        break;
5607    case 0xb8 ... 0xbf: /* mov R, Iv */
5608#ifdef TARGET_X86_64
5609        if (dflag == MO_64) {
5610            uint64_t tmp;
5611            /* 64 bit case */
5612            tmp = x86_ldq_code(env, s);
5613            reg = (b & 7) | REX_B(s);
5614            tcg_gen_movi_tl(cpu_T0, tmp);
5615            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
5616        } else
5617#endif
5618        {
5619            ot = dflag;
5620            val = insn_get(env, s, ot);
5621            reg = (b & 7) | REX_B(s);
5622            tcg_gen_movi_tl(cpu_T0, val);
5623            gen_op_mov_reg_v(ot, reg, cpu_T0);
5624        }
5625        break;
5626
5627    case 0x91 ... 0x97: /* xchg R, EAX */
5628    do_xchg_reg_eax:
5629        ot = dflag;
5630        reg = (b & 7) | REX_B(s);
5631        rm = R_EAX;
5632        goto do_xchg_reg;
5633    case 0x86:
5634    case 0x87: /* xchg Ev, Gv */
5635        ot = mo_b_d(b, dflag);
5636        modrm = x86_ldub_code(env, s);
5637        reg = ((modrm >> 3) & 7) | rex_r;
5638        mod = (modrm >> 6) & 3;
5639        if (mod == 3) {
5640            rm = (modrm & 7) | REX_B(s);
5641        do_xchg_reg:
5642            gen_op_mov_v_reg(ot, cpu_T0, reg);
5643            gen_op_mov_v_reg(ot, cpu_T1, rm);
5644            gen_op_mov_reg_v(ot, rm, cpu_T0);
5645            gen_op_mov_reg_v(ot, reg, cpu_T1);
5646        } else {
5647            gen_lea_modrm(env, s, modrm);
5648            gen_op_mov_v_reg(ot, cpu_T0, reg);
5649            /* for xchg, lock is implicit */
5650            tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
5651                                   s->mem_index, ot | MO_LE);
5652            gen_op_mov_reg_v(ot, reg, cpu_T1);
5653        }
5654        break;
5655    case 0xc4: /* les Gv */
5656        /* In CODE64 this is VEX3; see above.  */
5657        op = R_ES;
5658        goto do_lxx;
5659    case 0xc5: /* lds Gv */
5660        /* In CODE64 this is VEX2; see above.  */
5661        op = R_DS;
5662        goto do_lxx;
5663    case 0x1b2: /* lss Gv */
5664        op = R_SS;
5665        goto do_lxx;
5666    case 0x1b4: /* lfs Gv */
5667        op = R_FS;
5668        goto do_lxx;
5669    case 0x1b5: /* lgs Gv */
5670        op = R_GS;
5671    do_lxx:
5672        ot = dflag != MO_16 ? MO_32 : MO_16;
5673        modrm = x86_ldub_code(env, s);
5674        reg = ((modrm >> 3) & 7) | rex_r;
5675        mod = (modrm >> 6) & 3;
5676        if (mod == 3)
5677            goto illegal_op;
5678        gen_lea_modrm(env, s, modrm);
5679        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5680        gen_add_A0_im(s, 1 << ot);
5681        /* load the segment first to handle exceptions properly */
5682        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5683        gen_movl_seg_T0(s, op);
5684        /* then put the data */
5685        gen_op_mov_reg_v(ot, reg, cpu_T1);
5686        if (s->base.is_jmp) {
5687            gen_jmp_im(s->pc - s->cs_base);
5688            gen_eob(s);
5689        }
5690        break;
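    /* Editorial note: the LDS/LES family reads the offset first but
       loads the segment register before writing the data register, so a
       faulting selector load leaves the destination GPR untouched.  */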
5691
5692        /************************/
5693        /* shifts */
5694    case 0xc0:
5695    case 0xc1:
5696        /* shift Ev,Ib */
5697        shift = 2;
5698    grp2:
5699        {
5700            ot = mo_b_d(b, dflag);
5701            modrm = x86_ldub_code(env, s);
5702            mod = (modrm >> 6) & 3;
5703            op = (modrm >> 3) & 7;
5704
5705            if (mod != 3) {
5706                if (shift == 2) {
5707                    s->rip_offset = 1;
5708                }
5709                gen_lea_modrm(env, s, modrm);
5710                opreg = OR_TMP0;
5711            } else {
5712                opreg = (modrm & 7) | REX_B(s);
5713            }
5714
5715            /* simpler op */
5716            if (shift == 0) {
5717                gen_shift(s, op, ot, opreg, OR_ECX);
5718            } else {
5719                if (shift == 2) {
5720                    shift = x86_ldub_code(env, s);
5721                }
5722                gen_shifti(s, op, ot, opreg, shift);
5723            }
5724        }
5725        break;
5726    case 0xd0:
5727    case 0xd1:
5728        /* shift Ev,1 */
5729        shift = 1;
5730        goto grp2;
5731    case 0xd2:
5732    case 0xd3:
5733        /* shift Ev,cl */
5734        shift = 0;
5735        goto grp2;
5736
5737    case 0x1a4: /* shld imm */
5738        op = 0;
5739        shift = 1;
5740        goto do_shiftd;
5741    case 0x1a5: /* shld cl */
5742        op = 0;
5743        shift = 0;
5744        goto do_shiftd;
5745    case 0x1ac: /* shrd imm */
5746        op = 1;
5747        shift = 1;
5748        goto do_shiftd;
5749    case 0x1ad: /* shrd cl */
5750        op = 1;
5751        shift = 0;
5752    do_shiftd:
5753        ot = dflag;
5754        modrm = x86_ldub_code(env, s);
5755        mod = (modrm >> 6) & 3;
5756        rm = (modrm & 7) | REX_B(s);
5757        reg = ((modrm >> 3) & 7) | rex_r;
5758        if (mod != 3) {
5759            gen_lea_modrm(env, s, modrm);
5760            opreg = OR_TMP0;
5761        } else {
5762            opreg = rm;
5763        }
5764        gen_op_mov_v_reg(ot, cpu_T1, reg);
5765
5766        if (shift) {
5767            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5768            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5769            tcg_temp_free(imm);
5770        } else {
5771            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5772        }
5773        break;
5774
5775        /************************/
5776        /* floats */
5777    case 0xd8 ... 0xdf:
5778        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5779            /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5780            /* XXX: what should be done on an illegal op?  */
5781            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5782            break;
5783        }
5784        modrm = x86_ldub_code(env, s);
5785        mod = (modrm >> 6) & 3;
5786        rm = modrm & 7;
5787        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
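        /* Editorial note: the x87 opcode space is flattened into
           op = (low 3 bits of D8..DF) * 8 + the ModRM reg field; for
           instance d9 e8 (fld1) has b & 7 == 1 and reg == 5, giving the
           register-form case 0x0d below.  */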
5788        if (mod != 3) {
5789            /* memory op */
5790            gen_lea_modrm(env, s, modrm);
5791            switch(op) {
5792            case 0x00 ... 0x07: /* fxxxs */
5793            case 0x10 ... 0x17: /* fixxxl */
5794            case 0x20 ... 0x27: /* fxxxl */
5795            case 0x30 ... 0x37: /* fixxx */
5796                {
5797                    int op1;
5798                    op1 = op & 7;
5799
5800                    switch(op >> 4) {
5801                    case 0:
5802                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5803                                            s->mem_index, MO_LEUL);
5804                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5805                        break;
5806                    case 1:
5807                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5808                                            s->mem_index, MO_LEUL);
5809                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5810                        break;
5811                    case 2:
5812                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5813                                            s->mem_index, MO_LEQ);
5814                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5815                        break;
5816                    case 3:
5817                    default:
5818                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5819                                            s->mem_index, MO_LESW);
5820                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5821                        break;
5822                    }
5823
5824                    gen_helper_fp_arith_ST0_FT0(op1);
5825                    if (op1 == 3) {
5826                        /* fcomp needs pop */
5827                        gen_helper_fpop(cpu_env);
5828                    }
5829                }
5830                break;
5831            case 0x08: /* flds */
5832            case 0x0a: /* fsts */
5833            case 0x0b: /* fstps */
5834            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5835            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5836            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5837                switch(op & 7) {
5838                case 0:
5839                    switch(op >> 4) {
5840                    case 0:
5841                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5842                                            s->mem_index, MO_LEUL);
5843                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5844                        break;
5845                    case 1:
5846                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5847                                            s->mem_index, MO_LEUL);
5848                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5849                        break;
5850                    case 2:
5851                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5852                                            s->mem_index, MO_LEQ);
5853                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5854                        break;
5855                    case 3:
5856                    default:
5857                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5858                                            s->mem_index, MO_LESW);
5859                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5860                        break;
5861                    }
5862                    break;
5863                case 1:
5864                    /* XXX: the corresponding CPUID bit (SSE3, for FISTTP) must be tested! */
5865                    switch(op >> 4) {
5866                    case 1:
5867                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5868                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5869                                            s->mem_index, MO_LEUL);
5870                        break;
5871                    case 2:
5872                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5873                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5874                                            s->mem_index, MO_LEQ);
5875                        break;
5876                    case 3:
5877                    default:
5878                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5879                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5880                                            s->mem_index, MO_LEUW);
5881                        break;
5882                    }
5883                    gen_helper_fpop(cpu_env);
5884                    break;
5885                default:
5886                    switch(op >> 4) {
5887                    case 0:
5888                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5889                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5890                                            s->mem_index, MO_LEUL);
5891                        break;
5892                    case 1:
5893                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5894                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5895                                            s->mem_index, MO_LEUL);
5896                        break;
5897                    case 2:
5898                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5899                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5900                                            s->mem_index, MO_LEQ);
5901                        break;
5902                    case 3:
5903                    default:
5904                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5905                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5906                                            s->mem_index, MO_LEUW);
5907                        break;
5908                    }
5909                    if ((op & 7) == 3)
5910                        gen_helper_fpop(cpu_env);
5911                    break;
5912                }
5913                break;
5914            case 0x0c: /* fldenv mem */
5915                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5916                break;
5917            case 0x0d: /* fldcw mem */
5918                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5919                                    s->mem_index, MO_LEUW);
5920                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5921                break;
5922            case 0x0e: /* fnstenv mem */
5923                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5924                break;
5925            case 0x0f: /* fnstcw mem */
5926                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5927                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5928                                    s->mem_index, MO_LEUW);
5929                break;
5930            case 0x1d: /* fldt mem */
5931                gen_helper_fldt_ST0(cpu_env, cpu_A0);
5932                break;
5933            case 0x1f: /* fstpt mem */
5934                gen_helper_fstt_ST0(cpu_env, cpu_A0);
5935                gen_helper_fpop(cpu_env);
5936                break;
5937            case 0x2c: /* frstor mem */
5938                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5939                break;
5940            case 0x2e: /* fnsave mem */
5941                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5942                break;
5943            case 0x2f: /* fnstsw mem */
5944                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5945                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5946                                    s->mem_index, MO_LEUW);
5947                break;
5948            case 0x3c: /* fbld */
5949                gen_helper_fbld_ST0(cpu_env, cpu_A0);
5950                break;
5951            case 0x3e: /* fbstp */
5952                gen_helper_fbst_ST0(cpu_env, cpu_A0);
5953                gen_helper_fpop(cpu_env);
5954                break;
5955            case 0x3d: /* fildll */
5956                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5957                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5958                break;
5959            case 0x3f: /* fistpll */
5960                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5961                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5962                gen_helper_fpop(cpu_env);
5963                break;
5964            default:
5965                goto unknown_op;
5966            }
5967        } else {
5968            /* register float ops */
5969            opreg = rm;
5970
5971            switch(op) {
5972            case 0x08: /* fld sti */
5973                gen_helper_fpush(cpu_env);
5974                gen_helper_fmov_ST0_STN(cpu_env,
5975                                        tcg_const_i32((opreg + 1) & 7));
5976                break;
5977            case 0x09: /* fxchg sti */
5978            case 0x29: /* fxchg4 sti, undocumented op */
5979            case 0x39: /* fxchg7 sti, undocumented op */
5980                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5981                break;
5982            case 0x0a: /* grp d9/2 */
5983                switch(rm) {
5984                case 0: /* fnop */
5985                    /* check exceptions (FreeBSD FPU probe) */
5986                    gen_helper_fwait(cpu_env);
5987                    break;
5988                default:
5989                    goto unknown_op;
5990                }
5991                break;
5992            case 0x0c: /* grp d9/4 */
5993                switch(rm) {
5994                case 0: /* fchs */
5995                    gen_helper_fchs_ST0(cpu_env);
5996                    break;
5997                case 1: /* fabs */
5998                    gen_helper_fabs_ST0(cpu_env);
5999                    break;
6000                case 4: /* ftst */
6001                    gen_helper_fldz_FT0(cpu_env);
6002                    gen_helper_fcom_ST0_FT0(cpu_env);
6003                    break;
6004                case 5: /* fxam */
6005                    gen_helper_fxam_ST0(cpu_env);
6006                    break;
6007                default:
6008                    goto unknown_op;
6009                }
6010                break;
6011            case 0x0d: /* grp d9/5 */
6012                {
6013                    switch(rm) {
6014                    case 0:
6015                        gen_helper_fpush(cpu_env);
6016                        gen_helper_fld1_ST0(cpu_env);
6017                        break;
6018                    case 1:
6019                        gen_helper_fpush(cpu_env);
6020                        gen_helper_fldl2t_ST0(cpu_env);
6021                        break;
6022                    case 2:
6023                        gen_helper_fpush(cpu_env);
6024                        gen_helper_fldl2e_ST0(cpu_env);
6025                        break;
6026                    case 3:
6027                        gen_helper_fpush(cpu_env);
6028                        gen_helper_fldpi_ST0(cpu_env);
6029                        break;
6030                    case 4:
6031                        gen_helper_fpush(cpu_env);
6032                        gen_helper_fldlg2_ST0(cpu_env);
6033                        break;
6034                    case 5:
6035                        gen_helper_fpush(cpu_env);
6036                        gen_helper_fldln2_ST0(cpu_env);
6037                        break;
6038                    case 6:
6039                        gen_helper_fpush(cpu_env);
6040                        gen_helper_fldz_ST0(cpu_env);
6041                        break;
6042                    default:
6043                        goto unknown_op;
6044                    }
6045                }
6046                break;
6047            case 0x0e: /* grp d9/6 */
6048                switch(rm) {
6049                case 0: /* f2xm1 */
6050                    gen_helper_f2xm1(cpu_env);
6051                    break;
6052                case 1: /* fyl2x */
6053                    gen_helper_fyl2x(cpu_env);
6054                    break;
6055                case 2: /* fptan */
6056                    gen_helper_fptan(cpu_env);
6057                    break;
6058                case 3: /* fpatan */
6059                    gen_helper_fpatan(cpu_env);
6060                    break;
6061                case 4: /* fxtract */
6062                    gen_helper_fxtract(cpu_env);
6063                    break;
6064                case 5: /* fprem1 */
6065                    gen_helper_fprem1(cpu_env);
6066                    break;
6067                case 6: /* fdecstp */
6068                    gen_helper_fdecstp(cpu_env);
6069                    break;
6070                default:
6071                case 7: /* fincstp */
6072                    gen_helper_fincstp(cpu_env);
6073                    break;
6074                }
6075                break;
6076            case 0x0f: /* grp d9/7 */
6077                switch(rm) {
6078                case 0: /* fprem */
6079                    gen_helper_fprem(cpu_env);
6080                    break;
6081                case 1: /* fyl2xp1 */
6082                    gen_helper_fyl2xp1(cpu_env);
6083                    break;
6084                case 2: /* fsqrt */
6085                    gen_helper_fsqrt(cpu_env);
6086                    break;
6087                case 3: /* fsincos */
6088                    gen_helper_fsincos(cpu_env);
6089                    break;
6090                case 5: /* fscale */
6091                    gen_helper_fscale(cpu_env);
6092                    break;
6093                case 4: /* frndint */
6094                    gen_helper_frndint(cpu_env);
6095                    break;
6096                case 6: /* fsin */
6097                    gen_helper_fsin(cpu_env);
6098                    break;
6099                default:
6100                case 7: /* fcos */
6101                    gen_helper_fcos(cpu_env);
6102                    break;
6103                }
6104                break;
6105            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6106            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6107            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6108                {
6109                    int op1;
6110
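                        /* op & 7 selects the arithmetic op: 0 fadd, 1 fmul,
                           4 fsub, 5 fsubr, 6 fdiv, 7 fdivr (2 and 3, fcom
                           and fcomp, have their own cases below).  Ops
                           0x20..0x37 target ST(i) instead of ST0, and
                           0x30..0x37 also pop the stack.  */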
6111                    op1 = op & 7;
6112                    if (op >= 0x20) {
6113                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6114                        if (op >= 0x30)
6115                            gen_helper_fpop(cpu_env);
6116                    } else {
6117                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6118                        gen_helper_fp_arith_ST0_FT0(op1);
6119                    }
6120                }
6121                break;
6122            case 0x02: /* fcom */
6123            case 0x22: /* fcom2, undocumented op */
6124                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6125                gen_helper_fcom_ST0_FT0(cpu_env);
6126                break;
6127            case 0x03: /* fcomp */
6128            case 0x23: /* fcomp3, undocumented op */
6129            case 0x32: /* fcomp5, undocumented op */
6130                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6131                gen_helper_fcom_ST0_FT0(cpu_env);
6132                gen_helper_fpop(cpu_env);
6133                break;
6134            case 0x15: /* da/5 */
6135                switch(rm) {
6136                case 1: /* fucompp */
6137                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6138                    gen_helper_fucom_ST0_FT0(cpu_env);
6139                    gen_helper_fpop(cpu_env);
6140                    gen_helper_fpop(cpu_env);
6141                    break;
6142                default:
6143                    goto unknown_op;
6144                }
6145                break;
6146            case 0x1c:
6147                switch(rm) {
6148                case 0: /* feni (287 only, just do nop here) */
6149                    break;
6150                case 1: /* fdisi (287 only, just do nop here) */
6151                    break;
6152                case 2: /* fclex */
6153                    gen_helper_fclex(cpu_env);
6154                    break;
6155                case 3: /* fninit */
6156                    gen_helper_fninit(cpu_env);
6157                    break;
6158                case 4: /* fsetpm (287 only, just do nop here) */
6159                    break;
6160                default:
6161                    goto unknown_op;
6162                }
6163                break;
6164            case 0x1d: /* fucomi */
6165                if (!(s->cpuid_features & CPUID_CMOV)) {
6166                    goto illegal_op;
6167                }
6168                gen_update_cc_op(s);
6169                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6170                gen_helper_fucomi_ST0_FT0(cpu_env);
6171                set_cc_op(s, CC_OP_EFLAGS);
6172                break;
6173            case 0x1e: /* fcomi */
6174                if (!(s->cpuid_features & CPUID_CMOV)) {
6175                    goto illegal_op;
6176                }
6177                gen_update_cc_op(s);
6178                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6179                gen_helper_fcomi_ST0_FT0(cpu_env);
6180                set_cc_op(s, CC_OP_EFLAGS);
6181                break;
6182            case 0x28: /* ffree sti */
6183                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6184                break;
6185            case 0x2a: /* fst sti */
6186                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6187                break;
6188            case 0x2b: /* fstp sti */
6189            case 0x0b: /* fstp1 sti, undocumented op */
6190            case 0x3a: /* fstp8 sti, undocumented op */
6191            case 0x3b: /* fstp9 sti, undocumented op */
6192                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6193                gen_helper_fpop(cpu_env);
6194                break;
6195            case 0x2c: /* fucom st(i) */
6196                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6197                gen_helper_fucom_ST0_FT0(cpu_env);
6198                break;
6199            case 0x2d: /* fucomp st(i) */
6200                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6201                gen_helper_fucom_ST0_FT0(cpu_env);
6202                gen_helper_fpop(cpu_env);
6203                break;
6204            case 0x33: /* de/3 */
6205                switch(rm) {
6206                case 1: /* fcompp */
6207                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6208                    gen_helper_fcom_ST0_FT0(cpu_env);
6209                    gen_helper_fpop(cpu_env);
6210                    gen_helper_fpop(cpu_env);
6211                    break;
6212                default:
6213                    goto unknown_op;
6214                }
6215                break;
6216            case 0x38: /* ffreep sti, undocumented op */
6217                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6218                gen_helper_fpop(cpu_env);
6219                break;
6220            case 0x3c: /* df/4 */
6221                switch(rm) {
6222                case 0:
6223                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
6224                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
6225                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
6226                    break;
6227                default:
6228                    goto unknown_op;
6229                }
6230                break;
6231            case 0x3d: /* fucomip */
6232                if (!(s->cpuid_features & CPUID_CMOV)) {
6233                    goto illegal_op;
6234                }
6235                gen_update_cc_op(s);
6236                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6237                gen_helper_fucomi_ST0_FT0(cpu_env);
6238                gen_helper_fpop(cpu_env);
6239                set_cc_op(s, CC_OP_EFLAGS);
6240                break;
6241            case 0x3e: /* fcomip */
6242                if (!(s->cpuid_features & CPUID_CMOV)) {
6243                    goto illegal_op;
6244                }
6245                gen_update_cc_op(s);
6246                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6247                gen_helper_fcomi_ST0_FT0(cpu_env);
6248                gen_helper_fpop(cpu_env);
6249                set_cc_op(s, CC_OP_EFLAGS);
6250                break;
6251            case 0x10 ... 0x13: /* fcmovxx */
6252            case 0x18 ... 0x1b:
6253                {
6254                    int op1;
6255                    TCGLabel *l1;
6256                    static const uint8_t fcmov_cc[8] = {
6257                        (JCC_B << 1),
6258                        (JCC_Z << 1),
6259                        (JCC_BE << 1),
6260                        (JCC_P << 1),
6261                    };
6262
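                        /* gen_jcc1_noeob branches around the fmov, so op1
                           must be the inverse of the fcmov condition: the
                           low bit of the jcc code is the negation flag, and
                           opcode bit 3 distinguishes fcmovcc (0x10..0x13)
                           from fcmovncc (0x18..0x1b).  */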
6263                    if (!(s->cpuid_features & CPUID_CMOV)) {
6264                        goto illegal_op;
6265                    }
6266                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6267                    l1 = gen_new_label();
6268                    gen_jcc1_noeob(s, op1, l1);
6269                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6270                    gen_set_label(l1);
6271                }
6272                break;
6273            default:
6274                goto unknown_op;
6275            }
6276        }
6277        break;
6278        /************************/
6279        /* string ops */
6280
6281    case 0xa4: /* movsS */
6282    case 0xa5:
6283        ot = mo_b_d(b, dflag);
6284        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6285            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6286        } else {
6287            gen_movs(s, ot);
6288        }
6289        break;
6290
6291    case 0xaa: /* stosS */
6292    case 0xab:
6293        ot = mo_b_d(b, dflag);
6294        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6295            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6296        } else {
6297            gen_stos(s, ot);
6298        }
6299        break;
6300    case 0xac: /* lodsS */
6301    case 0xad:
6302        ot = mo_b_d(b, dflag);
6303        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6304            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6305        } else {
6306            gen_lods(s, ot);
6307        }
6308        break;
6309    case 0xae: /* scasS */
6310    case 0xaf:
6311        ot = mo_b_d(b, dflag);
6312        if (prefixes & PREFIX_REPNZ) {
6313            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6314        } else if (prefixes & PREFIX_REPZ) {
6315            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6316        } else {
6317            gen_scas(s, ot);
6318        }
6319        break;
6320
6321    case 0xa6: /* cmpsS */
6322    case 0xa7:
6323        ot = mo_b_d(b, dflag);
6324        if (prefixes & PREFIX_REPNZ) {
6325            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6326        } else if (prefixes & PREFIX_REPZ) {
6327            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6328        } else {
6329            gen_cmps(s, ot);
6330        }
6331        break;
6332    case 0x6c: /* insS */
6333    case 0x6d:
6334        ot = mo_b_d32(b, dflag);
6335        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6336        gen_check_io(s, ot, pc_start - s->cs_base,
6337                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6338        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6339            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6340        } else {
6341            gen_ins(s, ot);
6342            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6343                gen_jmp(s, s->pc - s->cs_base);
6344            }
6345        }
6346        break;
6347    case 0x6e: /* outsS */
6348    case 0x6f:
6349        ot = mo_b_d32(b, dflag);
6350        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6351        gen_check_io(s, ot, pc_start - s->cs_base,
6352                     svm_is_rep(prefixes) | 4);
6353        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6354            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6355        } else {
6356            gen_outs(s, ot);
6357            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6358                gen_jmp(s, s->pc - s->cs_base);
6359            }
6360        }
6361        break;
6362
6363        /************************/
6364        /* port I/O */
6365
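        /* When icount is enabled, each I/O access below is bracketed by
           gen_io_start/gen_io_end and the TB is ended right after the
           insn, so the access happens at a deterministic instruction
           count.  */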
6366    case 0xe4:
6367    case 0xe5:
6368        ot = mo_b_d32(b, dflag);
6369        val = x86_ldub_code(env, s);
6370        tcg_gen_movi_tl(cpu_T0, val);
6371        gen_check_io(s, ot, pc_start - s->cs_base,
6372                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6373        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6374            gen_io_start();
6375        }
6376        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6377        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6378        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6379        gen_bpt_io(s, cpu_tmp2_i32, ot);
6380        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6381            gen_io_end();
6382            gen_jmp(s, s->pc - s->cs_base);
6383        }
6384        break;
6385    case 0xe6:
6386    case 0xe7:
6387        ot = mo_b_d32(b, dflag);
6388        val = x86_ldub_code(env, s);
6389        tcg_gen_movi_tl(cpu_T0, val);
6390        gen_check_io(s, ot, pc_start - s->cs_base,
6391                     svm_is_rep(prefixes));
6392        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6393
6394        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6395            gen_io_start();
6396        }
6397        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6398        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6399        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6400        gen_bpt_io(s, cpu_tmp2_i32, ot);
6401        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6402            gen_io_end();
6403            gen_jmp(s, s->pc - s->cs_base);
6404        }
6405        break;
6406    case 0xec:
6407    case 0xed:
6408        ot = mo_b_d32(b, dflag);
6409        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6410        gen_check_io(s, ot, pc_start - s->cs_base,
6411                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6412        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6413            gen_io_start();
6414        }
6415        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6416        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6417        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6418        gen_bpt_io(s, cpu_tmp2_i32, ot);
6419        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6420            gen_io_end();
6421            gen_jmp(s, s->pc - s->cs_base);
6422        }
6423        break;
6424    case 0xee:
6425    case 0xef:
6426        ot = mo_b_d32(b, dflag);
6427        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6428        gen_check_io(s, ot, pc_start - s->cs_base,
6429                     svm_is_rep(prefixes));
6430        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6431
6432        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6433            gen_io_start();
6434        }
6435        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6436        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6437        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6438        gen_bpt_io(s, cpu_tmp2_i32, ot);
6439        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6440            gen_io_end();
6441            gen_jmp(s, s->pc - s->cs_base);
6442        }
6443        break;
6444
6445        /************************/
6446        /* control */
6447    case 0xc2: /* ret im */
6448        val = x86_ldsw_code(env, s);
6449        ot = gen_pop_T0(s);
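        /* A single stack adjustment covers both the popped return address
           (1 << ot bytes) and the val bytes of callee-popped arguments.  */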
6450        gen_stack_update(s, val + (1 << ot));
6451        /* Note that gen_pop_T0 uses a zero-extending load.  */
6452        gen_op_jmp_v(cpu_T0);
6453        gen_bnd_jmp(s);
6454        gen_jr(s, cpu_T0);
6455        break;
6456    case 0xc3: /* ret */
6457        ot = gen_pop_T0(s);
6458        gen_pop_update(s, ot);
6459        /* Note that gen_pop_T0 uses a zero-extending load.  */
6460        gen_op_jmp_v(cpu_T0);
6461        gen_bnd_jmp(s);
6462        gen_jr(s, cpu_T0);
6463        break;
6464    case 0xca: /* lret im */
6465        val = x86_ldsw_code(env, s);
6466    do_lret:
6467        if (s->pe && !s->vm86) {
6468            gen_update_cc_op(s);
6469            gen_jmp_im(pc_start - s->cs_base);
6470            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6471                                      tcg_const_i32(val));
6472        } else {
6473            gen_stack_A0(s);
6474            /* pop offset */
6475            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6476            /* NOTE: keeping EIP updated is not a problem in case of
6477               exception */
6478            gen_op_jmp_v(cpu_T0);
6479            /* pop selector */
6480            gen_add_A0_im(s, 1 << dflag);
6481            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6482            gen_op_movl_seg_T0_vm(R_CS);
6483            /* add stack offset */
6484            gen_stack_update(s, val + (2 << dflag));
6485        }
6486        gen_eob(s);
6487        break;
6488    case 0xcb: /* lret */
6489        val = 0;
6490        goto do_lret;
6491    case 0xcf: /* iret */
6492        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
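        /* Three flavours: real mode, vm86 (allowed only with IOPL 3), and
           protected mode, where the helper performs the full privilege
           and task-return checks.  */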
6493        if (!s->pe) {
6494            /* real mode */
6495            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6496            set_cc_op(s, CC_OP_EFLAGS);
6497        } else if (s->vm86) {
6498            if (s->iopl != 3) {
6499                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6500            } else {
6501                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6502                set_cc_op(s, CC_OP_EFLAGS);
6503            }
6504        } else {
6505            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6506                                      tcg_const_i32(s->pc - s->cs_base));
6507            set_cc_op(s, CC_OP_EFLAGS);
6508        }
6509        gen_eob(s);
6510        break;
6511    case 0xe8: /* call im */
6512        {
6513            if (dflag != MO_16) {
6514                tval = (int32_t)insn_get(env, s, MO_32);
6515            } else {
6516                tval = (int16_t)insn_get(env, s, MO_16);
6517            }
6518            next_eip = s->pc - s->cs_base;
6519            tval += next_eip;
6520            if (dflag == MO_16) {
6521                tval &= 0xffff;
6522            } else if (!CODE64(s)) {
6523                tval &= 0xffffffff;
6524            }
6525            tcg_gen_movi_tl(cpu_T0, next_eip);
6526            gen_push_v(s, cpu_T0);
6527            gen_bnd_jmp(s);
6528            gen_jmp(s, tval);
6529        }
6530        break;
6531    case 0x9a: /* lcall im */
6532        {
6533            unsigned int selector, offset;
6534
6535            if (CODE64(s))
6536                goto illegal_op;
6537            ot = dflag;
6538            offset = insn_get(env, s, ot);
6539            selector = insn_get(env, s, MO_16);
6540
6541            tcg_gen_movi_tl(cpu_T0, selector);
6542            tcg_gen_movi_tl(cpu_T1, offset);
6543        }
6544        goto do_lcall;
6545    case 0xe9: /* jmp im */
6546        if (dflag != MO_16) {
6547            tval = (int32_t)insn_get(env, s, MO_32);
6548        } else {
6549            tval = (int16_t)insn_get(env, s, MO_16);
6550        }
6551        tval += s->pc - s->cs_base;
6552        if (dflag == MO_16) {
6553            tval &= 0xffff;
6554        } else if (!CODE64(s)) {
6555            tval &= 0xffffffff;
6556        }
6557        gen_bnd_jmp(s);
6558        gen_jmp(s, tval);
6559        break;
6560    case 0xea: /* ljmp im */
6561        {
6562            unsigned int selector, offset;
6563
6564            if (CODE64(s))
6565                goto illegal_op;
6566            ot = dflag;
6567            offset = insn_get(env, s, ot);
6568            selector = insn_get(env, s, MO_16);
6569
6570            tcg_gen_movi_tl(cpu_T0, selector);
6571            tcg_gen_movi_tl(cpu_T1, offset);
6572        }
6573        goto do_ljmp;
6574    case 0xeb: /* jmp Jb */
6575        tval = (int8_t)insn_get(env, s, MO_8);
6576        tval += s->pc - s->cs_base;
6577        if (dflag == MO_16) {
6578            tval &= 0xffff;
6579        }
6580        gen_jmp(s, tval);
6581        break;
6582    case 0x70 ... 0x7f: /* jcc Jb */
6583        tval = (int8_t)insn_get(env, s, MO_8);
6584        goto do_jcc;
6585    case 0x180 ... 0x18f: /* jcc Jv */
6586        if (dflag != MO_16) {
6587            tval = (int32_t)insn_get(env, s, MO_32);
6588        } else {
6589            tval = (int16_t)insn_get(env, s, MO_16);
6590        }
6591    do_jcc:
6592        next_eip = s->pc - s->cs_base;
6593        tval += next_eip;
6594        if (dflag == MO_16) {
6595            tval &= 0xffff;
6596        }
6597        gen_bnd_jmp(s);
6598        gen_jcc(s, b, tval, next_eip);
6599        break;
6600
6601    case 0x190 ... 0x19f: /* setcc Gv */
6602        modrm = x86_ldub_code(env, s);
6603        gen_setcc1(s, b, cpu_T0);
6604        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6605        break;
6606    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6607        if (!(s->cpuid_features & CPUID_CMOV)) {
6608            goto illegal_op;
6609        }
6610        ot = dflag;
6611        modrm = x86_ldub_code(env, s);
6612        reg = ((modrm >> 3) & 7) | rex_r;
6613        gen_cmovcc1(env, s, ot, b, modrm, reg);
6614        break;
6615
6616        /************************/
6617        /* flags */
6618    case 0x9c: /* pushf */
6619        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6620        if (s->vm86 && s->iopl != 3) {
6621            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6622        } else {
6623            gen_update_cc_op(s);
6624            gen_helper_read_eflags(cpu_T0, cpu_env);
6625            gen_push_v(s, cpu_T0);
6626        }
6627        break;
6628    case 0x9d: /* popf */
6629        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6630        if (s->vm86 && s->iopl != 3) {
6631            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6632        } else {
6633            ot = gen_pop_T0(s);
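                /* The set of writable EFLAGS bits depends on privilege:
                   CPL 0 may also change IF and IOPL, CPL <= IOPL may change
                   IF but not IOPL, and anyone may change TF/AC/ID/NT.  A
                   16-bit popf never touches the upper half of EFLAGS.  */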
6634            if (s->cpl == 0) {
6635                if (dflag != MO_16) {
6636                    gen_helper_write_eflags(cpu_env, cpu_T0,
6637                                            tcg_const_i32((TF_MASK | AC_MASK |
6638                                                           ID_MASK | NT_MASK |
6639                                                           IF_MASK |
6640                                                           IOPL_MASK)));
6641                } else {
6642                    gen_helper_write_eflags(cpu_env, cpu_T0,
6643                                            tcg_const_i32((TF_MASK | AC_MASK |
6644                                                           ID_MASK | NT_MASK |
6645                                                           IF_MASK | IOPL_MASK)
6646                                                          & 0xffff));
6647                }
6648            } else {
6649                if (s->cpl <= s->iopl) {
6650                    if (dflag != MO_16) {
6651                        gen_helper_write_eflags(cpu_env, cpu_T0,
6652                                                tcg_const_i32((TF_MASK |
6653                                                               AC_MASK |
6654                                                               ID_MASK |
6655                                                               NT_MASK |
6656                                                               IF_MASK)));
6657                    } else {
6658                        gen_helper_write_eflags(cpu_env, cpu_T0,
6659                                                tcg_const_i32((TF_MASK |
6660                                                               AC_MASK |
6661                                                               ID_MASK |
6662                                                               NT_MASK |
6663                                                               IF_MASK)
6664                                                              & 0xffff));
6665                    }
6666                } else {
6667                    if (dflag != MO_16) {
6668                        gen_helper_write_eflags(cpu_env, cpu_T0,
6669                                           tcg_const_i32((TF_MASK | AC_MASK |
6670                                                          ID_MASK | NT_MASK)));
6671                    } else {
6672                        gen_helper_write_eflags(cpu_env, cpu_T0,
6673                                           tcg_const_i32((TF_MASK | AC_MASK |
6674                                                          ID_MASK | NT_MASK)
6675                                                         & 0xffff));
6676                    }
6677                }
6678            }
6679            gen_pop_update(s, ot);
6680            set_cc_op(s, CC_OP_EFLAGS);
6681            /* abort translation because the TF/AC flags may change */
6682            gen_jmp_im(s->pc - s->cs_base);
6683            gen_eob(s);
6684        }
6685        break;
6686    case 0x9e: /* sahf */
6687        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6688            goto illegal_op;
6689        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
6690        gen_compute_eflags(s);
6691        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6692        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6693        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
6694        break;
6695    case 0x9f: /* lahf */
6696        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6697            goto illegal_op;
6698        gen_compute_eflags(s);
6699        /* Note: gen_compute_eflags() only gives the condition codes */
6700        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
6701        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
6702        break;
6703    case 0xf5: /* cmc */
6704        gen_compute_eflags(s);
6705        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6706        break;
6707    case 0xf8: /* clc */
6708        gen_compute_eflags(s);
6709        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6710        break;
6711    case 0xf9: /* stc */
6712        gen_compute_eflags(s);
6713        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6714        break;
6715    case 0xfc: /* cld */
6716        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
6717        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6718        break;
6719    case 0xfd: /* std */
6720        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
6721        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6722        break;
6723
6724        /************************/
6725        /* bit operations */
6726    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6727        ot = dflag;
6728        modrm = x86_ldub_code(env, s);
6729        op = (modrm >> 3) & 7;
6730        mod = (modrm >> 6) & 3;
6731        rm = (modrm & 7) | REX_B(s);
6732        if (mod != 3) {
6733            s->rip_offset = 1;
6734            gen_lea_modrm(env, s, modrm);
6735            if (!(s->prefix & PREFIX_LOCK)) {
6736                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6737            }
6738        } else {
6739            gen_op_mov_v_reg(ot, cpu_T0, rm);
6740        }
6741        /* load shift */
6742        val = x86_ldub_code(env, s);
6743        tcg_gen_movi_tl(cpu_T1, val);
6744        if (op < 4)
6745            goto unknown_op;
6746        op -= 4;
6747        goto bt_op;
6748    case 0x1a3: /* bt Gv, Ev */
6749        op = 0;
6750        goto do_btx;
6751    case 0x1ab: /* bts */
6752        op = 1;
6753        goto do_btx;
6754    case 0x1b3: /* btr */
6755        op = 2;
6756        goto do_btx;
6757    case 0x1bb: /* btc */
6758        op = 3;
6759    do_btx:
6760        ot = dflag;
6761        modrm = x86_ldub_code(env, s);
6762        reg = ((modrm >> 3) & 7) | rex_r;
6763        mod = (modrm >> 6) & 3;
6764        rm = (modrm & 7) | REX_B(s);
6765        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
6766        if (mod != 3) {
6767            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6768            /* specific case: we need to add a displacement */
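                /* The bit offset in T1 may index outside the addressed
                   word: an arithmetic right shift by 3 + ot extracts the
                   signed word index, and shifting it left by ot turns that
                   into a byte displacement for the effective address.  */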
6769            gen_exts(ot, cpu_T1);
6770            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
6771            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
6772            tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
6773            gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
6774            if (!(s->prefix & PREFIX_LOCK)) {
6775                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6776            }
6777        } else {
6778            gen_op_mov_v_reg(ot, cpu_T0, rm);
6779        }
6780    bt_op:
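            /* Mask T1 down to a bit offset within the operand (for memory
               operands the word displacement was already folded into the
               address above) and build the one-bit mask in tmp0.  */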
6781        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
6782        tcg_gen_movi_tl(cpu_tmp0, 1);
6783        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
6784        if (s->prefix & PREFIX_LOCK) {
6785            switch (op) {
6786            case 0: /* bt */
6787                /* Needs no atomic ops; we suppressed the normal
6788                   memory load for LOCK above, so do it now.  */
6789                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6790                break;
6791            case 1: /* bts */
6792                tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
6793                                           s->mem_index, ot | MO_LE);
6794                break;
6795            case 2: /* btr */
6796                tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
6797                tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
6798                                            s->mem_index, ot | MO_LE);
6799                break;
6800            default:
6801            case 3: /* btc */
6802                tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
6803                                            s->mem_index, ot | MO_LE);
6804                break;
6805            }
6806            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6807        } else {
6808            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6809            switch (op) {
6810            case 0: /* bt */
6811                /* Data already loaded; nothing to do.  */
6812                break;
6813            case 1: /* bts */
6814                tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
6815                break;
6816            case 2: /* btr */
6817                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
6818                break;
6819            default:
6820            case 3: /* btc */
6821                tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
6822                break;
6823            }
6824            if (op != 0) {
6825                if (mod != 3) {
6826                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
6827                } else {
6828                    gen_op_mov_reg_v(ot, rm, cpu_T0);
6829                }
6830            }
6831        }
6832
6833        /* Delay all CC updates until after the store above.  Note that
6834           C is the result of the test, Z is unchanged, and the others
6835           are all undefined.  */
6836        switch (s->cc_op) {
6837        case CC_OP_MULB ... CC_OP_MULQ:
6838        case CC_OP_ADDB ... CC_OP_ADDQ:
6839        case CC_OP_ADCB ... CC_OP_ADCQ:
6840        case CC_OP_SUBB ... CC_OP_SUBQ:
6841        case CC_OP_SBBB ... CC_OP_SBBQ:
6842        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6843        case CC_OP_INCB ... CC_OP_INCQ:
6844        case CC_OP_DECB ... CC_OP_DECQ:
6845        case CC_OP_SHLB ... CC_OP_SHLQ:
6846        case CC_OP_SARB ... CC_OP_SARQ:
6847        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6848            /* Z was going to be computed from the non-zero status of CC_DST.
6849               We can get that same Z value (and the new C value) by leaving
6850               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6851               same width.  */
6852            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
6853            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6854            break;
6855        default:
6856            /* Otherwise, generate EFLAGS and replace the C bit.  */
6857            gen_compute_eflags(s);
6858            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
6859                               ctz32(CC_C), 1);
6860            break;
6861        }
6862        break;
6863    case 0x1bc: /* bsf / tzcnt */
6864    case 0x1bd: /* bsr / lzcnt */
6865        ot = dflag;
6866        modrm = x86_ldub_code(env, s);
6867        reg = ((modrm >> 3) & 7) | rex_r;
6868        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6869        gen_extu(ot, cpu_T0);
6870
6871        /* Note that lzcnt and tzcnt are in different extensions.  */
6872        if ((prefixes & PREFIX_REPZ)
6873            && (b & 1
6874                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6875                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6876            int size = 8 << ot;
6877            /* For lzcnt/tzcnt, the C bit reflects the input: set iff it is zero. */
6878            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
6879            if (b & 1) {
6880                /* For lzcnt, reduce the target_ulong result by the
6881                   number of zeros that we expect to find at the top.  */
6882                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
6883                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
6884            } else {
6885                /* For tzcnt, a zero input must return the operand size.  */
6886                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
6887            }
6888            /* For lzcnt/tzcnt, the Z bit reflects the result: set iff it is zero.  */
6889            gen_op_update1_cc();
6890            set_cc_op(s, CC_OP_BMILGB + ot);
6891        } else {
6892            /* For bsr/bsf, only the Z bit is defined, and it reflects
6893               the input rather than the result.  */
6894            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
6895            set_cc_op(s, CC_OP_LOGICB + ot);
6896
6897            /* ??? The manual says that the output is undefined when the
6898               input is zero, but real hardware leaves it unchanged, and
6899               real programs appear to depend on that.  Accomplish this
6900               by passing the output as the value to return upon zero.  */
6901            if (b & 1) {
6902                /* For bsr, return the bit index of the first 1 bit,
6903                   not the count of leading zeros.  */
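                /* index = TARGET_LONG_BITS - 1 - clz: the two xoris with
                   TARGET_LONG_BITS - 1 implement that subtraction, and
                   pre-xoring the fallback (the old register value) makes a
                   zero input leave the register unchanged.  */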
6904                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6905                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
6906                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
6907            } else {
6908                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
6909            }
6910        }
6911        gen_op_mov_reg_v(ot, reg, cpu_T0);
6912        break;
6913        /************************/
6914        /* bcd */
6915    case 0x27: /* daa */
6916        if (CODE64(s))
6917            goto illegal_op;
6918        gen_update_cc_op(s);
6919        gen_helper_daa(cpu_env);
6920        set_cc_op(s, CC_OP_EFLAGS);
6921        break;
6922    case 0x2f: /* das */
6923        if (CODE64(s))
6924            goto illegal_op;
6925        gen_update_cc_op(s);
6926        gen_helper_das(cpu_env);
6927        set_cc_op(s, CC_OP_EFLAGS);
6928        break;
6929    case 0x37: /* aaa */
6930        if (CODE64(s))
6931            goto illegal_op;
6932        gen_update_cc_op(s);
6933        gen_helper_aaa(cpu_env);
6934        set_cc_op(s, CC_OP_EFLAGS);
6935        break;
6936    case 0x3f: /* aas */
6937        if (CODE64(s))
6938            goto illegal_op;
6939        gen_update_cc_op(s);
6940        gen_helper_aas(cpu_env);
6941        set_cc_op(s, CC_OP_EFLAGS);
6942        break;
6943    case 0xd4: /* aam */
6944        if (CODE64(s))
6945            goto illegal_op;
6946        val = x86_ldub_code(env, s);
6947        if (val == 0) {
6948            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6949        } else {
6950            gen_helper_aam(cpu_env, tcg_const_i32(val));
6951            set_cc_op(s, CC_OP_LOGICB);
6952        }
6953        break;
6954    case 0xd5: /* aad */
6955        if (CODE64(s))
6956            goto illegal_op;
6957        val = x86_ldub_code(env, s);
6958        gen_helper_aad(cpu_env, tcg_const_i32(val));
6959        set_cc_op(s, CC_OP_LOGICB);
6960        break;
6961        /************************/
6962        /* misc */
6963    case 0x90: /* nop */
6964        /* XXX: correct lock test for all insns */
6965        if (prefixes & PREFIX_LOCK) {
6966            goto illegal_op;
6967        }
6968        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
6969        if (REX_B(s)) {
6970            goto do_xchg_reg_eax;
6971        }
6972        if (prefixes & PREFIX_REPZ) {
6973            gen_update_cc_op(s);
6974            gen_jmp_im(pc_start - s->cs_base);
6975            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
6976            s->base.is_jmp = DISAS_NORETURN;
6977        }
6978        break;
6979    case 0x9b: /* fwait */
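        /* fwait raises #NM only when both CR0.MP and CR0.TS are set;
           HF_MP_MASK and HF_TS_MASK mirror those CR0 bits.  */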
6980        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
6981            (HF_MP_MASK | HF_TS_MASK)) {
6982            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6983        } else {
6984            gen_helper_fwait(cpu_env);
6985        }
6986        break;
6987    case 0xcc: /* int3 */
6988        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
6989        break;
6990    case 0xcd: /* int N */
6991        val = x86_ldub_code(env, s);
6992        if (s->vm86 && s->iopl != 3) {
6993            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6994        } else {
6995            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
6996        }
6997        break;
6998    case 0xce: /* into */
6999        if (CODE64(s))
7000            goto illegal_op;
7001        gen_update_cc_op(s);
7002        gen_jmp_im(pc_start - s->cs_base);
7003        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7004        break;
7005#ifdef WANT_ICEBP
7006    case 0xf1: /* icebp (undocumented, exits to external debugger) */
7007        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7008#if 1
7009        gen_debug(s, pc_start - s->cs_base);
7010#else
7011        /* start debug */
7012        tb_flush(CPU(x86_env_get_cpu(env)));
7013        qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
7014#endif
7015        break;
7016#endif
7017    case 0xfa: /* cli */
7018        if (!s->vm86) {
7019            if (s->cpl <= s->iopl) {
7020                gen_helper_cli(cpu_env);
7021            } else {
7022                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7023            }
7024        } else {
7025            if (s->iopl == 3) {
7026                gen_helper_cli(cpu_env);
7027            } else {
7028                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7029            }
7030        }
7031        break;
7032    case 0xfb: /* sti */
7033        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7034            gen_helper_sti(cpu_env);
7035            /* interrupts are enabled only after the first insn following sti */
7036            gen_jmp_im(s->pc - s->cs_base);
7037            gen_eob_inhibit_irq(s, true);
7038        } else {
7039            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7040        }
7041        break;
7042    case 0x62: /* bound */
7043        if (CODE64(s))
7044            goto illegal_op;
7045        ot = dflag;
7046        modrm = x86_ldub_code(env, s);
7047        reg = (modrm >> 3) & 7;
7048        mod = (modrm >> 6) & 3;
7049        if (mod == 3)
7050            goto illegal_op;
7051        gen_op_mov_v_reg(ot, cpu_T0, reg);
7052        gen_lea_modrm(env, s, modrm);
7053        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7054        if (ot == MO_16) {
7055            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
7056        } else {
7057            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
7058        }
7059        break;
7060    case 0x1c8 ... 0x1cf: /* bswap reg */
7061        reg = (b & 7) | REX_B(s);
7062#ifdef TARGET_X86_64
7063        if (dflag == MO_64) {
7064            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
7065            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
7066            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
7067        } else
7068#endif
7069        {
7070            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
7071            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
7072            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
7073            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
7074        }
7075        break;
7076    case 0xd6: /* salc */
7077        if (CODE64(s))
7078            goto illegal_op;
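        /* Undocumented SALC: AL = CF ? 0xff : 0x00.  Negating the 0/1
           carry value from gen_compute_eflags_c yields 0 or -1, whose low
           byte is stored into AL.  */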
7079        gen_compute_eflags_c(s, cpu_T0);
7080        tcg_gen_neg_tl(cpu_T0, cpu_T0);
7081        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
7082        break;
7083    case 0xe0: /* loopnz */
7084    case 0xe1: /* loopz */
7085    case 0xe2: /* loop */
7086    case 0xe3: /* jecxz */
7087        {
7088            TCGLabel *l1, *l2, *l3;
7089
7090            tval = (int8_t)insn_get(env, s, MO_8);
7091            next_eip = s->pc - s->cs_base;
7092            tval += next_eip;
7093            if (dflag == MO_16) {
7094                tval &= 0xffff;
7095            }
7096
7097            l1 = gen_new_label();
7098            l2 = gen_new_label();
7099            l3 = gen_new_label();
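            /* l1: loop taken, jump to the target; l3 (and plain
               fall-through): loop not taken, continue at next_eip;
               l2: common exit.  */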
7100            b &= 3;
7101            switch(b) {
7102            case 0: /* loopnz */
7103            case 1: /* loopz */
7104                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7105                gen_op_jz_ecx(s->aflag, l3);
7106                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7107                break;
7108            case 2: /* loop */
7109                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7110                gen_op_jnz_ecx(s->aflag, l1);
7111                break;
7112            default:
7113            case 3: /* jecxz */
7114                gen_op_jz_ecx(s->aflag, l1);
7115                break;
7116            }
7117
7118            gen_set_label(l3);
7119            gen_jmp_im(next_eip);
7120            tcg_gen_br(l2);
7121
7122            gen_set_label(l1);
7123            gen_jmp_im(tval);
7124            gen_set_label(l2);
7125            gen_eob(s);
7126        }
7127        break;
7128    case 0x130: /* wrmsr */
7129    case 0x132: /* rdmsr */
7130        if (s->cpl != 0) {
7131            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7132        } else {
7133            gen_update_cc_op(s);
7134            gen_jmp_im(pc_start - s->cs_base);
7135            if (b & 2) {
7136                gen_helper_rdmsr(cpu_env);
7137            } else {
7138                gen_helper_wrmsr(cpu_env);
7139            }
7140        }
7141        break;
7142    case 0x131: /* rdtsc */
7143        gen_update_cc_op(s);
7144        gen_jmp_im(pc_start - s->cs_base);
7145        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7146            gen_io_start();
7147        }
7148        gen_helper_rdtsc(cpu_env);
7149        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7150            gen_io_end();
7151            gen_jmp(s, s->pc - s->cs_base);
7152        }
7153        break;
7154    case 0x133: /* rdpmc */
7155        gen_update_cc_op(s);
7156        gen_jmp_im(pc_start - s->cs_base);
7157        gen_helper_rdpmc(cpu_env);
7158        break;
7159    case 0x134: /* sysenter */
7160        /* For Intel CPUs, SYSENTER is valid in 64-bit mode */
7161        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7162            goto illegal_op;
7163        if (!s->pe) {
7164            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7165        } else {
7166            gen_helper_sysenter(cpu_env);
7167            gen_eob(s);
7168        }
7169        break;
7170    case 0x135: /* sysexit */
7171        /* For Intel CPUs, SYSEXIT is valid in 64-bit mode */
7172        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7173            goto illegal_op;
7174        if (!s->pe) {
7175            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7176        } else {
7177            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7178            gen_eob(s);
7179        }
7180        break;
7181#ifdef TARGET_X86_64
7182    case 0x105: /* syscall */
7183        /* XXX: is it usable in real mode? */
7184        gen_update_cc_op(s);
7185        gen_jmp_im(pc_start - s->cs_base);
7186        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7187        /* TF handling for the syscall insn is different.  The TF bit is checked
7188           after the syscall insn completes.  This prevents #DB from being
7189           generated after one has entered CPL 0 if TF is set in FMASK.  */
7190        gen_eob_worker(s, false, true);
7191        break;
7192    case 0x107: /* sysret */
7193        if (!s->pe) {
7194            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7195        } else {
7196            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7197            /* condition codes are modified only in long mode */
7198            if (s->lma) {
7199                set_cc_op(s, CC_OP_EFLAGS);
7200            }
7201            /* TF handling for the sysret insn is different. The TF bit is
7202               checked after the sysret insn completes. This allows #DB to be
7203               generated "as if" the syscall insn in userspace has just
7204               completed.  */
7205            gen_eob_worker(s, false, true);
7206        }
7207        break;
7208#endif
7209    case 0x1a2: /* cpuid */
7210        gen_update_cc_op(s);
7211        gen_jmp_im(pc_start - s->cs_base);
7212        gen_helper_cpuid(cpu_env);
7213        break;
7214    case 0xf4: /* hlt */
7215        if (s->cpl != 0) {
7216            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7217        } else {
7218            gen_update_cc_op(s);
7219            gen_jmp_im(pc_start - s->cs_base);
7220            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7221            s->base.is_jmp = DISAS_NORETURN;
7222        }
7223        break;
7224    case 0x100:
7225        modrm = x86_ldub_code(env, s);
7226        mod = (modrm >> 6) & 3;
7227        op = (modrm >> 3) & 7;
7228        switch(op) {
7229        case 0: /* sldt */
7230            if (!s->pe || s->vm86)
7231                goto illegal_op;
7232            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7233            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7234                             offsetof(CPUX86State, ldt.selector));
7235            ot = mod == 3 ? dflag : MO_16;
7236            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7237            break;
7238        case 2: /* lldt */
7239            if (!s->pe || s->vm86)
7240                goto illegal_op;
7241            if (s->cpl != 0) {
7242                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7243            } else {
7244                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7245                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7246                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7247                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
7248            }
7249            break;
7250        case 1: /* str */
7251            if (!s->pe || s->vm86)
7252                goto illegal_op;
7253            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7254            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7255                             offsetof(CPUX86State, tr.selector));
7256            ot = mod == 3 ? dflag : MO_16;
7257            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7258            break;
7259        case 3: /* ltr */
7260            if (!s->pe || s->vm86)
7261                goto illegal_op;
7262            if (s->cpl != 0) {
7263                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7264            } else {
7265                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7266                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7267                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7268                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
7269            }
7270            break;
7271        case 4: /* verr */
7272        case 5: /* verw */
7273            if (!s->pe || s->vm86)
7274                goto illegal_op;
7275            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7276            gen_update_cc_op(s);
7277            if (op == 4) {
7278                gen_helper_verr(cpu_env, cpu_T0);
7279            } else {
7280                gen_helper_verw(cpu_env, cpu_T0);
7281            }
7282            set_cc_op(s, CC_OP_EFLAGS);
7283            break;
7284        default:
7285            goto unknown_op;
7286        }
7287        break;
7288
7289    case 0x101:
7290        modrm = x86_ldub_code(env, s);
7291        switch (modrm) {
7292        CASE_MODRM_MEM_OP(0): /* sgdt */
7293            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7294            gen_lea_modrm(env, s, modrm);
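            /* sgdt stores the 16-bit limit followed by the base; with a
               16-bit operand size only the low 24 bits of the base are
               written.  */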
7295            tcg_gen_ld32u_tl(cpu_T0,
7296                             cpu_env, offsetof(CPUX86State, gdt.limit));
7297            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7298            gen_add_A0_im(s, 2);
7299            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7300            if (dflag == MO_16) {
7301                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7302            }
7303            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7304            break;
7305
7306        case 0xc8: /* monitor */
7307            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7308                goto illegal_op;
7309            }
7310            gen_update_cc_op(s);
7311            gen_jmp_im(pc_start - s->cs_base);
7312            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
7313            gen_extu(s->aflag, cpu_A0);
7314            gen_add_A0_ds_seg(s);
7315            gen_helper_monitor(cpu_env, cpu_A0);
7316            break;
7317
7318        case 0xc9: /* mwait */
7319            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7320                goto illegal_op;
7321            }
7322            gen_update_cc_op(s);
7323            gen_jmp_im(pc_start - s->cs_base);
7324            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7325            gen_eob(s);
7326            break;
7327
7328        case 0xca: /* clac */
7329            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7330                || s->cpl != 0) {
7331                goto illegal_op;
7332            }
7333            gen_helper_clac(cpu_env);
7334            gen_jmp_im(s->pc - s->cs_base);
7335            gen_eob(s);
7336            break;
7337
7338        case 0xcb: /* stac */
7339            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7340                || s->cpl != 0) {
7341                goto illegal_op;
7342            }
7343            gen_helper_stac(cpu_env);
7344            gen_jmp_im(s->pc - s->cs_base);
7345            gen_eob(s);
7346            break;
7347
7348        CASE_MODRM_MEM_OP(1): /* sidt */
7349            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7350            gen_lea_modrm(env, s, modrm);
7351            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
7352            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7353            gen_add_A0_im(s, 2);
7354            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7355            if (dflag == MO_16) {
7356                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7357            }
7358            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7359            break;
7360
7361        case 0xd0: /* xgetbv */
7362            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7363                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7364                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7365                goto illegal_op;
7366            }
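            /* ECX selects the extended control register to read; the
               64-bit value is returned split across EDX:EAX.  */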
7367            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7368            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7369            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7370            break;
7371
7372        case 0xd1: /* xsetbv */
7373            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7374                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7375                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7376                goto illegal_op;
7377            }
7378            if (s->cpl != 0) {
7379                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7380                break;
7381            }
7382            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7383                                  cpu_regs[R_EDX]);
7384            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7385            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7386            /* End TB because translation flags may change.  */
7387            gen_jmp_im(s->pc - s->cs_base);
7388            gen_eob(s);
7389            break;
7390
7391        case 0xd8: /* VMRUN */
7392            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7393                goto illegal_op;
7394            }
7395            if (s->cpl != 0) {
7396                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7397                break;
7398            }
7399            gen_update_cc_op(s);
7400            gen_jmp_im(pc_start - s->cs_base);
7401            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7402                             tcg_const_i32(s->pc - pc_start));
7403            tcg_gen_exit_tb(0);
7404            s->base.is_jmp = DISAS_NORETURN;
7405            break;
7406
7407        case 0xd9: /* VMMCALL */
7408            if (!(s->flags & HF_SVME_MASK)) {
7409                goto illegal_op;
7410            }
7411            gen_update_cc_op(s);
7412            gen_jmp_im(pc_start - s->cs_base);
7413            gen_helper_vmmcall(cpu_env);
7414            break;
7415
7416        case 0xda: /* VMLOAD */
7417            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7418                goto illegal_op;
7419            }
7420            if (s->cpl != 0) {
7421                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7422                break;
7423            }
7424            gen_update_cc_op(s);
7425            gen_jmp_im(pc_start - s->cs_base);
7426            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7427            break;
7428
7429        case 0xdb: /* VMSAVE */
7430            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7431                goto illegal_op;
7432            }
7433            if (s->cpl != 0) {
7434                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7435                break;
7436            }
7437            gen_update_cc_op(s);
7438            gen_jmp_im(pc_start - s->cs_base);
7439            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7440            break;
7441
7442        case 0xdc: /* STGI */
7443            if ((!(s->flags & HF_SVME_MASK)
7444                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7445                || !s->pe) {
7446                goto illegal_op;
7447            }
7448            if (s->cpl != 0) {
7449                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7450                break;
7451            }
7452            gen_update_cc_op(s);
7453            gen_jmp_im(pc_start - s->cs_base);
7454            gen_helper_stgi(cpu_env);
7455            break;
7456
7457        case 0xdd: /* CLGI */
7458            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7459                goto illegal_op;
7460            }
7461            if (s->cpl != 0) {
7462                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7463                break;
7464            }
7465            gen_update_cc_op(s);
7466            gen_jmp_im(pc_start - s->cs_base);
7467            gen_helper_clgi(cpu_env);
7468            break;
7469
7470        case 0xde: /* SKINIT */
7471            if ((!(s->flags & HF_SVME_MASK)
7472                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7473                || !s->pe) {
7474                goto illegal_op;
7475            }
7476            gen_update_cc_op(s);
7477            gen_jmp_im(pc_start - s->cs_base);
7478            gen_helper_skinit(cpu_env);
7479            break;
7480
7481        case 0xdf: /* INVLPGA */
7482            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7483                goto illegal_op;
7484            }
7485            if (s->cpl != 0) {
7486                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7487                break;
7488            }
7489            gen_update_cc_op(s);
7490            gen_jmp_im(pc_start - s->cs_base);
7491            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7492            break;
7493
7494        CASE_MODRM_MEM_OP(2): /* lgdt */
7495            if (s->cpl != 0) {
7496                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7497                break;
7498            }
7499            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7500            gen_lea_modrm(env, s, modrm);
7501            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7502            gen_add_A0_im(s, 2);
7503            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7504            if (dflag == MO_16) {
7505                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7506            }
7507            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7508            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7509            break;
7510
7511        CASE_MODRM_MEM_OP(3): /* lidt */
7512            if (s->cpl != 0) {
7513                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7514                break;
7515            }
7516            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7517            gen_lea_modrm(env, s, modrm);
7518            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7519            gen_add_A0_im(s, 2);
7520            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7521            if (dflag == MO_16) {
7522                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7523            }
7524            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7525            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
7526            break;
7527
7528        CASE_MODRM_OP(4): /* smsw */
7529            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7530            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
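                /* A register destination honors the operand size in
                   64-bit mode; a memory destination always stores 16
                   bits. */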
7531            if (CODE64(s)) {
7532                mod = (modrm >> 6) & 3;
7533                ot = (mod != 3 ? MO_16 : s->dflag);
7534            } else {
7535                ot = MO_16;
7536            }
7537            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7538            break;
7539        case 0xee: /* rdpkru */
7540            if (prefixes & PREFIX_LOCK) {
7541                goto illegal_op;
7542            }
7543            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7544            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7545            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7546            break;
7547        case 0xef: /* wrpkru */
7548            if (prefixes & PREFIX_LOCK) {
7549                goto illegal_op;
7550            }
7551            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7552                                  cpu_regs[R_EDX]);
7553            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7554            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7555            break;
7556        CASE_MODRM_OP(6): /* lmsw */
7557            if (s->cpl != 0) {
7558                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7559                break;
7560            }
7561            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7562            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7563            gen_helper_lmsw(cpu_env, cpu_T0);
7564            gen_jmp_im(s->pc - s->cs_base);
7565            gen_eob(s);
7566            break;
7567
7568        CASE_MODRM_MEM_OP(7): /* invlpg */
7569            if (s->cpl != 0) {
7570                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7571                break;
7572            }
7573            gen_update_cc_op(s);
7574            gen_jmp_im(pc_start - s->cs_base);
7575            gen_lea_modrm(env, s, modrm);
7576            gen_helper_invlpg(cpu_env, cpu_A0);
7577            gen_jmp_im(s->pc - s->cs_base);
7578            gen_eob(s);
7579            break;
7580
7581        case 0xf8: /* swapgs */
7582#ifdef TARGET_X86_64
7583            if (CODE64(s)) {
7584                if (s->cpl != 0) {
7585                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7586                } else {
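                        /* Exchange the current GS base with
                           MSR_KERNEL_GS_BASE. */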
7587                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
7588                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7589                                  offsetof(CPUX86State, kernelgsbase));
7590                    tcg_gen_st_tl(cpu_T0, cpu_env,
7591                                  offsetof(CPUX86State, kernelgsbase));
7592                }
7593                break;
7594            }
7595#endif
7596            goto illegal_op;
7597
7598        case 0xf9: /* rdtscp */
7599            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7600                goto illegal_op;
7601            }
7602            gen_update_cc_op(s);
7603            gen_jmp_im(pc_start - s->cs_base);
7604            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7605                gen_io_start();
7606            }
7607            gen_helper_rdtscp(cpu_env);
7608            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7609                gen_io_end();
7610                gen_jmp(s, s->pc - s->cs_base);
7611            }
7612            break;
7613
7614        default:
7615            goto unknown_op;
7616        }
7617        break;
7618
7619    case 0x108: /* invd */
7620    case 0x109: /* wbinvd */
7621        if (s->cpl != 0) {
7622            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7623        } else {
7624            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7625            /* nothing to do */
7626        }
7627        break;
7628    case 0x63: /* arpl or movslS (x86_64) */
7629#ifdef TARGET_X86_64
7630        if (CODE64(s)) {
7631            int d_ot;
7632            /* d_ot is the size of the destination */
7633            d_ot = dflag;
7634
7635            modrm = x86_ldub_code(env, s);
7636            reg = ((modrm >> 3) & 7) | rex_r;
7637            mod = (modrm >> 6) & 3;
7638            rm = (modrm & 7) | REX_B(s);
7639
7640            if (mod == 3) {
7641                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
7642                /* sign extend */
7643                if (d_ot == MO_64) {
7644                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
7645                }
7646                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7647            } else {
7648                gen_lea_modrm(env, s, modrm);
7649                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
7650                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7651            }
7652        } else
7653#endif
7654        {
7655            TCGLabel *label1;
7656            TCGv t0, t1, t2, a0;
7657
7658            if (!s->pe || s->vm86)
7659                goto illegal_op;
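                /* ARPL: if the RPL field (bits 1:0) of the destination
                   selector is below that of the source, raise it to match
                   and set ZF; otherwise leave the selector alone and clear
                   ZF.  t2 collects the CC_Z bit that is merged into cc_src
                   below. */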
7660            t0 = tcg_temp_local_new();
7661            t1 = tcg_temp_local_new();
7662            t2 = tcg_temp_local_new();
7663            ot = MO_16;
7664            modrm = x86_ldub_code(env, s);
7665            reg = (modrm >> 3) & 7;
7666            mod = (modrm >> 6) & 3;
7667            rm = modrm & 7;
7668            if (mod != 3) {
7669                gen_lea_modrm(env, s, modrm);
7670                gen_op_ld_v(s, ot, t0, cpu_A0);
7671                a0 = tcg_temp_local_new();
7672                tcg_gen_mov_tl(a0, cpu_A0);
7673            } else {
7674                gen_op_mov_v_reg(ot, t0, rm);
7675                TCGV_UNUSED(a0);
7676            }
7677            gen_op_mov_v_reg(ot, t1, reg);
7678            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
7679            tcg_gen_andi_tl(t1, t1, 3);
7680            tcg_gen_movi_tl(t2, 0);
7681            label1 = gen_new_label();
7682            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
7683            tcg_gen_andi_tl(t0, t0, ~3);
7684            tcg_gen_or_tl(t0, t0, t1);
7685            tcg_gen_movi_tl(t2, CC_Z);
7686            gen_set_label(label1);
7687            if (mod != 3) {
7688                gen_op_st_v(s, ot, t0, a0);
7689                tcg_temp_free(a0);
7690            } else {
7691                gen_op_mov_reg_v(ot, rm, t0);
7692            }
7693            gen_compute_eflags(s);
7694            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7695            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7696            tcg_temp_free(t0);
7697            tcg_temp_free(t1);
7698            tcg_temp_free(t2);
7699        }
7700        break;
7701    case 0x102: /* lar */
7702    case 0x103: /* lsl */
7703        {
7704            TCGLabel *label1;
7705            TCGv t0;
7706            if (!s->pe || s->vm86)
7707                goto illegal_op;
7708            ot = dflag != MO_16 ? MO_32 : MO_16;
7709            modrm = x86_ldub_code(env, s);
7710            reg = ((modrm >> 3) & 7) | rex_r;
7711            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7712            t0 = tcg_temp_local_new();
7713            gen_update_cc_op(s);
7714            if (b == 0x102) {
7715                gen_helper_lar(t0, cpu_env, cpu_T0);
7716            } else {
7717                gen_helper_lsl(t0, cpu_env, cpu_T0);
7718            }
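                /* The helpers set CC_Z in cc_src on success; skip the
                   register writeback when the selector was invalid
                   (ZF clear). */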
7719            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
7720            label1 = gen_new_label();
7721            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
7722            gen_op_mov_reg_v(ot, reg, t0);
7723            gen_set_label(label1);
7724            set_cc_op(s, CC_OP_EFLAGS);
7725            tcg_temp_free(t0);
7726        }
7727        break;
7728    case 0x118:
7729        modrm = x86_ldub_code(env, s);
7730        mod = (modrm >> 6) & 3;
7731        op = (modrm >> 3) & 7;
7732        switch(op) {
7733        case 0: /* prefetchnta */
7734        case 1: /* prefetcht0 */
7735        case 2: /* prefetcht1 */
7736        case 3: /* prefetcht2 */
7737            if (mod == 3)
7738                goto illegal_op;
7739            gen_nop_modrm(env, s, modrm);
7740            /* nothing more to do */
7741            break;
7742        default: /* nop (multi byte) */
7743            gen_nop_modrm(env, s, modrm);
7744            break;
7745        }
7746        break;
7747    case 0x11a:
7748        modrm = x86_ldub_code(env, s);
7749        if (s->flags & HF_MPX_EN_MASK) {
7750            mod = (modrm >> 6) & 3;
7751            reg = ((modrm >> 3) & 7) | rex_r;
7752            if (prefixes & PREFIX_REPZ) {
7753                /* bndcl */
7754                if (reg >= 4
7755                    || (prefixes & PREFIX_LOCK)
7756                    || s->aflag == MO_16) {
7757                    goto illegal_op;
7758                }
7759                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7760            } else if (prefixes & PREFIX_REPNZ) {
7761                /* bndcu */
7762                if (reg >= 4
7763                    || (prefixes & PREFIX_LOCK)
7764                    || s->aflag == MO_16) {
7765                    goto illegal_op;
7766                }
7767                TCGv_i64 notu = tcg_temp_new_i64();
7768                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7769                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7770                tcg_temp_free_i64(notu);
7771            } else if (prefixes & PREFIX_DATA) {
7772                /* bndmov -- from reg/mem */
7773                if (reg >= 4 || s->aflag == MO_16) {
7774                    goto illegal_op;
7775                }
7776                if (mod == 3) {
7777                    int reg2 = (modrm & 7) | REX_B(s);
7778                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7779                        goto illegal_op;
7780                    }
7781                    if (s->flags & HF_MPX_IU_MASK) {
7782                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7783                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7784                    }
7785                } else {
7786                    gen_lea_modrm(env, s, modrm);
7787                    if (CODE64(s)) {
7788                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7789                                            s->mem_index, MO_LEQ);
7790                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7791                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7792                                            s->mem_index, MO_LEQ);
7793                    } else {
7794                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7795                                            s->mem_index, MO_LEUL);
7796                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7797                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7798                                            s->mem_index, MO_LEUL);
7799                    }
7800                    /* bnd registers are now in-use */
7801                    gen_set_hflag(s, HF_MPX_IU_MASK);
7802                }
7803            } else if (mod != 3) {
7804                /* bndldx */
7805                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7806                if (reg >= 4
7807                    || (prefixes & PREFIX_LOCK)
7808                    || s->aflag == MO_16
7809                    || a.base < -1) {
7810                    goto illegal_op;
7811                }
7812                if (a.base >= 0) {
7813                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7814                } else {
7815                    tcg_gen_movi_tl(cpu_A0, 0);
7816                }
7817                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7818                if (a.index >= 0) {
7819                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7820                } else {
7821                    tcg_gen_movi_tl(cpu_T0, 0);
7822                }
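                    /* bndldx64 returns the lower bound and leaves the
                       upper bound in mmx_t0; bndldx32 packs both 32-bit
                       bounds into one 64-bit value, split below. */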
7823                if (CODE64(s)) {
7824                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
7825                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7826                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7827                } else {
7828                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
7829                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7830                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7831                }
7832                gen_set_hflag(s, HF_MPX_IU_MASK);
7833            }
7834        }
7835        gen_nop_modrm(env, s, modrm);
7836        break;
7837    case 0x11b:
7838        modrm = x86_ldub_code(env, s);
7839        if (s->flags & HF_MPX_EN_MASK) {
7840            mod = (modrm >> 6) & 3;
7841            reg = ((modrm >> 3) & 7) | rex_r;
7842            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7843                /* bndmk */
7844                if (reg >= 4
7845                    || (prefixes & PREFIX_LOCK)
7846                    || s->aflag == MO_16) {
7847                    goto illegal_op;
7848                }
7849                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7850                if (a.base >= 0) {
7851                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7852                    if (!CODE64(s)) {
7853                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7854                    }
7855                } else if (a.base == -1) {
7856                    /* no base register: the lower bound is 0 */
7857                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7858                } else {
7859                    /* rip-relative generates #ud */
7860                    goto illegal_op;
7861                }
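                    /* MPX keeps the upper bound in one's-complement form;
                       bndcu above compares against the complemented
                       value. */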
7862                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
7863                if (!CODE64(s)) {
7864                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
7865                }
7866                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
7867                /* bnd registers are now in-use */
7868                gen_set_hflag(s, HF_MPX_IU_MASK);
7869                break;
7870            } else if (prefixes & PREFIX_REPNZ) {
7871                /* bndcn */
7872                if (reg >= 4
7873                    || (prefixes & PREFIX_LOCK)
7874                    || s->aflag == MO_16) {
7875                    goto illegal_op;
7876                }
7877                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7878            } else if (prefixes & PREFIX_DATA) {
7879                /* bndmov -- to reg/mem */
7880                if (reg >= 4 || s->aflag == MO_16) {
7881                    goto illegal_op;
7882                }
7883                if (mod == 3) {
7884                    int reg2 = (modrm & 7) | REX_B(s);
7885                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7886                        goto illegal_op;
7887                    }
7888                    if (s->flags & HF_MPX_IU_MASK) {
7889                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7890                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7891                    }
7892                } else {
7893                    gen_lea_modrm(env, s, modrm);
7894                    if (CODE64(s)) {
7895                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7896                                            s->mem_index, MO_LEQ);
7897                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7898                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7899                                            s->mem_index, MO_LEQ);
7900                    } else {
7901                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7902                                            s->mem_index, MO_LEUL);
7903                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7904                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7905                                            s->mem_index, MO_LEUL);
7906                    }
7907                }
7908            } else if (mod != 3) {
7909                /* bndstx */
7910                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7911                if (reg >= 4
7912                    || (prefixes & PREFIX_LOCK)
7913                    || s->aflag == MO_16
7914                    || a.base < -1) {
7915                    goto illegal_op;
7916                }
7917                if (a.base >= 0) {
7918                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7919                } else {
7920                    tcg_gen_movi_tl(cpu_A0, 0);
7921                }
7922                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7923                if (a.index >= 0) {
7924                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7925                } else {
7926                    tcg_gen_movi_tl(cpu_T0, 0);
7927                }
7928                if (CODE64(s)) {
7929                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
7930                                        cpu_bndl[reg], cpu_bndu[reg]);
7931                } else {
7932                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
7933                                        cpu_bndl[reg], cpu_bndu[reg]);
7934                }
7935            }
7936        }
7937        gen_nop_modrm(env, s, modrm);
7938        break;
7939    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7940        modrm = x86_ldub_code(env, s);
7941        gen_nop_modrm(env, s, modrm);
7942        break;
7943    case 0x120: /* mov reg, crN */
7944    case 0x122: /* mov crN, reg */
7945        if (s->cpl != 0) {
7946            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7947        } else {
7948            modrm = x86_ldub_code(env, s);
7949            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7950             * AMD documentation (24594.pdf) and testing of
7951             * Intel 386 and 486 processors all show that the mod bits
7952             * are assumed to be 1's, regardless of actual values.
7953             */
7954            rm = (modrm & 7) | REX_B(s);
7955            reg = ((modrm >> 3) & 7) | rex_r;
7956            if (CODE64(s))
7957                ot = MO_64;
7958            else
7959                ot = MO_32;
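                /* With CR8LEG (AMD's alternative CR8 encoding), a
                   LOCK-prefixed access to CR0 addresses CR8 instead. */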
7960            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7961                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7962                reg = 8;
7963            }
7964            switch(reg) {
7965            case 0:
7966            case 2:
7967            case 3:
7968            case 4:
7969            case 8:
7970                gen_update_cc_op(s);
7971                gen_jmp_im(pc_start - s->cs_base);
7972                if (b & 2) {
7973                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7974                        gen_io_start();
7975                    }
7976                    gen_op_mov_v_reg(ot, cpu_T0, rm);
7977                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
7978                                         cpu_T0);
7979                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7980                        gen_io_end();
7981                    }
7982                    gen_jmp_im(s->pc - s->cs_base);
7983                    gen_eob(s);
7984                } else {
7985                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7986                        gen_io_start();
7987                    }
7988                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
7989                    gen_op_mov_reg_v(ot, rm, cpu_T0);
7990                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7991                        gen_io_end();
7992                    }
7993                }
7994                break;
7995            default:
7996                goto unknown_op;
7997            }
7998        }
7999        break;
8000    case 0x121: /* mov reg, drN */
8001    case 0x123: /* mov drN, reg */
8002        if (s->cpl != 0) {
8003            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8004        } else {
8005            modrm = x86_ldub_code(env, s);
8006            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8007             * AMD documentation (24594.pdf) and testing of
8008             * Intel 386 and 486 processors all show that the mod bits
8009             * are assumed to be 1's, regardless of actual values.
8010             */
8011            rm = (modrm & 7) | REX_B(s);
8012            reg = ((modrm >> 3) & 7) | rex_r;
8013            if (CODE64(s))
8014                ot = MO_64;
8015            else
8016                ot = MO_32;
8017            if (reg >= 8) {
8018                goto illegal_op;
8019            }
8020            if (b & 2) {
8021                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8022                gen_op_mov_v_reg(ot, cpu_T0, rm);
8023                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
8024                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
8025                gen_jmp_im(s->pc - s->cs_base);
8026                gen_eob(s);
8027            } else {
8028                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8029                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
8030                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
8031                gen_op_mov_reg_v(ot, rm, cpu_T0);
8032            }
8033        }
8034        break;
8035    case 0x106: /* clts */
8036        if (s->cpl != 0) {
8037            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8038        } else {
8039            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8040            gen_helper_clts(cpu_env);
8041            /* abort block because static cpu state changed */
8042            gen_jmp_im(s->pc - s->cs_base);
8043            gen_eob(s);
8044        }
8045        break;
8046    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8047    case 0x1c3: /* MOVNTI reg, mem */
8048        if (!(s->cpuid_features & CPUID_SSE2))
8049            goto illegal_op;
8050        ot = mo_64_32(dflag);
8051        modrm = x86_ldub_code(env, s);
8052        mod = (modrm >> 6) & 3;
8053        if (mod == 3)
8054            goto illegal_op;
8055        reg = ((modrm >> 3) & 7) | rex_r;
8056        /* generate a generic store */
8057        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8058        break;
8059    case 0x1ae:
8060        modrm = x86_ldub_code(env, s);
8061        switch (modrm) {
8062        CASE_MODRM_MEM_OP(0): /* fxsave */
8063            if (!(s->cpuid_features & CPUID_FXSR)
8064                || (prefixes & PREFIX_LOCK)) {
8065                goto illegal_op;
8066            }
8067            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8068                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8069                break;
8070            }
8071            gen_lea_modrm(env, s, modrm);
8072            gen_helper_fxsave(cpu_env, cpu_A0);
8073            break;
8074
8075        CASE_MODRM_MEM_OP(1): /* fxrstor */
8076            if (!(s->cpuid_features & CPUID_FXSR)
8077                || (prefixes & PREFIX_LOCK)) {
8078                goto illegal_op;
8079            }
8080            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8081                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8082                break;
8083            }
8084            gen_lea_modrm(env, s, modrm);
8085            gen_helper_fxrstor(cpu_env, cpu_A0);
8086            break;
8087
8088        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8089            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8090                goto illegal_op;
8091            }
8092            if (s->flags & HF_TS_MASK) {
8093                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8094                break;
8095            }
8096            gen_lea_modrm(env, s, modrm);
8097            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
8098            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
8099            break;
8100
8101        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8102            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8103                goto illegal_op;
8104            }
8105            if (s->flags & HF_TS_MASK) {
8106                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8107                break;
8108            }
8109            gen_lea_modrm(env, s, modrm);
8110            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
8111            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
8112            break;
8113
8114        CASE_MODRM_MEM_OP(4): /* xsave */
8115            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8116                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8117                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8118                goto illegal_op;
8119            }
8120            gen_lea_modrm(env, s, modrm);
8121            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8122                                  cpu_regs[R_EDX]);
8123            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
8124            break;
8125
8126        CASE_MODRM_MEM_OP(5): /* xrstor */
8127            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8128                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8129                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8130                goto illegal_op;
8131            }
8132            gen_lea_modrm(env, s, modrm);
8133            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8134                                  cpu_regs[R_EDX]);
8135            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
8136            /* XRSTOR is how MPX is enabled, which changes how
8137               we translate.  Thus we need to end the TB.  */
8138            gen_update_cc_op(s);
8139            gen_jmp_im(s->pc - s->cs_base);
8140            gen_eob(s);
8141            break;
8142
8143        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8144            if (prefixes & PREFIX_LOCK) {
8145                goto illegal_op;
8146            }
8147            if (prefixes & PREFIX_DATA) {
8148                /* clwb */
8149                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8150                    goto illegal_op;
8151                }
8152                gen_nop_modrm(env, s, modrm);
8153            } else {
8154                /* xsaveopt */
8155                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8156                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8157                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8158                    goto illegal_op;
8159                }
8160                gen_lea_modrm(env, s, modrm);
8161                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8162                                      cpu_regs[R_EDX]);
8163                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
8164            }
8165            break;
8166
8167        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8168            if (prefixes & PREFIX_LOCK) {
8169                goto illegal_op;
8170            }
8171            if (prefixes & PREFIX_DATA) {
8172                /* clflushopt */
8173                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8174                    goto illegal_op;
8175                }
8176            } else {
8177                /* clflush */
8178                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8179                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8180                    goto illegal_op;
8181                }
8182            }
8183            gen_nop_modrm(env, s, modrm);
8184            break;
8185
8186        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8187        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8188        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8189        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8190            if (CODE64(s)
8191                && (prefixes & PREFIX_REPZ)
8192                && !(prefixes & PREFIX_LOCK)
8193                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8194                TCGv base, treg, src, dst;
8195
8196                /* Preserve hflags bits by testing CR4 at runtime.  */
8197                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
8198                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
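                    /* Bit 3 of the modrm byte selects FS vs GS, and bit 4
                       selects write vs read (/0 and /1 read, /2 and /3
                       write). */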
8199
8200                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8201                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8202
8203                if (modrm & 0x10) {
8204                    /* wr*base */
8205                    dst = base, src = treg;
8206                } else {
8207                    /* rd*base */
8208                    dst = treg, src = base;
8209                }
8210
8211                if (s->dflag == MO_32) {
8212                    tcg_gen_ext32u_tl(dst, src);
8213                } else {
8214                    tcg_gen_mov_tl(dst, src);
8215                }
8216                break;
8217            }
8218            goto unknown_op;
8219
8220        case 0xf8: /* sfence / pcommit */
8221            if (prefixes & PREFIX_DATA) {
8222                /* pcommit */
8223                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8224                    || (prefixes & PREFIX_LOCK)) {
8225                    goto illegal_op;
8226                }
8227                break;
8228            }
8229            /* fallthru */
8230        case 0xf9 ... 0xff: /* sfence */
8231            if (!(s->cpuid_features & CPUID_SSE)
8232                || (prefixes & PREFIX_LOCK)) {
8233                goto illegal_op;
8234            }
8235            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8236            break;
8237        case 0xe8 ... 0xef: /* lfence */
8238            if (!(s->cpuid_features & CPUID_SSE)
8239                || (prefixes & PREFIX_LOCK)) {
8240                goto illegal_op;
8241            }
8242            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8243            break;
8244        case 0xf0 ... 0xf7: /* mfence */
8245            if (!(s->cpuid_features & CPUID_SSE2)
8246                || (prefixes & PREFIX_LOCK)) {
8247                goto illegal_op;
8248            }
8249            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8250            break;
8251
8252        default:
8253            goto unknown_op;
8254        }
8255        break;
8256
8257    case 0x10d: /* 3DNow! prefetch(w) */
8258        modrm = x86_ldub_code(env, s);
8259        mod = (modrm >> 6) & 3;
8260        if (mod == 3)
8261            goto illegal_op;
8262        gen_nop_modrm(env, s, modrm);
8263        break;
8264    case 0x1aa: /* rsm */
8265        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8266        if (!(s->flags & HF_SMM_MASK))
8267            goto illegal_op;
8268        gen_update_cc_op(s);
8269        gen_jmp_im(s->pc - s->cs_base);
8270        gen_helper_rsm(cpu_env);
8271        gen_eob(s);
8272        break;
8273    case 0x1b8: /* SSE4.2 popcnt */
8274        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8275             PREFIX_REPZ)
8276            goto illegal_op;
8277        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8278            goto illegal_op;
8279
8280        modrm = x86_ldub_code(env, s);
8281        reg = ((modrm >> 3) & 7) | rex_r;
8282
8283        if (s->prefix & PREFIX_DATA) {
8284            ot = MO_16;
8285        } else {
8286            ot = mo_64_32(dflag);
8287        }
8288
8289        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8290        gen_extu(ot, cpu_T0);
8291        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
8292        tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
8293        gen_op_mov_reg_v(ot, reg, cpu_T0);
8294
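            /* CC_OP_POPCNT derives ZF lazily from cc_src (the original
               operand); the other flags read as zero. */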
8295        set_cc_op(s, CC_OP_POPCNT);
8296        break;
8297    case 0x10e ... 0x10f:
8298        /* 3DNow! instructions, ignore prefixes */
8299        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
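            /* fall through */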
8300    case 0x110 ... 0x117:
8301    case 0x128 ... 0x12f:
8302    case 0x138 ... 0x13a:
8303    case 0x150 ... 0x179:
8304    case 0x17c ... 0x17f:
8305    case 0x1c2:
8306    case 0x1c4 ... 0x1c6:
8307    case 0x1d0 ... 0x1fe:
8308        gen_sse(env, s, b, pc_start, rex_r);
8309        break;
8310    default:
8311        goto unknown_op;
8312    }
8313    return s->pc;
8314 illegal_op:
8315    gen_illegal_opcode(s);
8316    return s->pc;
8317 unknown_op:
8318    gen_unknown_opcode(env, s);
8319    return s->pc;
8320}
8321
8322void tcg_x86_init(void)
8323{
8324    static const char reg_names[CPU_NB_REGS][4] = {
8325#ifdef TARGET_X86_64
8326        [R_EAX] = "rax",
8327        [R_EBX] = "rbx",
8328        [R_ECX] = "rcx",
8329        [R_EDX] = "rdx",
8330        [R_ESI] = "rsi",
8331        [R_EDI] = "rdi",
8332        [R_EBP] = "rbp",
8333        [R_ESP] = "rsp",
8334        [8]  = "r8",
8335        [9]  = "r9",
8336        [10] = "r10",
8337        [11] = "r11",
8338        [12] = "r12",
8339        [13] = "r13",
8340        [14] = "r14",
8341        [15] = "r15",
8342#else
8343        [R_EAX] = "eax",
8344        [R_EBX] = "ebx",
8345        [R_ECX] = "ecx",
8346        [R_EDX] = "edx",
8347        [R_ESI] = "esi",
8348        [R_EDI] = "edi",
8349        [R_EBP] = "ebp",
8350        [R_ESP] = "esp",
8351#endif
8352    };
8353    static const char seg_base_names[6][8] = {
8354        [R_CS] = "cs_base",
8355        [R_DS] = "ds_base",
8356        [R_ES] = "es_base",
8357        [R_FS] = "fs_base",
8358        [R_GS] = "gs_base",
8359        [R_SS] = "ss_base",
8360    };
8361    static const char bnd_regl_names[4][8] = {
8362        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8363    };
8364    static const char bnd_regu_names[4][8] = {
8365        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8366    };
8367    int i;
8368
8369    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8370                                       offsetof(CPUX86State, cc_op), "cc_op");
8371    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8372                                    "cc_dst");
8373    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8374                                    "cc_src");
8375    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8376                                     "cc_src2");
8377
8378    for (i = 0; i < CPU_NB_REGS; ++i) {
8379        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8380                                         offsetof(CPUX86State, regs[i]),
8381                                         reg_names[i]);
8382    }
8383
8384    for (i = 0; i < 6; ++i) {
8385        cpu_seg_base[i]
8386            = tcg_global_mem_new(cpu_env,
8387                                 offsetof(CPUX86State, segs[i].base),
8388                                 seg_base_names[i]);
8389    }
8390
8391    for (i = 0; i < 4; ++i) {
8392        cpu_bndl[i]
8393            = tcg_global_mem_new_i64(cpu_env,
8394                                     offsetof(CPUX86State, bnd_regs[i].lb),
8395                                     bnd_regl_names[i]);
8396        cpu_bndu[i]
8397            = tcg_global_mem_new_i64(cpu_env,
8398                                     offsetof(CPUX86State, bnd_regs[i].ub),
8399                                     bnd_regu_names[i]);
8400    }
8401}
8402
8403static int i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu,
8404                                      int max_insns)
8405{
8406    DisasContext *dc = container_of(dcbase, DisasContext, base);
8407    CPUX86State *env = cpu->env_ptr;
8408    uint32_t flags = dc->base.tb->flags;
8409    target_ulong cs_base = dc->base.tb->cs_base;
8410
8411    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8412    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8413    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8414    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8415    dc->f_st = 0;
8416    dc->vm86 = (flags >> VM_SHIFT) & 1;
8417    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8418    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8419    dc->tf = (flags >> TF_SHIFT) & 1;
8420    dc->cc_op = CC_OP_DYNAMIC;
8421    dc->cc_op_dirty = false;
8422    dc->cs_base = cs_base;
8423    dc->popl_esp_hack = 0;
8424    /* select memory access functions */
8425    dc->mem_index = 0;
8426#ifdef CONFIG_SOFTMMU
8427    dc->mem_index = cpu_mmu_index(env, false);
8428#endif
8429    dc->cpuid_features = env->features[FEAT_1_EDX];
8430    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8431    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8432    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8433    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8434    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8435#ifdef TARGET_X86_64
8436    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8437    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8438#endif
8439    dc->flags = flags;
8440    dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8441                    (flags & HF_INHIBIT_IRQ_MASK));
8442    /* Do not optimize repz jumps at all in icount mode, because
8443       rep movsS instructions are executed along different paths
8444       in the !repz_opt and repz_opt modes.  The former was
8445       previously used everywhere except in single-step mode.
8446       Disabling the optimization here makes the control paths
8447       identical in normal and single-step execution, so repz
8448       jumps are never optimized in record/replay mode and an
8449       additional step is always taken for ecx=0 when icount
8450       is enabled.
8451     */
8452    dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8453#if 0
8454    /* check addseg logic */
8455    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8456        printf("ERROR addseg\n");
8457#endif
8458
8459    cpu_T0 = tcg_temp_new();
8460    cpu_T1 = tcg_temp_new();
8461    cpu_A0 = tcg_temp_new();
8462
8463    cpu_tmp0 = tcg_temp_new();
8464    cpu_tmp1_i64 = tcg_temp_new_i64();
8465    cpu_tmp2_i32 = tcg_temp_new_i32();
8466    cpu_tmp3_i32 = tcg_temp_new_i32();
8467    cpu_tmp4 = tcg_temp_new();
8468    cpu_ptr0 = tcg_temp_new_ptr();
8469    cpu_ptr1 = tcg_temp_new_ptr();
8470    cpu_cc_srcT = tcg_temp_local_new();
8471
8472    return max_insns;
8473}
8474
8475static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8476{
8477}
8478
8479static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8480{
8481    DisasContext *dc = container_of(dcbase, DisasContext, base);
8482
8483    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8484}
8485
8486static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8487                                     const CPUBreakpoint *bp)
8488{
8489    DisasContext *dc = container_of(dcbase, DisasContext, base);
8490    /* If RF is set, suppress an internally generated breakpoint.  */
8491    int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8492    if (bp->flags & flags) {
8493        gen_debug(dc, dc->base.pc_next - dc->cs_base);
8494        dc->base.is_jmp = DISAS_NORETURN;
8495        /* The address covered by the breakpoint must be included in
8496           [tb->pc, tb->pc + tb->size) in order for it to be
8497           properly cleared -- thus we increment the PC here so that
8498           the generic logic setting tb->size later does the right thing.  */
8499        dc->base.pc_next += 1;
8500        return true;
8501    } else {
8502        return false;
8503    }
8504}
8505
8506static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8507{
8508    DisasContext *dc = container_of(dcbase, DisasContext, base);
8509    target_ulong pc_next = disas_insn(dc, cpu);
8510
8511    if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8512        /* In single-step mode, generate only one instruction and
8513           then raise an exception. */
8514        /* If IRQs were inhibited via HF_INHIBIT_IRQ_MASK, clear
8515           the flag and end the translation to give the IRQs a
8516           chance to be serviced. */
8517        dc->base.is_jmp = DISAS_TOO_MANY;
8518    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8519               && ((dc->base.pc_next & TARGET_PAGE_MASK)
8520                   != ((dc->base.pc_next + TARGET_MAX_INSN_SIZE - 1)
8521                       & TARGET_PAGE_MASK)
8522                   || (dc->base.pc_next & ~TARGET_PAGE_MASK) == 0)) {
8523        /* Do not cross page boundaries in icount mode, as that can
8524           raise an exception.  Stop only when the boundary would be
8525           crossed by the first instruction in the block.  If the
8526           current instruction has already crossed it, that is fine,
8527           because no exception stopped this code.
8528         */
8529        dc->base.is_jmp = DISAS_TOO_MANY;
8530    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8531        dc->base.is_jmp = DISAS_TOO_MANY;
8532    }
8533
8534    dc->base.pc_next = pc_next;
8535}
8536
8537static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8538{
8539    DisasContext *dc = container_of(dcbase, DisasContext, base);
8540
8541    if (dc->base.is_jmp == DISAS_TOO_MANY) {
8542        gen_jmp_im(dc->base.pc_next - dc->cs_base);
8543        gen_eob(dc);
8544    }
8545}
8546
8547static void i386_tr_disas_log(const DisasContextBase *dcbase,
8548                              CPUState *cpu)
8549{
8550    DisasContext *dc = container_of(dcbase, DisasContext, base);
8551
8552    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8553    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8554}
8555
8556static const TranslatorOps i386_tr_ops = {
8557    .init_disas_context = i386_tr_init_disas_context,
8558    .tb_start           = i386_tr_tb_start,
8559    .insn_start         = i386_tr_insn_start,
8560    .breakpoint_check   = i386_tr_breakpoint_check,
8561    .translate_insn     = i386_tr_translate_insn,
8562    .tb_stop            = i386_tr_tb_stop,
8563    .disas_log          = i386_tr_disas_log,
8564};
8565
8566/* generate intermediate code for basic block 'tb'.  */
8567void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb)
8568{
8569    DisasContext dc;
8570
8571    translator_loop(&i386_tr_ops, &dc.base, cpu, tb);
8572}
8573
8574void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8575                          target_ulong *data)
8576{
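        /* data[] mirrors the arguments of tcg_gen_insn_start() in
           i386_tr_insn_start(): data[0] is cs_base + eip, data[1] the
           current cc_op. */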
8577    int cc_op = data[1];
8578    env->eip = data[0] - tb->cs_base;
8579    if (cc_op != CC_OP_DYNAMIC) {
8580        env->cc_op = cc_op;
8581    }
8582}
8583