qemu/target-i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "tcg-op.h"
#include "exec/cpu_ldst.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"


#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

//#define MACRO_TEST   1

/* global register indexes */
static TCGv_env cpu_env;
static TCGv cpu_A0;
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];
/* local temps */
static TCGv cpu_T0, cpu_T1;
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;

#include "exec/gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs;
#endif

typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    TCGMemOp aflag;
    TCGMemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
};

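/* Set the CC operation for the current instruction, discarding any CC
   globals that the new op no longer uses so that TCG sees them as dead.  */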
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(cpu_cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

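/* Flush the translation-time cc_op value to the cpu_cc_op global if it
   has not been stored since it last changed.  */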
static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline TCGMemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline TCGMemOp mo_64_32(TCGMemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

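/* Store T0 into register REG with operand size OT, handling the
   AH/CH/DH/BH encoding for 8-bit operands and the implicit zero
   extension of 32-bit writes on x86_64.  */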
static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

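/* Load register REG into T0 with operand size OT, extracting bits 15..8
   for the AH/CH/DH/BH case.  */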
static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(reg)) {
        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
        tcg_gen_ext8u_tl(t0, t0);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
{
    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_reg_v(idx, d, cpu_T0);
    }
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    gen_op_jmp_v(cpu_tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(cpu_A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                tcg_gen_ext32u_tl(cpu_A0, a0);
                return;
            }
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(cpu_A0, a0);
        a0 = cpu_A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(cpu_A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(cpu_A0, a0);
            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
        } else {
            tcg_gen_add_tl(cpu_A0, a0, seg);
            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

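/* Load the direction flag (kept in env as +1 or -1) scaled by the
   operand size into T0, for advancing ESI/EDI in string operations.  */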
static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
{
    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
};

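/* Zero- or sign-extend SRC to SIZE into DST and return DST; if no
   extension is needed, SRC is returned unchanged.  */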
static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

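/* Generate the protection checks for an I/O access of size OT with the
   port number in T0: the TSS I/O permission bitmap check when CPL > IOPL
   or in vm86 mode, and the SVM I/O intercept check when running under
   SVM.  */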
static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_SVMI_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static void gen_op_update1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static void gen_op_update3_cc(TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
    tcg_gen_movi_tl(cpu_cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    TCGV_UNUSED(zero);
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

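/* Recipe for evaluating a condition without materializing all of EFLAGS:
   the condition holds when COND compares REG (masked with MASK unless
   MASK is -1) against REG2 or the immediate IMM.  NO_SETCOND means REG
   already holds a usable result, so no setcond is required.  */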
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
        tcg_gen_mov_tl(t0, cpu_cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_extu(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_exts(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

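/* Emit the ECX == 0 exit test for a rep string instruction.  Returns the
   label that, when taken, jumps to the following instruction.  */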
/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

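/* If I/O breakpoints are active, call the helper that re-checks the debug
   registers after an access of size 1 << OT to port T_PORT.  */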
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}


static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(cpu_T0, 0);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);

    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T0, d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
    }
    switch(op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T0, d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
    }
    gen_compute_eflags_c(s1, cpu_cc_src);
    if (c > 0) {
        tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
        set_cc_op(s1, CC_OP_INCB + ot);
    } else {
        tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
        set_cc_op(s1, CC_OP_DECB + ot);
    }
    gen_op_st_rm_T0_A0(s1, ot, d);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

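/* Compute the flags for a variable-count shift: RESULT is the shifted
   value, SHM1 the value shifted by count - 1 (whose edge bit supplies the
   carry), and COUNT the masked shift count.  A count of zero must leave
   the flags untouched, which is handled with movcond rather than a
   branch.  */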
static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
        oldop = cpu_tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T0);
            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            gen_extu(ot, cpu_T0);
            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
        }
    } else {
        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T0);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
            } else {
                gen_extu(ot, cpu_T0);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

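/* Rotate the operand (register OP1, or memory at A0 when OP1 is OR_TMP0)
   by the variable count in T1, then recompute CF and OF from the rotated
   result.  */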
static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
        if (is_right) {
            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        } else {
            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        }
        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, cpu_T1);
    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        cpu_tmp2_i32, cpu_tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
            if (is_right) {
                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
            } else {
                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, cpu_T0);
            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}

/* XXX: add faster immediate = 1 case */
static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                           int is_right)
{
    gen_compute_eflags(s);
    assert(s->cc_op == CC_OP_EFLAGS);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T0, op1);

    if (is_right) {
        switch (ot) {
        case MO_8:
            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
        case MO_16:
            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
        case MO_32:
            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
#endif
        default:
            tcg_abort();
        }
    } else {
        switch (ot) {
        case MO_8:
            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
        case MO_16:
            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
        case MO_32:
            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
#endif
        default:
            tcg_abort();
        }
    }
    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);
}

/* XXX: add faster immediate case */
static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                             bool is_right, TCGv count_in)
{
    target_ulong mask = (ot == MO_64 ? 63 : 31);
    TCGv count;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    count = tcg_temp_new();
    tcg_gen_andi_tl(count, count_in, mask);

    switch (ot) {
    case MO_16:
        /* Note: we implement the Intel behaviour for shift count > 16.
           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
           portion by constructing it as a 32-bit value.  */
        if (is_right) {
            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
            tcg_gen_mov_tl(cpu_T1, cpu_T0);
            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
        } else {
            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
        }
        /* FALLTHRU */
#ifdef TARGET_X86_64
    case MO_32:
        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
        tcg_gen_subi_tl(cpu_tmp0, count, 1);
        if (is_right) {
            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
        } else {
            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
        }
        break;
#endif
    default:
        tcg_gen_subi_tl(cpu_tmp0, count, 1);
        if (is_right) {
            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);

            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
        } else {
            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            if (ot == MO_16) {
                /* Only needed if count > 16, for Intel behaviour.  */
                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
            }

            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
        }
        tcg_gen_movi_tl(cpu_tmp4, 0);
        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
                           cpu_tmp4, cpu_T1);
        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
    tcg_temp_free(count);
}

static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
{
    if (s != OR_TMP1)
        gen_op_mov_v_reg(ot, cpu_T1, s);
    switch(op) {
    case OP_ROL:
        gen_rot_rm_T1(s1, ot, d, 0);
        break;
    case OP_ROR:
        gen_rot_rm_T1(s1, ot, d, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_T1(s1, ot, d, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_T1(s1, ot, d, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_T1(s1, ot, d, 1, 1);
        break;
    case OP_RCL:
        gen_rotc_rm_T1(s1, ot, d, 0);
        break;
    case OP_RCR:
        gen_rotc_rm_T1(s1, ot, d, 1);
        break;
    }
}

static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
{
    switch(op) {
    case OP_ROL:
        gen_rot_rm_im(s1, ot, d, c, 0);
        break;
    case OP_ROR:
        gen_rot_rm_im(s1, ot, d, c, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_im(s1, ot, d, c, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_im(s1, ot, d, c, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_im(s1, ot, d, c, 1, 1);
        break;
    default:
        /* currently not optimized */
        tcg_gen_movi_tl(cpu_T1, c);
1814        gen_shift(s1, op, ot, d, OR_TMP1);
1815        break;
1816    }
1817}
1818
1819/* Decompose an address.  */
1820
1821typedef struct AddressParts {
1822    int def_seg;
1823    int base;
1824    int index;
1825    int scale;
1826    target_long disp;
1827} AddressParts;
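
    /* Informal note: these fields encode the classic x86 effective address
           ea = base_reg + (index_reg << scale) + disp
       where base/index hold register numbers or -1 when absent (base == -2
       marks a RIP-relative address, with the target folded into disp), and
       def_seg is the segment used when no override prefix is present.  */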
1828
1829static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1830                                    int modrm)
1831{
1832    int def_seg, base, index, scale, mod, rm;
1833    target_long disp;
1834    bool havesib;
1835
1836    def_seg = R_DS;
1837    index = -1;
1838    scale = 0;
1839    disp = 0;
1840
1841    mod = (modrm >> 6) & 3;
1842    rm = modrm & 7;
1843    base = rm | REX_B(s);
1844
1845    if (mod == 3) {
1846        /* Normally filtered out earlier, but including this path
1847           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1848        goto done;
1849    }
1850
1851    switch (s->aflag) {
1852    case MO_64:
1853    case MO_32:
1854        havesib = 0;
1855        if (rm == 4) {
1856            int code = cpu_ldub_code(env, s->pc++);
1857            scale = (code >> 6) & 3;
1858            index = ((code >> 3) & 7) | REX_X(s);
1859            if (index == 4) {
1860                index = -1;  /* no index */
1861            }
1862            base = (code & 7) | REX_B(s);
1863            havesib = 1;
1864        }
1865
1866        switch (mod) {
1867        case 0:
1868            if ((base & 7) == 5) {
1869                base = -1;
1870                disp = (int32_t)cpu_ldl_code(env, s->pc);
1871                s->pc += 4;
1872                if (CODE64(s) && !havesib) {
1873                    base = -2;
1874                    disp += s->pc + s->rip_offset;
1875                }
1876            }
1877            break;
1878        case 1:
1879            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1880            break;
1881        default:
1882        case 2:
1883            disp = (int32_t)cpu_ldl_code(env, s->pc);
1884            s->pc += 4;
1885            break;
1886        }
1887
1888        /* For correct popl handling with esp.  */
1889        if (base == R_ESP && s->popl_esp_hack) {
1890            disp += s->popl_esp_hack;
1891        }
1892        if (base == R_EBP || base == R_ESP) {
1893            def_seg = R_SS;
1894        }
1895        break;
1896
1897    case MO_16:
1898        if (mod == 0) {
1899            if (rm == 6) {
1900                base = -1;
1901                disp = cpu_lduw_code(env, s->pc);
1902                s->pc += 2;
1903                break;
1904            }
1905        } else if (mod == 1) {
1906            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1907        } else {
1908            disp = (int16_t)cpu_lduw_code(env, s->pc);
1909            s->pc += 2;
1910        }
1911
1912        switch (rm) {
1913        case 0:
1914            base = R_EBX;
1915            index = R_ESI;
1916            break;
1917        case 1:
1918            base = R_EBX;
1919            index = R_EDI;
1920            break;
1921        case 2:
1922            base = R_EBP;
1923            index = R_ESI;
1924            def_seg = R_SS;
1925            break;
1926        case 3:
1927            base = R_EBP;
1928            index = R_EDI;
1929            def_seg = R_SS;
1930            break;
1931        case 4:
1932            base = R_ESI;
1933            break;
1934        case 5:
1935            base = R_EDI;
1936            break;
1937        case 6:
1938            base = R_EBP;
1939            def_seg = R_SS;
1940            break;
1941        default:
1942        case 7:
1943            base = R_EBX;
1944            break;
1945        }
1946        break;
1947
1948    default:
1949        tcg_abort();
1950    }
1951
1952 done:
1953    return (AddressParts){ def_seg, base, index, scale, disp };
1954}
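
    /* Worked example (illustrative): for "mov 0x8(%ebp,%esi,2),%eax" the
       bytes are modrm=0x44 (mod=1, reg=0, rm=4, so a SIB byte follows),
       sib=0x75 (scale=1, index=6=ESI, base=5=EBP) and disp8=0x08; the
       function above returns { R_SS, R_EBP, R_ESI, 1, 8 }, EBP as base
       selecting SS as the default segment.  */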
1955
1956/* Compute the address, with a minimum number of TCG ops.  */
1957static TCGv gen_lea_modrm_1(AddressParts a)
1958{
1959    TCGv ea;
1960
1961    TCGV_UNUSED(ea);
1962    if (a.index >= 0) {
1963        if (a.scale == 0) {
1964            ea = cpu_regs[a.index];
1965        } else {
1966            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
1967            ea = cpu_A0;
1968        }
1969        if (a.base >= 0) {
1970            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
1971            ea = cpu_A0;
1972        }
1973    } else if (a.base >= 0) {
1974        ea = cpu_regs[a.base];
1975    }
1976    if (TCGV_IS_UNUSED(ea)) {
1977        tcg_gen_movi_tl(cpu_A0, a.disp);
1978        ea = cpu_A0;
1979    } else if (a.disp != 0) {
1980        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
1981        ea = cpu_A0;
1982    }
1983
1984    return ea;
1985}
1986
1987static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
1988{
1989    AddressParts a = gen_lea_modrm_0(env, s, modrm);
1990    TCGv ea = gen_lea_modrm_1(a);
1991    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
1992}
1993
1994static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
1995{
1996    (void)gen_lea_modrm_0(env, s, modrm);
1997}
1998
1999/* Used for BNDCL, BNDCU, BNDCN.  */
2000static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2001                      TCGCond cond, TCGv_i64 bndv)
2002{
2003    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
2004
2005    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
2006    if (!CODE64(s)) {
2007        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
2008    }
2009    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
2010    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
2011    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
2012}
2013
2014/* used for LEA and MOV AX, mem */
2015static void gen_add_A0_ds_seg(DisasContext *s)
2016{
2017    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2018}
2019
2020/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2021   OR_TMP0 */
2022static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2023                           TCGMemOp ot, int reg, int is_store)
2024{
2025    int mod, rm;
2026
2027    mod = (modrm >> 6) & 3;
2028    rm = (modrm & 7) | REX_B(s);
2029    if (mod == 3) {
2030        if (is_store) {
2031            if (reg != OR_TMP0)
2032                gen_op_mov_v_reg(ot, cpu_T0, reg);
2033            gen_op_mov_reg_v(ot, rm, cpu_T0);
2034        } else {
2035            gen_op_mov_v_reg(ot, cpu_T0, rm);
2036            if (reg != OR_TMP0)
2037                gen_op_mov_reg_v(ot, reg, cpu_T0);
2038        }
2039    } else {
2040        gen_lea_modrm(env, s, modrm);
2041        if (is_store) {
2042            if (reg != OR_TMP0)
2043                gen_op_mov_v_reg(ot, cpu_T0, reg);
2044            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2045        } else {
2046            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2047            if (reg != OR_TMP0)
2048                gen_op_mov_reg_v(ot, reg, cpu_T0);
2049        }
2050    }
2051}
2052
2053static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2054{
2055    uint32_t ret;
2056
2057    switch (ot) {
2058    case MO_8:
2059        ret = cpu_ldub_code(env, s->pc);
2060        s->pc++;
2061        break;
2062    case MO_16:
2063        ret = cpu_lduw_code(env, s->pc);
2064        s->pc += 2;
2065        break;
2066    case MO_32:
2067#ifdef TARGET_X86_64
2068    case MO_64:
2069#endif
2070        ret = cpu_ldl_code(env, s->pc);
2071        s->pc += 4;
2072        break;
2073    default:
2074        tcg_abort();
2075    }
2076    return ret;
2077}
2078
2079static inline int insn_const_size(TCGMemOp ot)
2080{
2081    if (ot <= MO_32) {
2082        return 1 << ot;
2083    } else {
2084        return 4;
2085    }
2086}
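
    /* Informal note: aside from the mov r64,imm64 form, which is decoded
       separately, x86 immediates never exceed 32 bits; a MO_64 operation
       carries an imm32 that the CPU sign-extends, hence the cap at 4.  */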
2087
2088static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2089{
2090    TranslationBlock *tb;
2091    target_ulong pc;
2092
2093    pc = s->cs_base + eip;
2094    tb = s->tb;
2095    /* NOTE: we handle the case where the TB spans two pages here */
2096    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
2097        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
2098        /* jump to same page: we can use a direct jump */
2099        tcg_gen_goto_tb(tb_num);
2100        gen_jmp_im(eip);
2101        tcg_gen_exit_tb((uintptr_t)tb + tb_num);
2102    } else {
2103        /* jump to another page: currently not optimized */
2104        gen_jmp_im(eip);
2105        gen_eob(s);
2106    }
2107}
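
    /* Informal rationale: direct chaining patches a jump in the generated
       code, which is only safe while the guest mapping of the target cannot
       change behind this TB's back -- hence the restriction to targets on a
       page that the TB already spans.  */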
2108
2109static inline void gen_jcc(DisasContext *s, int b,
2110                           target_ulong val, target_ulong next_eip)
2111{
2112    TCGLabel *l1, *l2;
2113
2114    if (s->jmp_opt) {
2115        l1 = gen_new_label();
2116        gen_jcc1(s, b, l1);
2117
2118        gen_goto_tb(s, 0, next_eip);
2119
2120        gen_set_label(l1);
2121        gen_goto_tb(s, 1, val);
2122        s->is_jmp = DISAS_TB_JUMP;
2123    } else {
2124        l1 = gen_new_label();
2125        l2 = gen_new_label();
2126        gen_jcc1(s, b, l1);
2127
2128        gen_jmp_im(next_eip);
2129        tcg_gen_br(l2);
2130
2131        gen_set_label(l1);
2132        gen_jmp_im(val);
2133        gen_set_label(l2);
2134        gen_eob(s);
2135    }
2136}
2137
2138static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2139                        int modrm, int reg)
2140{
2141    CCPrepare cc;
2142
2143    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2144
2145    cc = gen_prepare_cc(s, b, cpu_T1);
2146    if (cc.mask != -1) {
2147        TCGv t0 = tcg_temp_new();
2148        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2149        cc.reg = t0;
2150    }
2151    if (!cc.use_reg2) {
2152        cc.reg2 = tcg_const_tl(cc.imm);
2153    }
2154
2155    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2156                       cpu_T0, cpu_regs[reg]);
2157    gen_op_mov_reg_v(ot, reg, cpu_T0);
2158
2159    if (cc.mask != -1) {
2160        tcg_temp_free(cc.reg);
2161    }
2162    if (!cc.use_reg2) {
2163        tcg_temp_free(cc.reg2);
2164    }
2165}
2166
2167static inline void gen_op_movl_T0_seg(int seg_reg)
2168{
2169    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2170                     offsetof(CPUX86State,segs[seg_reg].selector));
2171}
2172
2173static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2174{
2175    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2176    tcg_gen_st32_tl(cpu_T0, cpu_env,
2177                    offsetof(CPUX86State,segs[seg_reg].selector));
2178    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2179}
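
    /* Example (informal): in real or vm86 mode a selector is simply a
       paragraph number, so loading 0x1234 sets the segment base to
       0x1234 << 4 = 0x12340, which is the shift performed above.  */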
2180
2181/* Move T0 to seg_reg and compute whether the CPU state may change.
2182   Never call this function with seg_reg == R_CS.  */
2183static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2184{
2185    if (s->pe && !s->vm86) {
2186        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2187        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2188        /* Abort translation because the addseg value may change or
2189           because ss32 may change.  For R_SS, translation must always
2190           stop, since special handling is needed to inhibit hardware
2191           interrupts for the next instruction.  */
2192        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2193            s->is_jmp = DISAS_TB_JUMP;
2194    } else {
2195        gen_op_movl_seg_T0_vm(seg_reg);
2196        if (seg_reg == R_SS)
2197            s->is_jmp = DISAS_TB_JUMP;
2198    }
2199}
2200
2201static inline int svm_is_rep(int prefixes)
2202{
2203    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2204}
2205
2206static inline void
2207gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2208                              uint32_t type, uint64_t param)
2209{
2210    /* no SVM activated; fast case */
2211    if (likely(!(s->flags & HF_SVMI_MASK)))
2212        return;
2213    gen_update_cc_op(s);
2214    gen_jmp_im(pc_start - s->cs_base);
2215    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2216                                         tcg_const_i64(param));
2217}
2218
2219static inline void
2220gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2221{
2222    gen_svm_check_intercept_param(s, pc_start, type, 0);
2223}
2224
2225static inline void gen_stack_update(DisasContext *s, int addend)
2226{
2227    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2228}
2229
2230/* Generate a push. It depends on ss32, addseg and dflag.  */
2231static void gen_push_v(DisasContext *s, TCGv val)
2232{
2233    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2234    TCGMemOp a_ot = mo_stacksize(s);
2235    int size = 1 << d_ot;
2236    TCGv new_esp = cpu_A0;
2237
2238    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2239
2240    if (!CODE64(s)) {
2241        if (s->addseg) {
2242            new_esp = cpu_tmp4;
2243            tcg_gen_mov_tl(new_esp, cpu_A0);
2244        }
2245        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2246    }
2247
2248    gen_op_st_v(s, d_ot, val, cpu_A0);
2249    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2250}
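
    /* Rough sketch (illustrative) of what gen_push_v() emits for a 32-bit
       "push %eax" with flat segments:
           A0 = ESP - 4;       // new top of stack
           mem[SS:A0] = EAX;   // store first, for precise exceptions
           ESP = A0;           // commit ESP only after the store succeeds
     */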
2251
2252/* two step pop is necessary for precise exceptions */
2253static TCGMemOp gen_pop_T0(DisasContext *s)
2254{
2255    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2256
2257    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2258    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2259
2260    return d_ot;
2261}
2262
2263static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2264{
2265    gen_stack_update(s, 1 << ot);
2266}
2267
2268static inline void gen_stack_A0(DisasContext *s)
2269{
2270    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2271}
2272
2273static void gen_pusha(DisasContext *s)
2274{
2275    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2276    TCGMemOp d_ot = s->dflag;
2277    int size = 1 << d_ot;
2278    int i;
2279
2280    for (i = 0; i < 8; i++) {
2281        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2282        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2283        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2284    }
2285
2286    gen_stack_update(s, -8 * size);
2287}
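
    /* Resulting layout (informal), lowest address first: EDI, ESI, EBP,
       ESP, EBX, EDX, ECX, EAX.  The loop stores register 7-i at
       ESP + (i-8)*size and only then moves ESP down, so the saved ESP is
       the pre-PUSHA value, as the ISA requires.  */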
2288
2289static void gen_popa(DisasContext *s)
2290{
2291    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2292    TCGMemOp d_ot = s->dflag;
2293    int size = 1 << d_ot;
2294    int i;
2295
2296    for (i = 0; i < 8; i++) {
2297        /* ESP is not reloaded */
2298        if (7 - i == R_ESP) {
2299            continue;
2300        }
2301        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2302        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2303        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2304        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2305    }
2306
2307    gen_stack_update(s, 8 * size);
2308}
2309
2310static void gen_enter(DisasContext *s, int esp_addend, int level)
2311{
2312    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2313    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2314    int size = 1 << d_ot;
2315
2316    /* Push BP; compute FrameTemp into T1.  */
2317    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2318    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2319    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2320
2321    level &= 31;
2322    if (level != 0) {
2323        int i;
2324
2325        /* Copy level-1 pointers from the previous frame.  */
2326        for (i = 1; i < level; ++i) {
2327            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2328            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2329            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2330
2331            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2332            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2333            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2334        }
2335
2336        /* Push the current FrameTemp as the last level.  */
2337        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2338        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2339        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2340    }
2341
2342    /* Copy the FrameTemp value to EBP.  */
2343    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2344
2345    /* Compute the final value of ESP.  */
2346    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2347    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2348}
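
    /* Informal reading: "enter $N, $0" as generated here behaves like
           push %ebp; mov %esp, %ebp; sub $N, %esp
       while a non-zero level additionally copies level-1 saved frame
       pointers from the old frame and pushes FrameTemp itself.  */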
2349
2350static void gen_leave(DisasContext *s)
2351{
2352    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2353    TCGMemOp a_ot = mo_stacksize(s);
2354
2355    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2356    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2357
2358    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2359
2360    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2361    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2362}
2363
2364static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2365{
2366    gen_update_cc_op(s);
2367    gen_jmp_im(cur_eip);
2368    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2369    s->is_jmp = DISAS_TB_JUMP;
2370}
2371
2372/* Generate #UD for the current instruction.  The assumption here is that
2373   the instruction is known, but it isn't allowed in the current cpu mode.  */
2374static void gen_illegal_opcode(DisasContext *s)
2375{
2376    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
2377}
2378
2379/* Similarly, except that the assumption here is that we don't decode
2380   the instruction at all -- either a missing opcode, an unimplemented
2381   feature, or just a bogus instruction stream.  */
2382static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2383{
2384    gen_illegal_opcode(s);
2385
2386    if (qemu_loglevel_mask(LOG_UNIMP)) {
2387        target_ulong pc = s->pc_start, end = s->pc;
2388        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2389        for (; pc < end; ++pc) {
2390            qemu_log(" %02x", cpu_ldub_code(env, pc));
2391        }
2392        qemu_log("\n");
2393    }
2394}
2395
2396/* an interrupt is different from an exception because of the
2397   privilege checks */
2398static void gen_interrupt(DisasContext *s, int intno,
2399                          target_ulong cur_eip, target_ulong next_eip)
2400{
2401    gen_update_cc_op(s);
2402    gen_jmp_im(cur_eip);
2403    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2404                               tcg_const_i32(next_eip - cur_eip));
2405    s->is_jmp = DISAS_TB_JUMP;
2406}
2407
2408static void gen_debug(DisasContext *s, target_ulong cur_eip)
2409{
2410    gen_update_cc_op(s);
2411    gen_jmp_im(cur_eip);
2412    gen_helper_debug(cpu_env);
2413    s->is_jmp = DISAS_TB_JUMP;
2414}
2415
2416static void gen_set_hflag(DisasContext *s, uint32_t mask)
2417{
2418    if ((s->flags & mask) == 0) {
2419        TCGv_i32 t = tcg_temp_new_i32();
2420        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2421        tcg_gen_ori_i32(t, t, mask);
2422        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2423        tcg_temp_free_i32(t);
2424        s->flags |= mask;
2425    }
2426}
2427
2428static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2429{
2430    if (s->flags & mask) {
2431        TCGv_i32 t = tcg_temp_new_i32();
2432        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2433        tcg_gen_andi_i32(t, t, ~mask);
2434        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2435        tcg_temp_free_i32(t);
2436        s->flags &= ~mask;
2437    }
2438}
2439
2440/* Clear BND registers during legacy branches.  */
2441static void gen_bnd_jmp(DisasContext *s)
2442{
2443    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2444       and if the BNDREGs are known to be in use (non-zero) already.
2445       The helper itself will check BNDPRESERVE at runtime.  */
2446    if ((s->prefix & PREFIX_REPNZ) == 0
2447        && (s->flags & HF_MPX_EN_MASK) != 0
2448        && (s->flags & HF_MPX_IU_MASK) != 0) {
2449        gen_helper_bnd_jmp(cpu_env);
2450    }
2451}
2452
2453/* Generate an end of block. Trace exception is also generated if needed.
2454   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2455static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2456{
2457    gen_update_cc_op(s);
2458
2459    /* If several instructions disable interrupts, only the first does it.  */
2460    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2461        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2462    } else {
2463        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2464    }
2465
2466    if (s->tb->flags & HF_RF_MASK) {
2467        gen_helper_reset_rf(cpu_env);
2468    }
2469    if (s->singlestep_enabled) {
2470        gen_helper_debug(cpu_env);
2471    } else if (s->tf) {
2472        gen_helper_single_step(cpu_env);
2473    } else {
2474        tcg_gen_exit_tb(0);
2475    }
2476    s->is_jmp = DISAS_TB_JUMP;
2477}
2478
2479/* End of block, resetting the inhibit irq flag.  */
2480static void gen_eob(DisasContext *s)
2481{
2482    gen_eob_inhibit_irq(s, false);
2483}
2484
2485/* Generate a jump to eip.  No segment change may happen beforehand,
2486   as a direct jump to the next block may occur.  */
2487static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2488{
2489    gen_update_cc_op(s);
2490    set_cc_op(s, CC_OP_DYNAMIC);
2491    if (s->jmp_opt) {
2492        gen_goto_tb(s, tb_num, eip);
2493        s->is_jmp = DISAS_TB_JUMP;
2494    } else {
2495        gen_jmp_im(eip);
2496        gen_eob(s);
2497    }
2498}
2499
2500static void gen_jmp(DisasContext *s, target_ulong eip)
2501{
2502    gen_jmp_tb(s, eip, 0);
2503}
2504
2505static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2506{
2507    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2508    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2509}
2510
2511static inline void gen_stq_env_A0(DisasContext *s, int offset)
2512{
2513    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2514    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2515}
2516
2517static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2518{
2519    int mem_index = s->mem_index;
2520    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2521    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2522    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2523    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2524    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2525}
2526
2527static inline void gen_sto_env_A0(DisasContext *s, int offset)
2528{
2529    int mem_index = s->mem_index;
2530    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2531    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2532    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2533    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2534    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2535}
2536
2537static inline void gen_op_movo(int d_offset, int s_offset)
2538{
2539    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2540    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2541    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2542    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2543}
2544
2545static inline void gen_op_movq(int d_offset, int s_offset)
2546{
2547    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2548    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2549}
2550
2551static inline void gen_op_movl(int d_offset, int s_offset)
2552{
2553    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2554    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2555}
2556
2557static inline void gen_op_movq_env_0(int d_offset)
2558{
2559    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2560    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2561}
2562
2563typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2564typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2565typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2566typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2567typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2568typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2569                               TCGv_i32 val);
2570typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2571typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2572                               TCGv val);
2573
2574#define SSE_SPECIAL ((void *)1)
2575#define SSE_DUMMY ((void *)2)
2576
2577#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2578#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2579                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
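
    /* The four columns of the tables below are indexed by the mandatory
       prefix (as b1 is computed in gen_sse() below): [0] none, [1] 0x66,
       [2] 0xF3, [3] 0xF2.  For opcode 0x58 that is addps, addpd, addss
       and addsd respectively.  */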
2580
2581static const SSEFunc_0_epp sse_op_table1[256][4] = {
2582    /* 3DNow! extensions */
2583    [0x0e] = { SSE_DUMMY }, /* femms */
2584    [0x0f] = { SSE_DUMMY }, /* pf... */
2585    /* pure SSE operations */
2586    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2587    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2588    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2589    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2590    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2591    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2592    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2593    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2594
2595    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2596    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2597    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2598    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2599    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2600    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2601    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2602    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2603    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2604    [0x51] = SSE_FOP(sqrt),
2605    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2606    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2607    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2608    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2609    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2610    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2611    [0x58] = SSE_FOP(add),
2612    [0x59] = SSE_FOP(mul),
2613    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2614               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2615    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2616    [0x5c] = SSE_FOP(sub),
2617    [0x5d] = SSE_FOP(min),
2618    [0x5e] = SSE_FOP(div),
2619    [0x5f] = SSE_FOP(max),
2620
2621    [0xc2] = SSE_FOP(cmpeq),
2622    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2623               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2624
2625    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2626    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2627    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2628
2629    /* MMX ops and their SSE extensions */
2630    [0x60] = MMX_OP2(punpcklbw),
2631    [0x61] = MMX_OP2(punpcklwd),
2632    [0x62] = MMX_OP2(punpckldq),
2633    [0x63] = MMX_OP2(packsswb),
2634    [0x64] = MMX_OP2(pcmpgtb),
2635    [0x65] = MMX_OP2(pcmpgtw),
2636    [0x66] = MMX_OP2(pcmpgtl),
2637    [0x67] = MMX_OP2(packuswb),
2638    [0x68] = MMX_OP2(punpckhbw),
2639    [0x69] = MMX_OP2(punpckhwd),
2640    [0x6a] = MMX_OP2(punpckhdq),
2641    [0x6b] = MMX_OP2(packssdw),
2642    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2643    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2644    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2645    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2646    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2647               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2648               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2649               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2650    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2651    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2652    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2653    [0x74] = MMX_OP2(pcmpeqb),
2654    [0x75] = MMX_OP2(pcmpeqw),
2655    [0x76] = MMX_OP2(pcmpeql),
2656    [0x77] = { SSE_DUMMY }, /* emms */
2657    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2658    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2659    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2660    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2661    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2662    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2663    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2664    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2665    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2666    [0xd1] = MMX_OP2(psrlw),
2667    [0xd2] = MMX_OP2(psrld),
2668    [0xd3] = MMX_OP2(psrlq),
2669    [0xd4] = MMX_OP2(paddq),
2670    [0xd5] = MMX_OP2(pmullw),
2671    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2672    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2673    [0xd8] = MMX_OP2(psubusb),
2674    [0xd9] = MMX_OP2(psubusw),
2675    [0xda] = MMX_OP2(pminub),
2676    [0xdb] = MMX_OP2(pand),
2677    [0xdc] = MMX_OP2(paddusb),
2678    [0xdd] = MMX_OP2(paddusw),
2679    [0xde] = MMX_OP2(pmaxub),
2680    [0xdf] = MMX_OP2(pandn),
2681    [0xe0] = MMX_OP2(pavgb),
2682    [0xe1] = MMX_OP2(psraw),
2683    [0xe2] = MMX_OP2(psrad),
2684    [0xe3] = MMX_OP2(pavgw),
2685    [0xe4] = MMX_OP2(pmulhuw),
2686    [0xe5] = MMX_OP2(pmulhw),
2687    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2688    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2689    [0xe8] = MMX_OP2(psubsb),
2690    [0xe9] = MMX_OP2(psubsw),
2691    [0xea] = MMX_OP2(pminsw),
2692    [0xeb] = MMX_OP2(por),
2693    [0xec] = MMX_OP2(paddsb),
2694    [0xed] = MMX_OP2(paddsw),
2695    [0xee] = MMX_OP2(pmaxsw),
2696    [0xef] = MMX_OP2(pxor),
2697    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2698    [0xf1] = MMX_OP2(psllw),
2699    [0xf2] = MMX_OP2(pslld),
2700    [0xf3] = MMX_OP2(psllq),
2701    [0xf4] = MMX_OP2(pmuludq),
2702    [0xf5] = MMX_OP2(pmaddwd),
2703    [0xf6] = MMX_OP2(psadbw),
2704    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2705               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2706    [0xf8] = MMX_OP2(psubb),
2707    [0xf9] = MMX_OP2(psubw),
2708    [0xfa] = MMX_OP2(psubl),
2709    [0xfb] = MMX_OP2(psubq),
2710    [0xfc] = MMX_OP2(paddb),
2711    [0xfd] = MMX_OP2(paddw),
2712    [0xfe] = MMX_OP2(paddl),
2713};
2714
2715static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2716    [0 + 2] = MMX_OP2(psrlw),
2717    [0 + 4] = MMX_OP2(psraw),
2718    [0 + 6] = MMX_OP2(psllw),
2719    [8 + 2] = MMX_OP2(psrld),
2720    [8 + 4] = MMX_OP2(psrad),
2721    [8 + 6] = MMX_OP2(pslld),
2722    [16 + 2] = MMX_OP2(psrlq),
2723    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2724    [16 + 6] = MMX_OP2(psllq),
2725    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2726};
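
    /* Row selection (informal): opcodes 0x71/0x72/0x73 map to rows 0/8/16
       via ((b - 1) & 3) * 8, and the ModRM /r field picks the entry within
       the row; e.g. 0x73 /3 with a 0x66 prefix is psrldq.  */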
2727
2728static const SSEFunc_0_epi sse_op_table3ai[] = {
2729    gen_helper_cvtsi2ss,
2730    gen_helper_cvtsi2sd
2731};
2732
2733#ifdef TARGET_X86_64
2734static const SSEFunc_0_epl sse_op_table3aq[] = {
2735    gen_helper_cvtsq2ss,
2736    gen_helper_cvtsq2sd
2737};
2738#endif
2739
2740static const SSEFunc_i_ep sse_op_table3bi[] = {
2741    gen_helper_cvttss2si,
2742    gen_helper_cvtss2si,
2743    gen_helper_cvttsd2si,
2744    gen_helper_cvtsd2si
2745};
2746
2747#ifdef TARGET_X86_64
2748static const SSEFunc_l_ep sse_op_table3bq[] = {
2749    gen_helper_cvttss2sq,
2750    gen_helper_cvtss2sq,
2751    gen_helper_cvttsd2sq,
2752    gen_helper_cvtsd2sq
2753};
2754#endif
2755
2756static const SSEFunc_0_epp sse_op_table4[8][4] = {
2757    SSE_FOP(cmpeq),
2758    SSE_FOP(cmplt),
2759    SSE_FOP(cmple),
2760    SSE_FOP(cmpunord),
2761    SSE_FOP(cmpneq),
2762    SSE_FOP(cmpnlt),
2763    SSE_FOP(cmpnle),
2764    SSE_FOP(cmpord),
2765};
2766
2767static const SSEFunc_0_epp sse_op_table5[256] = {
2768    [0x0c] = gen_helper_pi2fw,
2769    [0x0d] = gen_helper_pi2fd,
2770    [0x1c] = gen_helper_pf2iw,
2771    [0x1d] = gen_helper_pf2id,
2772    [0x8a] = gen_helper_pfnacc,
2773    [0x8e] = gen_helper_pfpnacc,
2774    [0x90] = gen_helper_pfcmpge,
2775    [0x94] = gen_helper_pfmin,
2776    [0x96] = gen_helper_pfrcp,
2777    [0x97] = gen_helper_pfrsqrt,
2778    [0x9a] = gen_helper_pfsub,
2779    [0x9e] = gen_helper_pfadd,
2780    [0xa0] = gen_helper_pfcmpgt,
2781    [0xa4] = gen_helper_pfmax,
2782    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2783    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2784    [0xaa] = gen_helper_pfsubr,
2785    [0xae] = gen_helper_pfacc,
2786    [0xb0] = gen_helper_pfcmpeq,
2787    [0xb4] = gen_helper_pfmul,
2788    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2789    [0xb7] = gen_helper_pmulhrw_mmx,
2790    [0xbb] = gen_helper_pswapd,
2791    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2792};
2793
2794struct SSEOpHelper_epp {
2795    SSEFunc_0_epp op[2];
2796    uint32_t ext_mask;
2797};
2798
2799struct SSEOpHelper_eppi {
2800    SSEFunc_0_eppi op[2];
2801    uint32_t ext_mask;
2802};
2803
2804#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2805#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2806#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2807#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2808#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2809        CPUID_EXT_PCLMULQDQ }
2810#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2811
2812static const struct SSEOpHelper_epp sse_op_table6[256] = {
2813    [0x00] = SSSE3_OP(pshufb),
2814    [0x01] = SSSE3_OP(phaddw),
2815    [0x02] = SSSE3_OP(phaddd),
2816    [0x03] = SSSE3_OP(phaddsw),
2817    [0x04] = SSSE3_OP(pmaddubsw),
2818    [0x05] = SSSE3_OP(phsubw),
2819    [0x06] = SSSE3_OP(phsubd),
2820    [0x07] = SSSE3_OP(phsubsw),
2821    [0x08] = SSSE3_OP(psignb),
2822    [0x09] = SSSE3_OP(psignw),
2823    [0x0a] = SSSE3_OP(psignd),
2824    [0x0b] = SSSE3_OP(pmulhrsw),
2825    [0x10] = SSE41_OP(pblendvb),
2826    [0x14] = SSE41_OP(blendvps),
2827    [0x15] = SSE41_OP(blendvpd),
2828    [0x17] = SSE41_OP(ptest),
2829    [0x1c] = SSSE3_OP(pabsb),
2830    [0x1d] = SSSE3_OP(pabsw),
2831    [0x1e] = SSSE3_OP(pabsd),
2832    [0x20] = SSE41_OP(pmovsxbw),
2833    [0x21] = SSE41_OP(pmovsxbd),
2834    [0x22] = SSE41_OP(pmovsxbq),
2835    [0x23] = SSE41_OP(pmovsxwd),
2836    [0x24] = SSE41_OP(pmovsxwq),
2837    [0x25] = SSE41_OP(pmovsxdq),
2838    [0x28] = SSE41_OP(pmuldq),
2839    [0x29] = SSE41_OP(pcmpeqq),
2840    [0x2a] = SSE41_SPECIAL, /* movntdqa */
2841    [0x2b] = SSE41_OP(packusdw),
2842    [0x30] = SSE41_OP(pmovzxbw),
2843    [0x31] = SSE41_OP(pmovzxbd),
2844    [0x32] = SSE41_OP(pmovzxbq),
2845    [0x33] = SSE41_OP(pmovzxwd),
2846    [0x34] = SSE41_OP(pmovzxwq),
2847    [0x35] = SSE41_OP(pmovzxdq),
2848    [0x37] = SSE42_OP(pcmpgtq),
2849    [0x38] = SSE41_OP(pminsb),
2850    [0x39] = SSE41_OP(pminsd),
2851    [0x3a] = SSE41_OP(pminuw),
2852    [0x3b] = SSE41_OP(pminud),
2853    [0x3c] = SSE41_OP(pmaxsb),
2854    [0x3d] = SSE41_OP(pmaxsd),
2855    [0x3e] = SSE41_OP(pmaxuw),
2856    [0x3f] = SSE41_OP(pmaxud),
2857    [0x40] = SSE41_OP(pmulld),
2858    [0x41] = SSE41_OP(phminposuw),
2859    [0xdb] = AESNI_OP(aesimc),
2860    [0xdc] = AESNI_OP(aesenc),
2861    [0xdd] = AESNI_OP(aesenclast),
2862    [0xde] = AESNI_OP(aesdec),
2863    [0xdf] = AESNI_OP(aesdeclast),
2864};
2865
2866static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2867    [0x08] = SSE41_OP(roundps),
2868    [0x09] = SSE41_OP(roundpd),
2869    [0x0a] = SSE41_OP(roundss),
2870    [0x0b] = SSE41_OP(roundsd),
2871    [0x0c] = SSE41_OP(blendps),
2872    [0x0d] = SSE41_OP(blendpd),
2873    [0x0e] = SSE41_OP(pblendw),
2874    [0x0f] = SSSE3_OP(palignr),
2875    [0x14] = SSE41_SPECIAL, /* pextrb */
2876    [0x15] = SSE41_SPECIAL, /* pextrw */
2877    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2878    [0x17] = SSE41_SPECIAL, /* extractps */
2879    [0x20] = SSE41_SPECIAL, /* pinsrb */
2880    [0x21] = SSE41_SPECIAL, /* insertps */
2881    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2882    [0x40] = SSE41_OP(dpps),
2883    [0x41] = SSE41_OP(dppd),
2884    [0x42] = SSE41_OP(mpsadbw),
2885    [0x44] = PCLMULQDQ_OP(pclmulqdq),
2886    [0x60] = SSE42_OP(pcmpestrm),
2887    [0x61] = SSE42_OP(pcmpestri),
2888    [0x62] = SSE42_OP(pcmpistrm),
2889    [0x63] = SSE42_OP(pcmpistri),
2890    [0xdf] = AESNI_OP(aeskeygenassist),
2891};
2892
2893static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2894                    target_ulong pc_start, int rex_r)
2895{
2896    int b1, op1_offset, op2_offset, is_xmm, val;
2897    int modrm, mod, rm, reg;
2898    SSEFunc_0_epp sse_fn_epp;
2899    SSEFunc_0_eppi sse_fn_eppi;
2900    SSEFunc_0_ppi sse_fn_ppi;
2901    SSEFunc_0_eppt sse_fn_eppt;
2902    TCGMemOp ot;
2903
2904    b &= 0xff;
2905    if (s->prefix & PREFIX_DATA)
2906        b1 = 1;
2907    else if (s->prefix & PREFIX_REPZ)
2908        b1 = 2;
2909    else if (s->prefix & PREFIX_REPNZ)
2910        b1 = 3;
2911    else
2912        b1 = 0;
2913    sse_fn_epp = sse_op_table1[b][b1];
2914    if (!sse_fn_epp) {
2915        goto unknown_op;
2916    }
2917    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2918        is_xmm = 1;
2919    } else {
2920        if (b1 == 0) {
2921            /* MMX case */
2922            is_xmm = 0;
2923        } else {
2924            is_xmm = 1;
2925        }
2926    }
2927    /* simple MMX/SSE operation */
2928    if (s->flags & HF_TS_MASK) {
2929        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
2930        return;
2931    }
2932    if (s->flags & HF_EM_MASK) {
2933    illegal_op:
2934        gen_illegal_opcode(s);
2935        return;
2936    }
2937    if (is_xmm
2938        && !(s->flags & HF_OSFXSR_MASK)
2939        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
2940        goto unknown_op;
2941    }
2942    if (b == 0x0e) {
2943        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
2944            /* If we were fully decoding this we might use illegal_op.  */
2945            goto unknown_op;
2946        }
2947        /* femms */
2948        gen_helper_emms(cpu_env);
2949        return;
2950    }
2951    if (b == 0x77) {
2952        /* emms */
2953        gen_helper_emms(cpu_env);
2954        return;
2955    }
2956    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
2957       the static cpu state) */
2958    if (!is_xmm) {
2959        gen_helper_enter_mmx(cpu_env);
2960    }
2961
2962    modrm = cpu_ldub_code(env, s->pc++);
2963    reg = ((modrm >> 3) & 7);
2964    if (is_xmm)
2965        reg |= rex_r;
2966    mod = (modrm >> 6) & 3;
2967    if (sse_fn_epp == SSE_SPECIAL) {
2968        b |= (b1 << 8);
2969        switch(b) {
2970        case 0x0e7: /* movntq */
2971            if (mod == 3) {
2972                goto illegal_op;
2973            }
2974            gen_lea_modrm(env, s, modrm);
2975            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
2976            break;
2977        case 0x1e7: /* movntdq */
2978        case 0x02b: /* movntps */
2979        case 0x12b: /* movntpd */
2980            if (mod == 3)
2981                goto illegal_op;
2982            gen_lea_modrm(env, s, modrm);
2983            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2984            break;
2985        case 0x3f0: /* lddqu */
2986            if (mod == 3)
2987                goto illegal_op;
2988            gen_lea_modrm(env, s, modrm);
2989            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2990            break;
2991        case 0x22b: /* movntss */
2992        case 0x32b: /* movntsd */
2993            if (mod == 3)
2994                goto illegal_op;
2995            gen_lea_modrm(env, s, modrm);
2996            if (b1 & 1) {
2997                gen_stq_env_A0(s, offsetof(CPUX86State,
2998                                           xmm_regs[reg].ZMM_Q(0)));
2999            } else {
3000                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3001                    xmm_regs[reg].ZMM_L(0)));
3002                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3003            }
3004            break;
3005        case 0x6e: /* movd mm, ea */
3006#ifdef TARGET_X86_64
3007            if (s->dflag == MO_64) {
3008                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3009                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3010            } else
3011#endif
3012            {
3013                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3014                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3015                                 offsetof(CPUX86State,fpregs[reg].mmx));
3016                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3017                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3018            }
3019            break;
3020        case 0x16e: /* movd xmm, ea */
3021#ifdef TARGET_X86_64
3022            if (s->dflag == MO_64) {
3023                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3024                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3025                                 offsetof(CPUX86State,xmm_regs[reg]));
3026                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3027            } else
3028#endif
3029            {
3030                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3031                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3032                                 offsetof(CPUX86State,xmm_regs[reg]));
3033                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3034                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3035            }
3036            break;
3037        case 0x6f: /* movq mm, ea */
3038            if (mod != 3) {
3039                gen_lea_modrm(env, s, modrm);
3040                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3041            } else {
3042                rm = (modrm & 7);
3043                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3044                               offsetof(CPUX86State,fpregs[rm].mmx));
3045                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3046                               offsetof(CPUX86State,fpregs[reg].mmx));
3047            }
3048            break;
3049        case 0x010: /* movups */
3050        case 0x110: /* movupd */
3051        case 0x028: /* movaps */
3052        case 0x128: /* movapd */
3053        case 0x16f: /* movdqa xmm, ea */
3054        case 0x26f: /* movdqu xmm, ea */
3055            if (mod != 3) {
3056                gen_lea_modrm(env, s, modrm);
3057                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3058            } else {
3059                rm = (modrm & 7) | REX_B(s);
3060                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3061                            offsetof(CPUX86State,xmm_regs[rm]));
3062            }
3063            break;
3064        case 0x210: /* movss xmm, ea */
3065            if (mod != 3) {
3066                gen_lea_modrm(env, s, modrm);
3067                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3068                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3069                tcg_gen_movi_tl(cpu_T0, 0);
3070                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3071                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3072                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3073            } else {
3074                rm = (modrm & 7) | REX_B(s);
3075                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3076                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3077            }
3078            break;
3079        case 0x310: /* movsd xmm, ea */
3080            if (mod != 3) {
3081                gen_lea_modrm(env, s, modrm);
3082                gen_ldq_env_A0(s, offsetof(CPUX86State,
3083                                           xmm_regs[reg].ZMM_Q(0)));
3084                tcg_gen_movi_tl(cpu_T0, 0);
3085                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3086                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3087            } else {
3088                rm = (modrm & 7) | REX_B(s);
3089                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3090                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3091            }
3092            break;
3093        case 0x012: /* movlps */
3094        case 0x112: /* movlpd */
3095            if (mod != 3) {
3096                gen_lea_modrm(env, s, modrm);
3097                gen_ldq_env_A0(s, offsetof(CPUX86State,
3098                                           xmm_regs[reg].ZMM_Q(0)));
3099            } else {
3100                /* movhlps */
3101                rm = (modrm & 7) | REX_B(s);
3102                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3103                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3104            }
3105            break;
3106        case 0x212: /* movsldup */
3107            if (mod != 3) {
3108                gen_lea_modrm(env, s, modrm);
3109                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3110            } else {
3111                rm = (modrm & 7) | REX_B(s);
3112                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3113                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3114                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3115                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3116            }
3117            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3118                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3119            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3120                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3121            break;
3122        case 0x312: /* movddup */
3123            if (mod != 3) {
3124                gen_lea_modrm(env, s, modrm);
3125                gen_ldq_env_A0(s, offsetof(CPUX86State,
3126                                           xmm_regs[reg].ZMM_Q(0)));
3127            } else {
3128                rm = (modrm & 7) | REX_B(s);
3129                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3130                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3131            }
3132            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3133                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3134            break;
3135        case 0x016: /* movhps */
3136        case 0x116: /* movhpd */
3137            if (mod != 3) {
3138                gen_lea_modrm(env, s, modrm);
3139                gen_ldq_env_A0(s, offsetof(CPUX86State,
3140                                           xmm_regs[reg].ZMM_Q(1)));
3141            } else {
3142                /* movlhps */
3143                rm = (modrm & 7) | REX_B(s);
3144                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3145                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3146            }
3147            break;
3148        case 0x216: /* movshdup */
3149            if (mod != 3) {
3150                gen_lea_modrm(env, s, modrm);
3151                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3152            } else {
3153                rm = (modrm & 7) | REX_B(s);
3154                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3155                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3156                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3157                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3158            }
3159            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3160                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3161            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3162                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3163            break;
3164        case 0x178:
3165        case 0x378:
3166            {
3167                int bit_index, field_length;
3168
3169                if (b1 == 1 && reg != 0)
3170                    goto illegal_op;
3171                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3172                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3173                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3174                    offsetof(CPUX86State,xmm_regs[reg]));
3175                if (b1 == 1)
3176                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
3177                                       tcg_const_i32(bit_index),
3178                                       tcg_const_i32(field_length));
3179                else
3180                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
3181                                         tcg_const_i32(bit_index),
3182                                         tcg_const_i32(field_length));
3183            }
3184            break;
3185        case 0x7e: /* movd ea, mm */
3186#ifdef TARGET_X86_64
3187            if (s->dflag == MO_64) {
3188                tcg_gen_ld_i64(cpu_T0, cpu_env,
3189                               offsetof(CPUX86State,fpregs[reg].mmx));
3190                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3191            } else
3192#endif
3193            {
3194                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3195                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3196                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3197            }
3198            break;
3199        case 0x17e: /* movd ea, xmm */
3200#ifdef TARGET_X86_64
3201            if (s->dflag == MO_64) {
3202                tcg_gen_ld_i64(cpu_T0, cpu_env,
3203                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3204                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3205            } else
3206#endif
3207            {
3208                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3209                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3210                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3211            }
3212            break;
3213        case 0x27e: /* movq xmm, ea */
3214            if (mod != 3) {
3215                gen_lea_modrm(env, s, modrm);
3216                gen_ldq_env_A0(s, offsetof(CPUX86State,
3217                                           xmm_regs[reg].ZMM_Q(0)));
3218            } else {
3219                rm = (modrm & 7) | REX_B(s);
3220                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3221                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3222            }
3223            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3224            break;
3225        case 0x7f: /* movq ea, mm */
3226            if (mod != 3) {
3227                gen_lea_modrm(env, s, modrm);
3228                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3229            } else {
3230                rm = (modrm & 7);
3231                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3232                            offsetof(CPUX86State,fpregs[reg].mmx));
3233            }
3234            break;
3235        case 0x011: /* movups */
3236        case 0x111: /* movupd */
3237        case 0x029: /* movaps */
3238        case 0x129: /* movapd */
3239        case 0x17f: /* movdqa ea, xmm */
3240        case 0x27f: /* movdqu ea, xmm */
3241            if (mod != 3) {
3242                gen_lea_modrm(env, s, modrm);
3243                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3244            } else {
3245                rm = (modrm & 7) | REX_B(s);
3246                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3247                            offsetof(CPUX86State,xmm_regs[reg]));
3248            }
3249            break;
3250        case 0x211: /* movss ea, xmm */
3251            if (mod != 3) {
3252                gen_lea_modrm(env, s, modrm);
3253                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3254                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3255            } else {
3256                rm = (modrm & 7) | REX_B(s);
3257                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
3258                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3259            }
3260            break;
3261        case 0x311: /* movsd ea, xmm */
3262            if (mod != 3) {
3263                gen_lea_modrm(env, s, modrm);
3264                gen_stq_env_A0(s, offsetof(CPUX86State,
3265                                           xmm_regs[reg].ZMM_Q(0)));
3266            } else {
3267                rm = (modrm & 7) | REX_B(s);
3268                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3269                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3270            }
3271            break;
        case 0x013: /* movlps ea, xmm */
        case 0x113: /* movlpd ea, xmm */
3274            if (mod != 3) {
3275                gen_lea_modrm(env, s, modrm);
3276                gen_stq_env_A0(s, offsetof(CPUX86State,
3277                                           xmm_regs[reg].ZMM_Q(0)));
3278            } else {
3279                goto illegal_op;
3280            }
3281            break;
        case 0x017: /* movhps ea, xmm */
        case 0x117: /* movhpd ea, xmm */
3284            if (mod != 3) {
3285                gen_lea_modrm(env, s, modrm);
3286                gen_stq_env_A0(s, offsetof(CPUX86State,
3287                                           xmm_regs[reg].ZMM_Q(1)));
3288            } else {
3289                goto illegal_op;
3290            }
3291            break;
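        /* Immediate-count forms of the MMX/SSE shifts.  The 8-bit count
           is staged into mmx_t0/xmm_t0 so that the same helpers as the
           register-count forms can be reused.  */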
3292        case 0x71: /* shift mm, im */
3293        case 0x72:
3294        case 0x73:
3295        case 0x171: /* shift xmm, im */
3296        case 0x172:
3297        case 0x173:
3298            if (b1 >= 2) {
3299                goto unknown_op;
3300            }
3301            val = cpu_ldub_code(env, s->pc++);
3302            if (is_xmm) {
3303                tcg_gen_movi_tl(cpu_T0, val);
3304                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3305                tcg_gen_movi_tl(cpu_T0, 0);
3306                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
3307                op1_offset = offsetof(CPUX86State,xmm_t0);
3308            } else {
3309                tcg_gen_movi_tl(cpu_T0, val);
3310                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3311                tcg_gen_movi_tl(cpu_T0, 0);
3312                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3313                op1_offset = offsetof(CPUX86State,mmx_t0);
3314            }
3315            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3316                                       (((modrm >> 3)) & 7)][b1];
3317            if (!sse_fn_epp) {
3318                goto unknown_op;
3319            }
3320            if (is_xmm) {
3321                rm = (modrm & 7) | REX_B(s);
3322                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3323            } else {
3324                rm = (modrm & 7);
3325                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3326            }
3327            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3328            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3329            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3330            break;
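        /* movmsk gathers the sign bit of each packed element into the
           low bits of a general register.  */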
3331        case 0x050: /* movmskps */
3332            rm = (modrm & 7) | REX_B(s);
3333            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3334                             offsetof(CPUX86State,xmm_regs[rm]));
3335            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3336            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3337            break;
3338        case 0x150: /* movmskpd */
3339            rm = (modrm & 7) | REX_B(s);
3340            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3341                             offsetof(CPUX86State,xmm_regs[rm]));
3342            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3343            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3344            break;
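        /* Conversions between MMX and SSE values; enter_mmx resets the
           x87 stack top and tag word for MMX use.  */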
3345        case 0x02a: /* cvtpi2ps */
3346        case 0x12a: /* cvtpi2pd */
3347            gen_helper_enter_mmx(cpu_env);
3348            if (mod != 3) {
3349                gen_lea_modrm(env, s, modrm);
3350                op2_offset = offsetof(CPUX86State,mmx_t0);
3351                gen_ldq_env_A0(s, op2_offset);
3352            } else {
3353                rm = (modrm & 7);
3354                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3355            }
3356            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3357            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3358            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3359            switch(b >> 8) {
3360            case 0x0:
3361                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3362                break;
3363            default:
3364            case 0x1:
3365                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3366                break;
3367            }
3368            break;
3369        case 0x22a: /* cvtsi2ss */
3370        case 0x32a: /* cvtsi2sd */
3371            ot = mo_64_32(s->dflag);
3372            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3373            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3374            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3375            if (ot == MO_32) {
3376                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3377                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3378                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3379            } else {
3380#ifdef TARGET_X86_64
3381                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3382                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
3383#else
3384                goto illegal_op;
3385#endif
3386            }
3387            break;
3388        case 0x02c: /* cvttps2pi */
3389        case 0x12c: /* cvttpd2pi */
3390        case 0x02d: /* cvtps2pi */
3391        case 0x12d: /* cvtpd2pi */
3392            gen_helper_enter_mmx(cpu_env);
3393            if (mod != 3) {
3394                gen_lea_modrm(env, s, modrm);
3395                op2_offset = offsetof(CPUX86State,xmm_t0);
3396                gen_ldo_env_A0(s, op2_offset);
3397            } else {
3398                rm = (modrm & 7) | REX_B(s);
3399                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3400            }
3401            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3402            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3403            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3404            switch(b) {
3405            case 0x02c:
3406                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3407                break;
3408            case 0x12c:
3409                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3410                break;
3411            case 0x02d:
3412                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3413                break;
3414            case 0x12d:
3415                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3416                break;
3417            }
3418            break;
3419        case 0x22c: /* cvttss2si */
3420        case 0x32c: /* cvttsd2si */
3421        case 0x22d: /* cvtss2si */
3422        case 0x32d: /* cvtsd2si */
3423            ot = mo_64_32(s->dflag);
3424            if (mod != 3) {
3425                gen_lea_modrm(env, s, modrm);
3426                if ((b >> 8) & 1) {
3427                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3428                } else {
3429                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3430                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3431                }
3432                op2_offset = offsetof(CPUX86State,xmm_t0);
3433            } else {
3434                rm = (modrm & 7) | REX_B(s);
3435                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3436            }
3437            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3438            if (ot == MO_32) {
3439                SSEFunc_i_ep sse_fn_i_ep =
3440                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3441                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3442                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
3443            } else {
3444#ifdef TARGET_X86_64
3445                SSEFunc_l_ep sse_fn_l_ep =
3446                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3447                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
3448#else
3449                goto illegal_op;
3450#endif
3451            }
3452            gen_op_mov_reg_v(ot, reg, cpu_T0);
3453            break;
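        /* pinsrw: an immediate byte follows the operand, so rip_offset
           must be set before the operand is decoded.  */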
3454        case 0xc4: /* pinsrw */
3455        case 0x1c4:
3456            s->rip_offset = 1;
3457            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3458            val = cpu_ldub_code(env, s->pc++);
3459            if (b1) {
3460                val &= 7;
3461                tcg_gen_st16_tl(cpu_T0, cpu_env,
3462                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3463            } else {
3464                val &= 3;
3465                tcg_gen_st16_tl(cpu_T0, cpu_env,
3466                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3467            }
3468            break;
3469        case 0xc5: /* pextrw */
3470        case 0x1c5:
3471            if (mod != 3)
3472                goto illegal_op;
3473            ot = mo_64_32(s->dflag);
3474            val = cpu_ldub_code(env, s->pc++);
3475            if (b1) {
3476                val &= 7;
3477                rm = (modrm & 7) | REX_B(s);
3478                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3479                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3480            } else {
3481                val &= 3;
3482                rm = (modrm & 7);
3483                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3484                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3485            }
3486            reg = ((modrm >> 3) & 7) | rex_r;
3487            gen_op_mov_reg_v(ot, reg, cpu_T0);
3488            break;
3489        case 0x1d6: /* movq ea, xmm */
3490            if (mod != 3) {
3491                gen_lea_modrm(env, s, modrm);
3492                gen_stq_env_A0(s, offsetof(CPUX86State,
3493                                           xmm_regs[reg].ZMM_Q(0)));
3494            } else {
3495                rm = (modrm & 7) | REX_B(s);
3496                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3497                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3498                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3499            }
3500            break;
3501        case 0x2d6: /* movq2dq */
3502            gen_helper_enter_mmx(cpu_env);
3503            rm = (modrm & 7);
3504            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3505                        offsetof(CPUX86State,fpregs[rm].mmx));
3506            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3507            break;
3508        case 0x3d6: /* movdq2q */
3509            gen_helper_enter_mmx(cpu_env);
3510            rm = (modrm & 7) | REX_B(s);
3511            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3512                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3513            break;
3514        case 0xd7: /* pmovmskb */
3515        case 0x1d7:
3516            if (mod != 3)
3517                goto illegal_op;
3518            if (b1) {
3519                rm = (modrm & 7) | REX_B(s);
3520                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3521                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3522            } else {
3523                rm = (modrm & 7);
3524                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3525                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3526            }
3527            reg = ((modrm >> 3) & 7) | rex_r;
3528            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3529            break;
3530
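        /* Three-byte 0f 38 opcodes.  The second opcode byte has already
           been fetched into modrm; f0..ff are the integer extensions
           handled below, everything else dispatches via sse_op_table6.  */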
3531        case 0x138:
3532        case 0x038:
3533            b = modrm;
3534            if ((b & 0xf0) == 0xf0) {
3535                goto do_0f_38_fx;
3536            }
3537            modrm = cpu_ldub_code(env, s->pc++);
3538            rm = modrm & 7;
3539            reg = ((modrm >> 3) & 7) | rex_r;
3540            mod = (modrm >> 6) & 3;
3541            if (b1 >= 2) {
3542                goto unknown_op;
3543            }
3544
3545            sse_fn_epp = sse_op_table6[b].op[b1];
3546            if (!sse_fn_epp) {
3547                goto unknown_op;
3548            }
3549            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3550                goto illegal_op;
3551
3552            if (b1) {
3553                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3554                if (mod == 3) {
3555                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3556                } else {
3557                    op2_offset = offsetof(CPUX86State,xmm_t0);
3558                    gen_lea_modrm(env, s, modrm);
3559                    switch (b) {
3560                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3561                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3562                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3563                        gen_ldq_env_A0(s, op2_offset +
3564                                        offsetof(ZMMReg, ZMM_Q(0)));
3565                        break;
3566                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3567                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3568                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3569                                            s->mem_index, MO_LEUL);
3570                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3571                                        offsetof(ZMMReg, ZMM_L(0)));
3572                        break;
3573                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3574                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3575                                           s->mem_index, MO_LEUW);
3576                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3577                                        offsetof(ZMMReg, ZMM_W(0)));
3578                        break;
                    case 0x2a:            /* movntdqa */
3580                        gen_ldo_env_A0(s, op1_offset);
3581                        return;
3582                    default:
3583                        gen_ldo_env_A0(s, op2_offset);
3584                    }
3585                }
3586            } else {
3587                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3588                if (mod == 3) {
3589                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3590                } else {
3591                    op2_offset = offsetof(CPUX86State,mmx_t0);
3592                    gen_lea_modrm(env, s, modrm);
3593                    gen_ldq_env_A0(s, op2_offset);
3594                }
3595            }
3596            if (sse_fn_epp == SSE_SPECIAL) {
3597                goto unknown_op;
3598            }
3599
3600            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3601            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3602            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3603
3604            if (b == 0x17) {
3605                set_cc_op(s, CC_OP_EFLAGS);
3606            }
3607            break;
3608
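        /* f3- and f2-prefixed 0f 38 opcodes arrive here directly; the
           no-prefix and 66 forms branch in via do_0f_38_fx when the
           second opcode byte is f0..ff.  */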
3609        case 0x238:
3610        case 0x338:
3611        do_0f_38_fx:
3612            /* Various integer extensions at 0f 38 f[0-f].  */
3613            b = modrm | (b1 << 8);
3614            modrm = cpu_ldub_code(env, s->pc++);
3615            reg = ((modrm >> 3) & 7) | rex_r;
3616
3617            switch (b) {
3618            case 0x3f0: /* crc32 Gd,Eb */
3619            case 0x3f1: /* crc32 Gd,Ey */
3620            do_crc32:
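                /* The SSE4.2 crc32 instruction accumulates using the
                   CRC-32C (Castagnoli) polynomial; the helper receives
                   the operand width in bits.  */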
3621                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3622                    goto illegal_op;
3623                }
3624                if ((b & 0xff) == 0xf0) {
3625                    ot = MO_8;
3626                } else if (s->dflag != MO_64) {
3627                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3628                } else {
3629                    ot = MO_64;
3630                }
3631
3632                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3633                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3634                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
3635                                 cpu_T0, tcg_const_i32(8 << ot));
3636
3637                ot = mo_64_32(s->dflag);
3638                gen_op_mov_reg_v(ot, reg, cpu_T0);
3639                break;
3640
3641            case 0x1f0: /* crc32 or movbe */
3642            case 0x1f1:
                /* For these insns, the f2 prefix is supposed to have
                   priority over the 66 prefix, but that is not how b1 was
                   computed above, so check for f2 explicitly here.  */
3646                if (s->prefix & PREFIX_REPNZ) {
3647                    goto do_crc32;
3648                }
3649                /* FALLTHRU */
3650            case 0x0f0: /* movbe Gy,My */
3651            case 0x0f1: /* movbe My,Gy */
3652                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3653                    goto illegal_op;
3654                }
3655                if (s->dflag != MO_64) {
3656                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3657                } else {
3658                    ot = MO_64;
3659                }
3660
3661                gen_lea_modrm(env, s, modrm);
3662                if ((b & 1) == 0) {
3663                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
3664                                       s->mem_index, ot | MO_BE);
3665                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3666                } else {
3667                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3668                                       s->mem_index, ot | MO_BE);
3669                }
3670                break;
3671
3672            case 0x0f2: /* andn Gy, By, Ey */
3673                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3674                    || !(s->prefix & PREFIX_VEX)
3675                    || s->vex_l != 0) {
3676                    goto illegal_op;
3677                }
3678                ot = mo_64_32(s->dflag);
3679                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3680                tcg_gen_andc_tl(cpu_T0, cpu_regs[s->vex_v], cpu_T0);
3681                gen_op_mov_reg_v(ot, reg, cpu_T0);
3682                gen_op_update1_cc();
3683                set_cc_op(s, CC_OP_LOGICB + ot);
3684                break;
3685
3686            case 0x0f7: /* bextr Gy, Ey, By */
3687                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3688                    || !(s->prefix & PREFIX_VEX)
3689                    || s->vex_l != 0) {
3690                    goto illegal_op;
3691                }
3692                ot = mo_64_32(s->dflag);
3693                {
3694                    TCGv bound, zero;
3695
3696                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3697                    /* Extract START, and shift the operand.
3698                       Shifts larger than operand size get zeros.  */
3699                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3700                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
3701
3702                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3703                    zero = tcg_const_tl(0);
3704                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
3705                                       cpu_T0, zero);
3706                    tcg_temp_free(zero);
3707
3708                    /* Extract the LEN into a mask.  Lengths larger than
3709                       operand size get all ones.  */
3710                    tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
3711                    tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
3712                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3713                                       cpu_A0, bound);
3714                    tcg_temp_free(bound);
3715                    tcg_gen_movi_tl(cpu_T1, 1);
3716                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
3717                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
3718                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3719
3720                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3721                    gen_op_update1_cc();
3722                    set_cc_op(s, CC_OP_LOGICB + ot);
3723                }
3724                break;
3725
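            /* bzhi: zero the bits of Ey at and above the bit index taken
               from By[7:0], e.g. src 0xff with index 4 gives 0x0f.  */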
3726            case 0x0f5: /* bzhi Gy, Ey, By */
3727                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3728                    || !(s->prefix & PREFIX_VEX)
3729                    || s->vex_l != 0) {
3730                    goto illegal_op;
3731                }
3732                ot = mo_64_32(s->dflag);
3733                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3734                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
3735                {
3736                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3737                    /* Note that since we're using BMILG (in order to get O
3738                       cleared) we need to store the inverse into C.  */
                    tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src,
                                       cpu_T1, bound);
                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T1, cpu_T1,
                                       bound, cpu_T1, bound);
3743                    tcg_temp_free(bound);
3744                }
3745                tcg_gen_movi_tl(cpu_A0, -1);
3746                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
3747                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
3748                gen_op_mov_reg_v(ot, reg, cpu_T0);
3749                gen_op_update1_cc();
3750                set_cc_op(s, CC_OP_BMILGB + ot);
3751                break;
3752
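            /* mulx: unsigned widening multiply of Ey by rDX that does not
               modify the flags; the low half goes to the vvvv register,
               the high half to Gy.  */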
3753            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3754                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3755                    || !(s->prefix & PREFIX_VEX)
3756                    || s->vex_l != 0) {
3757                    goto illegal_op;
3758                }
3759                ot = mo_64_32(s->dflag);
3760                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3761                switch (ot) {
3762                default:
3763                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3764                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3765                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3766                                      cpu_tmp2_i32, cpu_tmp3_i32);
3767                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3768                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3769                    break;
3770#ifdef TARGET_X86_64
3771                case MO_64:
3772                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
3773                                      cpu_T0, cpu_regs[R_EDX]);
3774                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
3775                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
3776                    break;
3777#endif
3778                }
3779                break;
3780
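            /* pdep scatters the low-order bits of the By source into the
               set-bit positions of the Ey mask, lowest bits first; pext
               (below) is the inverse gather.  E.g. with mask 0b11010,
               pdep(0b101) = 0b10010 and pext(0b10010) = 0b101.  */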
3781            case 0x3f5: /* pdep Gy, By, Ey */
3782                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3783                    || !(s->prefix & PREFIX_VEX)
3784                    || s->vex_l != 0) {
3785                    goto illegal_op;
3786                }
3787                ot = mo_64_32(s->dflag);
3788                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the source operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pdep(cpu_regs[reg], cpu_T1, cpu_T0);
3797                break;
3798
3799            case 0x2f5: /* pext Gy, By, Ey */
3800                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3801                    || !(s->prefix & PREFIX_VEX)
3802                    || s->vex_l != 0) {
3803                    goto illegal_op;
3804                }
3805                ot = mo_64_32(s->dflag);
3806                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the source operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pext(cpu_regs[reg], cpu_T1, cpu_T0);
3815                break;
3816
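            /* adcx adds with carry-in/out through CF only, adox through
               OF only; because each leaves the other flag alone, two
               independent carry chains (e.g. in multi-precision
               multiplies) can be interleaved.  */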
3817            case 0x1f6: /* adcx Gy, Ey */
3818            case 0x2f6: /* adox Gy, Ey */
3819                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3820                    goto illegal_op;
3821                } else {
3822                    TCGv carry_in, carry_out, zero;
3823                    int end_op;
3824
3825                    ot = mo_64_32(s->dflag);
3826                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3827
3828                    /* Re-use the carry-out from a previous round.  */
3829                    TCGV_UNUSED(carry_in);
3830                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3831                    switch (s->cc_op) {
3832                    case CC_OP_ADCX:
3833                        if (b == 0x1f6) {
3834                            carry_in = cpu_cc_dst;
3835                            end_op = CC_OP_ADCX;
3836                        } else {
3837                            end_op = CC_OP_ADCOX;
3838                        }
3839                        break;
3840                    case CC_OP_ADOX:
3841                        if (b == 0x1f6) {
3842                            end_op = CC_OP_ADCOX;
3843                        } else {
3844                            carry_in = cpu_cc_src2;
3845                            end_op = CC_OP_ADOX;
3846                        }
3847                        break;
3848                    case CC_OP_ADCOX:
3849                        end_op = CC_OP_ADCOX;
3850                        carry_in = carry_out;
3851                        break;
3852                    default:
3853                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3854                        break;
3855                    }
3856                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
3857                    if (TCGV_IS_UNUSED(carry_in)) {
3858                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3859                            gen_compute_eflags(s);
3860                        }
3861                        carry_in = cpu_tmp0;
3862                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
3863                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
3864                        tcg_gen_andi_tl(carry_in, carry_in, 1);
3865                    }
3866
3867                    switch (ot) {
3868#ifdef TARGET_X86_64
3869                    case MO_32:
3870                        /* If we know TL is 64-bit, and we want a 32-bit
3871                           result, just do everything in 64-bit arithmetic.  */
3872                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3873                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
3874                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
3875                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
3876                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
3877                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
3878                        break;
3879#endif
3880                    default:
3881                        /* Otherwise compute the carry-out in two steps.  */
3882                        zero = tcg_const_tl(0);
3883                        tcg_gen_add2_tl(cpu_T0, carry_out,
3884                                        cpu_T0, zero,
3885                                        carry_in, zero);
3886                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
3887                                        cpu_regs[reg], carry_out,
3888                                        cpu_T0, zero);
3889                        tcg_temp_free(zero);
3890                        break;
3891                    }
3892                    set_cc_op(s, end_op);
3893                }
3894                break;
3895
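            /* shlx/sarx/shrx: shifts that take the count from the vvvv
               register (masked to the operand width) and leave the flags
               untouched.  */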
3896            case 0x1f7: /* shlx Gy, Ey, By */
3897            case 0x2f7: /* sarx Gy, Ey, By */
3898            case 0x3f7: /* shrx Gy, Ey, By */
3899                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3900                    || !(s->prefix & PREFIX_VEX)
3901                    || s->vex_l != 0) {
3902                    goto illegal_op;
3903                }
3904                ot = mo_64_32(s->dflag);
3905                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3906                if (ot == MO_64) {
3907                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
3908                } else {
3909                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
3910                }
3911                if (b == 0x1f7) {
3912                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
3913                } else if (b == 0x2f7) {
3914                    if (ot != MO_64) {
3915                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
3916                    }
3917                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
3918                } else {
3919                    if (ot != MO_64) {
3920                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
3921                    }
3922                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
3923                }
3924                gen_op_mov_reg_v(ot, reg, cpu_T0);
3925                break;
3926
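            /* Group 17: the BMI1 unary bit ops.  With x = 0b101000:
               blsr gives x & (x - 1) = 0b100000 (reset lowest set bit),
               blsmsk gives x ^ (x - 1) = 0b001111 (mask through lowest
               set bit), blsi gives x & -x = 0b001000 (isolate lowest
               set bit).  */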
3927            case 0x0f3:
3928            case 0x1f3:
3929            case 0x2f3:
3930            case 0x3f3: /* Group 17 */
3931                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3932                    || !(s->prefix & PREFIX_VEX)
3933                    || s->vex_l != 0) {
3934                    goto illegal_op;
3935                }
3936                ot = mo_64_32(s->dflag);
3937                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3938
                /* Flags for all three ops are computed from the original
                   source and the result.  */
                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
                switch (reg & 7) {
                case 1: /* blsr By,Ey */
                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                    break;

                case 2: /* blsmsk By,Ey */
                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
                    break;

                case 3: /* blsi By, Ey */
                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                    break;

                default:
                    goto unknown_op;
                }
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;
3968
3969            default:
3970                goto unknown_op;
3971            }
3972            break;
3973
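        /* Three-byte 0f 3a opcodes, which all take an immediate byte
           after the operands.  SSE_SPECIAL entries are handled inline
           below; the rest dispatch via sse_op_table7.  */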
3974        case 0x03a:
3975        case 0x13a:
3976            b = modrm;
3977            modrm = cpu_ldub_code(env, s->pc++);
3978            rm = modrm & 7;
3979            reg = ((modrm >> 3) & 7) | rex_r;
3980            mod = (modrm >> 6) & 3;
3981            if (b1 >= 2) {
3982                goto unknown_op;
3983            }
3984
3985            sse_fn_eppi = sse_op_table7[b].op[b1];
3986            if (!sse_fn_eppi) {
3987                goto unknown_op;
3988            }
3989            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
3990                goto illegal_op;
3991
3992            if (sse_fn_eppi == SSE_SPECIAL) {
3993                ot = mo_64_32(s->dflag);
3994                rm = (modrm & 7) | REX_B(s);
3995                if (mod != 3)
3996                    gen_lea_modrm(env, s, modrm);
3997                reg = ((modrm >> 3) & 7) | rex_r;
3998                val = cpu_ldub_code(env, s->pc++);
3999                switch (b) {
4000                case 0x14: /* pextrb */
4001                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4002                                            xmm_regs[reg].ZMM_B(val & 15)));
4003                    if (mod == 3) {
4004                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4005                    } else {
4006                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4007                                           s->mem_index, MO_UB);
4008                    }
4009                    break;
4010                case 0x15: /* pextrw */
4011                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4012                                            xmm_regs[reg].ZMM_W(val & 7)));
4013                    if (mod == 3) {
4014                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4015                    } else {
4016                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4017                                           s->mem_index, MO_LEUW);
4018                    }
4019                    break;
4020                case 0x16:
4021                    if (ot == MO_32) { /* pextrd */
4022                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4023                                        offsetof(CPUX86State,
4024                                                xmm_regs[reg].ZMM_L(val & 3)));
4025                        if (mod == 3) {
4026                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4027                        } else {
4028                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4029                                                s->mem_index, MO_LEUL);
4030                        }
4031                    } else { /* pextrq */
4032#ifdef TARGET_X86_64
4033                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4034                                        offsetof(CPUX86State,
4035                                                xmm_regs[reg].ZMM_Q(val & 1)));
4036                        if (mod == 3) {
4037                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4038                        } else {
4039                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4040                                                s->mem_index, MO_LEQ);
4041                        }
4042#else
4043                        goto illegal_op;
4044#endif
4045                    }
4046                    break;
4047                case 0x17: /* extractps */
4048                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4049                                            xmm_regs[reg].ZMM_L(val & 3)));
4050                    if (mod == 3) {
4051                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4052                    } else {
4053                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4054                                           s->mem_index, MO_LEUL);
4055                    }
4056                    break;
4057                case 0x20: /* pinsrb */
4058                    if (mod == 3) {
4059                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
4060                    } else {
4061                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
4062                                           s->mem_index, MO_UB);
4063                    }
4064                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4065                                            xmm_regs[reg].ZMM_B(val & 15)));
4066                    break;
4067                case 0x21: /* insertps */
4068                    if (mod == 3) {
4069                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4070                                        offsetof(CPUX86State,xmm_regs[rm]
4071                                                .ZMM_L((val >> 6) & 3)));
4072                    } else {
4073                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4074                                            s->mem_index, MO_LEUL);
4075                    }
4076                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4077                                    offsetof(CPUX86State,xmm_regs[reg]
4078                                            .ZMM_L((val >> 4) & 3)));
4079                    if ((val >> 0) & 1)
4080                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4081                                        cpu_env, offsetof(CPUX86State,
4082                                                xmm_regs[reg].ZMM_L(0)));
4083                    if ((val >> 1) & 1)
4084                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4085                                        cpu_env, offsetof(CPUX86State,
4086                                                xmm_regs[reg].ZMM_L(1)));
4087                    if ((val >> 2) & 1)
4088                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4089                                        cpu_env, offsetof(CPUX86State,
4090                                                xmm_regs[reg].ZMM_L(2)));
4091                    if ((val >> 3) & 1)
4092                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4093                                        cpu_env, offsetof(CPUX86State,
4094                                                xmm_regs[reg].ZMM_L(3)));
4095                    break;
4096                case 0x22:
4097                    if (ot == MO_32) { /* pinsrd */
4098                        if (mod == 3) {
4099                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4100                        } else {
4101                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4102                                                s->mem_index, MO_LEUL);
4103                        }
4104                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4105                                        offsetof(CPUX86State,
4106                                                xmm_regs[reg].ZMM_L(val & 3)));
4107                    } else { /* pinsrq */
4108#ifdef TARGET_X86_64
4109                        if (mod == 3) {
4110                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4111                        } else {
4112                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4113                                                s->mem_index, MO_LEQ);
4114                        }
4115                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4116                                        offsetof(CPUX86State,
4117                                                xmm_regs[reg].ZMM_Q(val & 1)));
4118#else
4119                        goto illegal_op;
4120#endif
4121                    }
4122                    break;
4123                }
4124                return;
4125            }
4126
4127            if (b1) {
4128                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4129                if (mod == 3) {
4130                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4131                } else {
4132                    op2_offset = offsetof(CPUX86State,xmm_t0);
4133                    gen_lea_modrm(env, s, modrm);
4134                    gen_ldo_env_A0(s, op2_offset);
4135                }
4136            } else {
4137                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4138                if (mod == 3) {
4139                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4140                } else {
4141                    op2_offset = offsetof(CPUX86State,mmx_t0);
4142                    gen_lea_modrm(env, s, modrm);
4143                    gen_ldq_env_A0(s, op2_offset);
4144                }
4145            }
4146            val = cpu_ldub_code(env, s->pc++);
4147
4148            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4149                set_cc_op(s, CC_OP_EFLAGS);
4150
4151                if (s->dflag == MO_64) {
4152                    /* The helper must use entire 64-bit gp registers */
4153                    val |= 1 << 8;
4154                }
4155            }
4156
4157            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4158            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4159            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4160            break;
4161
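        /* rorx: rotate right by an immediate count without touching the
           flags, e.g. a 32-bit rorx by 1 turns 0x00000001 into
           0x80000000.  */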
4162        case 0x33a:
4163            /* Various integer extensions at 0f 3a f[0-f].  */
4164            b = modrm | (b1 << 8);
4165            modrm = cpu_ldub_code(env, s->pc++);
4166            reg = ((modrm >> 3) & 7) | rex_r;
4167
4168            switch (b) {
4169            case 0x3f0: /* rorx Gy,Ey, Ib */
4170                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4171                    || !(s->prefix & PREFIX_VEX)
4172                    || s->vex_l != 0) {
4173                    goto illegal_op;
4174                }
4175                ot = mo_64_32(s->dflag);
4176                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4177                b = cpu_ldub_code(env, s->pc++);
4178                if (ot == MO_64) {
4179                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
4180                } else {
4181                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4182                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4183                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
4184                }
4185                gen_op_mov_reg_v(ot, reg, cpu_T0);
4186                break;
4187
4188            default:
4189                goto unknown_op;
4190            }
4191            break;
4192
4193        default:
4194        unknown_op:
4195            gen_unknown_opcode(env, s);
4196            return;
4197        }
4198    } else {
4199        /* generic MMX or SSE operation */
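        /* These opcodes take an immediate byte after the modrm operand;
           rip_offset lets gen_lea_modrm account for it when forming
           RIP-relative addresses.  */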
4200        switch(b) {
4201        case 0x70: /* pshufx insn */
4202        case 0xc6: /* pshufx insn */
4203        case 0xc2: /* compare insns */
4204            s->rip_offset = 1;
4205            break;
4206        default:
4207            break;
4208        }
4209        if (is_xmm) {
4210            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4211            if (mod != 3) {
4212                int sz = 4;
4213
4214                gen_lea_modrm(env, s, modrm);
4215                op2_offset = offsetof(CPUX86State,xmm_t0);
4216
4217                switch (b) {
4218                case 0x50 ... 0x5a:
4219                case 0x5c ... 0x5f:
4220                case 0xc2:
4221                    /* Most sse scalar operations.  */
4222                    if (b1 == 2) {
4223                        sz = 2;
4224                    } else if (b1 == 3) {
4225                        sz = 3;
4226                    }
4227                    break;
4228
4229                case 0x2e:  /* ucomis[sd] */
4230                case 0x2f:  /* comis[sd] */
4231                    if (b1 == 0) {
4232                        sz = 2;
4233                    } else {
4234                        sz = 3;
4235                    }
4236                    break;
4237                }
4238
4239                switch (sz) {
4240                case 2:
4241                    /* 32 bit access */
4242                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
4243                    tcg_gen_st32_tl(cpu_T0, cpu_env,
4244                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4245                    break;
4246                case 3:
4247                    /* 64 bit access */
4248                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4249                    break;
4250                default:
4251                    /* 128 bit access */
4252                    gen_ldo_env_A0(s, op2_offset);
4253                    break;
4254                }
4255            } else {
4256                rm = (modrm & 7) | REX_B(s);
4257                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4258            }
4259        } else {
4260            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4261            if (mod != 3) {
4262                gen_lea_modrm(env, s, modrm);
4263                op2_offset = offsetof(CPUX86State,mmx_t0);
4264                gen_ldq_env_A0(s, op2_offset);
4265            } else {
4266                rm = (modrm & 7);
4267                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4268            }
4269        }
4270        switch(b) {
4271        case 0x0f: /* 3DNow! data insns */
4272            val = cpu_ldub_code(env, s->pc++);
4273            sse_fn_epp = sse_op_table5[val];
4274            if (!sse_fn_epp) {
4275                goto unknown_op;
4276            }
4277            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4278                goto illegal_op;
4279            }
4280            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4281            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4282            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4283            break;
4284        case 0x70: /* pshufx insn */
4285        case 0xc6: /* pshufx insn */
4286            val = cpu_ldub_code(env, s->pc++);
4287            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4288            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4289            /* XXX: introduce a new table? */
4290            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4291            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4292            break;
4293        case 0xc2:
4294            /* compare insns */
4295            val = cpu_ldub_code(env, s->pc++);
4296            if (val >= 8)
4297                goto unknown_op;
4298            sse_fn_epp = sse_op_table4[val][b1];
4299
4300            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4301            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4302            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4303            break;
4304        case 0xf7:
            /* maskmov: the store goes to DS:[rDI], so A0 must be
               prepared first */
4306            if (mod != 3)
4307                goto illegal_op;
4308            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4309            gen_extu(s->aflag, cpu_A0);
4310            gen_add_A0_ds_seg(s);
4311
4312            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4313            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4314            /* XXX: introduce a new table? */
4315            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4316            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4317            break;
4318        default:
4319            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4320            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4321            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4322            break;
4323        }
4324        if (b == 0x2e || b == 0x2f) {
4325            set_cc_op(s, CC_OP_EFLAGS);
4326        }
4327    }
4328}
4329
4330/* convert one instruction. s->is_jmp is set if the translation must
4331   be stopped. Return the next pc value */
4332static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4333                               target_ulong pc_start)
4334{
4335    int b, prefixes;
4336    int shift;
4337    TCGMemOp ot, aflag, dflag;
4338    int modrm, reg, rm, mod, op, opreg, val;
4339    target_ulong next_eip, tval;
4340    int rex_w, rex_r;
4341
4342    s->pc_start = s->pc = pc_start;
4343    prefixes = 0;
4344    s->override = -1;
4345    rex_w = -1;
4346    rex_r = 0;
4347#ifdef TARGET_X86_64
4348    s->rex_x = 0;
4349    s->rex_b = 0;
4350    x86_64_hregs = 0;
4351#endif
4352    s->rip_offset = 0; /* for relative ip address */
4353    s->vex_l = 0;
4354    s->vex_v = 0;
4355 next_byte:
4356    b = cpu_ldub_code(env, s->pc);
4357    s->pc++;
4358    /* Collect prefixes.  */
4359    switch (b) {
4360    case 0xf3:
4361        prefixes |= PREFIX_REPZ;
4362        goto next_byte;
4363    case 0xf2:
4364        prefixes |= PREFIX_REPNZ;
4365        goto next_byte;
4366    case 0xf0:
4367        prefixes |= PREFIX_LOCK;
4368        goto next_byte;
4369    case 0x2e:
4370        s->override = R_CS;
4371        goto next_byte;
4372    case 0x36:
4373        s->override = R_SS;
4374        goto next_byte;
4375    case 0x3e:
4376        s->override = R_DS;
4377        goto next_byte;
4378    case 0x26:
4379        s->override = R_ES;
4380        goto next_byte;
4381    case 0x64:
4382        s->override = R_FS;
4383        goto next_byte;
4384    case 0x65:
4385        s->override = R_GS;
4386        goto next_byte;
4387    case 0x66:
4388        prefixes |= PREFIX_DATA;
4389        goto next_byte;
4390    case 0x67:
4391        prefixes |= PREFIX_ADR;
4392        goto next_byte;
4393#ifdef TARGET_X86_64
4394    case 0x40 ... 0x4f:
4395        if (CODE64(s)) {
4396            /* REX prefix */
4397            rex_w = (b >> 3) & 1;
4398            rex_r = (b & 0x4) << 1;
4399            s->rex_x = (b & 0x2) << 2;
4400            REX_B(s) = (b & 0x1) << 3;
4401            x86_64_hregs = 1; /* select uniform byte register addressing */
4402            goto next_byte;
4403        }
4404        break;
4405#endif
4406    case 0xc5: /* 2-byte VEX */
4407    case 0xc4: /* 3-byte VEX */
4408        /* VEX prefixes cannot be used except in 32-bit mode.
4409           Otherwise the instruction is LES or LDS.  */
4410        if (s->code32 && !s->vm86) {
4411            static const int pp_prefix[4] = {
4412                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4413            };
4414            int vex3, vex2 = cpu_ldub_code(env, s->pc);
4415
4416            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4417                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4418                   otherwise the instruction is LES or LDS.  */
4419                break;
4420            }
4421            s->pc++;
4422
4423            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4424            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4425                            | PREFIX_LOCK | PREFIX_DATA)) {
4426                goto illegal_op;
4427            }
4428#ifdef TARGET_X86_64
4429            if (x86_64_hregs) {
4430                goto illegal_op;
4431            }
4432#endif
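            /* The VEX R/X/B and vvvv register fields are stored inverted
               in the encoding.  */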
4433            rex_r = (~vex2 >> 4) & 8;
4434            if (b == 0xc5) {
4435                vex3 = vex2;
4436                b = cpu_ldub_code(env, s->pc++);
4437            } else {
4438#ifdef TARGET_X86_64
4439                s->rex_x = (~vex2 >> 3) & 8;
4440                s->rex_b = (~vex2 >> 2) & 8;
4441#endif
4442                vex3 = cpu_ldub_code(env, s->pc++);
4443                rex_w = (vex3 >> 7) & 1;
4444                switch (vex2 & 0x1f) {
4445                case 0x01: /* Implied 0f leading opcode bytes.  */
4446                    b = cpu_ldub_code(env, s->pc++) | 0x100;
4447                    break;
4448                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4449                    b = 0x138;
4450                    break;
4451                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4452                    b = 0x13a;
4453                    break;
4454                default:   /* Reserved for future use.  */
4455                    goto unknown_op;
4456                }
4457            }
4458            s->vex_v = (~vex3 >> 3) & 0xf;
4459            s->vex_l = (vex3 >> 2) & 1;
4460            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4461        }
4462        break;
4463    }
4464
4465    /* Post-process prefixes.  */
4466    if (CODE64(s)) {
4467        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4468           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4469           over 0x66 if both are present.  */
4470        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4471        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4472        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4473    } else {
4474        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4475        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4476            dflag = MO_32;
4477        } else {
4478            dflag = MO_16;
4479        }
4480        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4481        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4482            aflag = MO_32;
4483        }  else {
4484            aflag = MO_16;
4485        }
4486    }
4487
4488    s->prefix = prefixes;
4489    s->aflag = aflag;
4490    s->dflag = dflag;
4491
4492    /* lock generation */
4493    if (prefixes & PREFIX_LOCK)
4494        gen_helper_lock();
4495
4496    /* now check op code */
4497 reswitch:
4498    switch(b) {
4499    case 0x0f:
4500        /**************************/
4501        /* extended op code */
4502        b = cpu_ldub_code(env, s->pc++) | 0x100;
4503        goto reswitch;
4504
4505        /**************************/
4506        /* arith & logic */
4507    case 0x00 ... 0x05:
4508    case 0x08 ... 0x0d:
4509    case 0x10 ... 0x15:
4510    case 0x18 ... 0x1d:
4511    case 0x20 ... 0x25:
4512    case 0x28 ... 0x2d:
4513    case 0x30 ... 0x35:
4514    case 0x38 ... 0x3d:
4515        {
4516            int op, f, val;
4517            op = (b >> 3) & 7;
4518            f = (b >> 1) & 3;
4519
4520            ot = mo_b_d(b, dflag);
4521
4522            switch(f) {
4523            case 0: /* OP Ev, Gv */
4524                modrm = cpu_ldub_code(env, s->pc++);
4525                reg = ((modrm >> 3) & 7) | rex_r;
4526                mod = (modrm >> 6) & 3;
4527                rm = (modrm & 7) | REX_B(s);
4528                if (mod != 3) {
4529                    gen_lea_modrm(env, s, modrm);
4530                    opreg = OR_TMP0;
4531                } else if (op == OP_XORL && rm == reg) {
4532                xor_zero:
4533                    /* xor reg, reg optimisation */
4534                    set_cc_op(s, CC_OP_CLR);
4535                    tcg_gen_movi_tl(cpu_T0, 0);
4536                    gen_op_mov_reg_v(ot, reg, cpu_T0);
4537                    break;
4538                } else {
4539                    opreg = rm;
4540                }
4541                gen_op_mov_v_reg(ot, cpu_T1, reg);
4542                gen_op(s, op, ot, opreg);
4543                break;
4544            case 1: /* OP Gv, Ev */
4545                modrm = cpu_ldub_code(env, s->pc++);
4546                mod = (modrm >> 6) & 3;
4547                reg = ((modrm >> 3) & 7) | rex_r;
4548                rm = (modrm & 7) | REX_B(s);
4549                if (mod != 3) {
4550                    gen_lea_modrm(env, s, modrm);
4551                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4552                } else if (op == OP_XORL && rm == reg) {
4553                    goto xor_zero;
4554                } else {
4555                    gen_op_mov_v_reg(ot, cpu_T1, rm);
4556                }
4557                gen_op(s, op, ot, reg);
4558                break;
4559            case 2: /* OP A, Iv */
4560                val = insn_get(env, s, ot);
4561                tcg_gen_movi_tl(cpu_T1, val);
4562                gen_op(s, op, ot, OR_EAX);
4563                break;
4564            }
4565        }
4566        break;
4567
4568    case 0x82:
4569        if (CODE64(s))
4570            goto illegal_op;
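            /* fall through: outside 64-bit mode, 0x82 is an alias of 0x80 */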
4571    case 0x80: /* GRP1 */
4572    case 0x81:
4573    case 0x83:
4574        {
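                /* Immediate group 1: modrm bits 5:3 select the ALU op;
                   0x80/0x82 are byte ops with an 8-bit immediate, 0x81
                   takes a full-size immediate and 0x83 a sign-extended
                   8-bit one.  */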
4575            int val;
4576
4577            ot = mo_b_d(b, dflag);
4578
4579            modrm = cpu_ldub_code(env, s->pc++);
4580            mod = (modrm >> 6) & 3;
4581            rm = (modrm & 7) | REX_B(s);
4582            op = (modrm >> 3) & 7;
4583
4584            if (mod != 3) {
4585                if (b == 0x83)
4586                    s->rip_offset = 1;
4587                else
4588                    s->rip_offset = insn_const_size(ot);
4589                gen_lea_modrm(env, s, modrm);
4590                opreg = OR_TMP0;
4591            } else {
4592                opreg = rm;
4593            }
4594
4595            switch(b) {
4596            default:
4597            case 0x80:
4598            case 0x81:
4599            case 0x82:
4600                val = insn_get(env, s, ot);
4601                break;
4602            case 0x83:
4603                val = (int8_t)insn_get(env, s, MO_8);
4604                break;
4605            }
4606            tcg_gen_movi_tl(cpu_T1, val);
4607            gen_op(s, op, ot, opreg);
4608        }
4609        break;
4610
4611        /**************************/
4612        /* inc, dec, and other misc arith */
4613    case 0x40 ... 0x47: /* inc Gv */
4614        ot = dflag;
4615        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4616        break;
4617    case 0x48 ... 0x4f: /* dec Gv */
4618        ot = dflag;
4619        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4620        break;
4621    case 0xf6: /* GRP3 */
4622    case 0xf7:
4623        ot = mo_b_d(b, dflag);
4624
4625        modrm = cpu_ldub_code(env, s->pc++);
4626        mod = (modrm >> 6) & 3;
4627        rm = (modrm & 7) | REX_B(s);
4628        op = (modrm >> 3) & 7;
4629        if (mod != 3) {
4630            if (op == 0)
4631                s->rip_offset = insn_const_size(ot);
4632            gen_lea_modrm(env, s, modrm);
4633            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4634        } else {
4635            gen_op_mov_v_reg(ot, cpu_T0, rm);
4636        }
4637
4638        switch(op) {
4639        case 0: /* test */
4640            val = insn_get(env, s, ot);
4641            tcg_gen_movi_tl(cpu_T1, val);
4642            gen_op_testl_T0_T1_cc();
4643            set_cc_op(s, CC_OP_LOGICB + ot);
4644            break;
4645        case 2: /* not */
4646            tcg_gen_not_tl(cpu_T0, cpu_T0);
4647            if (mod != 3) {
4648                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4649            } else {
4650                gen_op_mov_reg_v(ot, rm, cpu_T0);
4651            }
4652            break;
4653        case 3: /* neg */
4654            tcg_gen_neg_tl(cpu_T0, cpu_T0);
4655            if (mod != 3) {
4656                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4657            } else {
4658                gen_op_mov_reg_v(ot, rm, cpu_T0);
4659            }
4660            gen_op_update_neg_cc();
4661            set_cc_op(s, CC_OP_SUBB + ot);
4662            break;
4663        case 4: /* mul */
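            /* Unsigned multiply of rAX by the operand; the high half of
               the product is kept in cc_src so that CC_OP_MUL* yields
               CF = OF = (high half != 0).  */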
4664            switch(ot) {
4665            case MO_8:
4666                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4667                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
4668                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
4669                /* XXX: use 32 bit mul which could be faster */
4670                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4671                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4672                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4673                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
4674                set_cc_op(s, CC_OP_MULB);
4675                break;
4676            case MO_16:
4677                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4678                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4679                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
4680                /* XXX: use 32 bit mul which could be faster */
4681                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4682                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4683                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4684                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4685                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4686                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4687                set_cc_op(s, CC_OP_MULW);
4688                break;
4689            default:
4690            case MO_32:
4691                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4692                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4693                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4694                                  cpu_tmp2_i32, cpu_tmp3_i32);
4695                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4696                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4697                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4698                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4699                set_cc_op(s, CC_OP_MULL);
4700                break;
4701#ifdef TARGET_X86_64
4702            case MO_64:
4703                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4704                                  cpu_T0, cpu_regs[R_EAX]);
4705                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4706                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4707                set_cc_op(s, CC_OP_MULQ);
4708                break;
4709#endif
4710            }
4711            break;
4712        case 5: /* imul */
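            /* Signed multiply; cc_src holds the difference between the
               high part and the sign extension of the low part, which is
               non-zero exactly when the product overflows (CF = OF = 1).  */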
4713            switch(ot) {
4714            case MO_8:
4715                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4716                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4717                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
4718                /* XXX: use 32 bit mul which could be faster */
4719                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4720                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4721                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4722                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
4723                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4724                set_cc_op(s, CC_OP_MULB);
4725                break;
4726            case MO_16:
4727                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4728                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4729                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
4730                /* XXX: use 32 bit mul which could be faster */
4731                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4732                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4733                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4734                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
4735                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4736                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4737                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4738                set_cc_op(s, CC_OP_MULW);
4739                break;
4740            default:
4741            case MO_32:
4742                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4743                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4744                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4745                                  cpu_tmp2_i32, cpu_tmp3_i32);
4746                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4747                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4748                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4749                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4750                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4751                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4752                set_cc_op(s, CC_OP_MULL);
4753                break;
4754#ifdef TARGET_X86_64
4755            case MO_64:
4756                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4757                                  cpu_T0, cpu_regs[R_EAX]);
4758                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4759                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4760                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4761                set_cc_op(s, CC_OP_MULQ);
4762                break;
4763#endif
4764            }
4765            break;
4766        case 6: /* div */
4767            switch(ot) {
4768            case MO_8:
4769                gen_helper_divb_AL(cpu_env, cpu_T0);
4770                break;
4771            case MO_16:
4772                gen_helper_divw_AX(cpu_env, cpu_T0);
4773                break;
4774            default:
4775            case MO_32:
4776                gen_helper_divl_EAX(cpu_env, cpu_T0);
4777                break;
4778#ifdef TARGET_X86_64
4779            case MO_64:
4780                gen_helper_divq_EAX(cpu_env, cpu_T0);
4781                break;
4782#endif
4783            }
4784            break;
4785        case 7: /* idiv */
4786            switch(ot) {
4787            case MO_8:
4788                gen_helper_idivb_AL(cpu_env, cpu_T0);
4789                break;
4790            case MO_16:
4791                gen_helper_idivw_AX(cpu_env, cpu_T0);
4792                break;
4793            default:
4794            case MO_32:
4795                gen_helper_idivl_EAX(cpu_env, cpu_T0);
4796                break;
4797#ifdef TARGET_X86_64
4798            case MO_64:
4799                gen_helper_idivq_EAX(cpu_env, cpu_T0);
4800                break;
4801#endif
4802            }
4803            break;
4804        default:
4805            goto unknown_op;
4806        }
4807        break;
4808
4809    case 0xfe: /* GRP4 */
4810    case 0xff: /* GRP5 */
4811        ot = mo_b_d(b, dflag);
4812
4813        modrm = cpu_ldub_code(env, s->pc++);
4814        mod = (modrm >> 6) & 3;
4815        rm = (modrm & 7) | REX_B(s);
4816        op = (modrm >> 3) & 7;
4817        if (op >= 2 && b == 0xfe) {
4818            goto unknown_op;
4819        }
4820        if (CODE64(s)) {
4821            if (op == 2 || op == 4) {
4822                /* operand size for jumps is 64 bit */
4823                ot = MO_64;
4824            } else if (op == 3 || op == 5) {
4825                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4826            } else if (op == 6) {
4827                /* default push size is 64 bit */
4828                ot = mo_pushpop(s, dflag);
4829            }
4830        }
4831        if (mod != 3) {
4832            gen_lea_modrm(env, s, modrm);
4833            if (op >= 2 && op != 3 && op != 5)
4834                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4835        } else {
4836            gen_op_mov_v_reg(ot, cpu_T0, rm);
4837        }
4838
4839        switch(op) {
4840        case 0: /* inc Ev */
4841            if (mod != 3)
4842                opreg = OR_TMP0;
4843            else
4844                opreg = rm;
4845            gen_inc(s, ot, opreg, 1);
4846            break;
4847        case 1: /* dec Ev */
4848            if (mod != 3)
4849                opreg = OR_TMP0;
4850            else
4851                opreg = rm;
4852            gen_inc(s, ot, opreg, -1);
4853            break;
4854        case 2: /* call Ev */
4855            /* XXX: when the operand comes from memory the 16-bit load
                   is already zero-extended, so the 'and' could be skipped */
4856            if (dflag == MO_16) {
4857                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4858            }
4859            next_eip = s->pc - s->cs_base;
4860            tcg_gen_movi_tl(cpu_T1, next_eip);
4861            gen_push_v(s, cpu_T1);
4862            gen_op_jmp_v(cpu_T0);
4863            gen_bnd_jmp(s);
4864            gen_eob(s);
4865            break;
4866        case 3: /* lcall Ev */
4867            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4868            gen_add_A0_im(s, 1 << ot);
4869            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4870        do_lcall:
4871            if (s->pe && !s->vm86) {
4872                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4873                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4874                                           tcg_const_i32(dflag - 1),
4875                                           tcg_const_tl(s->pc - s->cs_base));
4876            } else {
4877                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4878                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
4879                                      tcg_const_i32(dflag - 1),
4880                                      tcg_const_i32(s->pc - s->cs_base));
4881            }
4882            gen_eob(s);
4883            break;
4884        case 4: /* jmp Ev */
4885            if (dflag == MO_16) {
4886                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4887            }
4888            gen_op_jmp_v(cpu_T0);
4889            gen_bnd_jmp(s);
4890            gen_eob(s);
4891            break;
4892        case 5: /* ljmp Ev */
4893            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4894            gen_add_A0_im(s, 1 << ot);
4895            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4896        do_ljmp:
4897            if (s->pe && !s->vm86) {
4898                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4899                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4900                                          tcg_const_tl(s->pc - s->cs_base));
4901            } else {
4902                gen_op_movl_seg_T0_vm(R_CS);
4903                gen_op_jmp_v(cpu_T1);
4904            }
4905            gen_eob(s);
4906            break;
4907        case 6: /* push Ev */
4908            gen_push_v(s, cpu_T0);
4909            break;
4910        default:
4911            goto unknown_op;
4912        }
4913        break;
4914
4915    case 0x84: /* test Ev, Gv */
4916    case 0x85:
4917        ot = mo_b_d(b, dflag);
4918
4919        modrm = cpu_ldub_code(env, s->pc++);
4920        reg = ((modrm >> 3) & 7) | rex_r;
4921
4922        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4923        gen_op_mov_v_reg(ot, cpu_T1, reg);
4924        gen_op_testl_T0_T1_cc();
4925        set_cc_op(s, CC_OP_LOGICB + ot);
4926        break;
4927
4928    case 0xa8: /* test eAX, Iv */
4929    case 0xa9:
4930        ot = mo_b_d(b, dflag);
4931        val = insn_get(env, s, ot);
4932
4933        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
4934        tcg_gen_movi_tl(cpu_T1, val);
4935        gen_op_testl_T0_T1_cc();
4936        set_cc_op(s, CC_OP_LOGICB + ot);
4937        break;
4938
4939    case 0x98: /* CWDE/CBW */
4940        switch (dflag) {
4941#ifdef TARGET_X86_64
4942        case MO_64:
4943            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
4944            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
4945            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
4946            break;
4947#endif
4948        case MO_32:
4949            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
4950            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4951            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
4952            break;
4953        case MO_16:
4954            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
4955            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4956            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4957            break;
4958        default:
4959            tcg_abort();
4960        }
4961        break;
4962    case 0x99: /* CDQ/CWD */
4963        switch (dflag) {
4964#ifdef TARGET_X86_64
4965        case MO_64:
4966            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
4967            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
4968            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
4969            break;
4970#endif
4971        case MO_32:
4972            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
4973            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
4974            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
4975            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
4976            break;
4977        case MO_16:
4978            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
4979            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4980            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
4981            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4982            break;
4983        default:
4984            tcg_abort();
4985        }
4986        break;
4987    case 0x1af: /* imul Gv, Ev */
4988    case 0x69: /* imul Gv, Ev, I */
4989    case 0x6b:
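        /* Three imul forms share this code: 0x69 Gv,Ev,Iv; 0x6b Gv,Ev,Ib
           (sign-extended); and the two-operand 0f af Gv,Ev.  */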
4990        ot = dflag;
4991        modrm = cpu_ldub_code(env, s->pc++);
4992        reg = ((modrm >> 3) & 7) | rex_r;
4993        if (b == 0x69)
4994            s->rip_offset = insn_const_size(ot);
4995        else if (b == 0x6b)
4996            s->rip_offset = 1;
4997        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4998        if (b == 0x69) {
4999            val = insn_get(env, s, ot);
5000            tcg_gen_movi_tl(cpu_T1, val);
5001        } else if (b == 0x6b) {
5002            val = (int8_t)insn_get(env, s, MO_8);
5003            tcg_gen_movi_tl(cpu_T1, val);
5004        } else {
5005            gen_op_mov_v_reg(ot, cpu_T1, reg);
5006        }
5007        switch (ot) {
5008#ifdef TARGET_X86_64
5009        case MO_64:
5010            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
5011            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5012            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5013            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
5014            break;
5015#endif
5016        case MO_32:
5017            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5018            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
5019            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5020                              cpu_tmp2_i32, cpu_tmp3_i32);
5021            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5022            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5023            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5024            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5025            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5026            break;
5027        default:
5028            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5029            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
5030            /* XXX: use 32 bit mul which could be faster */
5031            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
5032            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
5033            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
5034            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
5035            gen_op_mov_reg_v(ot, reg, cpu_T0);
5036            break;
5037        }
5038        set_cc_op(s, CC_OP_MULB + ot);
5039        break;
5040    case 0x1c0:
5041    case 0x1c1: /* xadd Ev, Gv */
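        /* xadd exchanges the destination with the source register and
           stores their sum in the destination; flags are set as for ADD.  */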
5042        ot = mo_b_d(b, dflag);
5043        modrm = cpu_ldub_code(env, s->pc++);
5044        reg = ((modrm >> 3) & 7) | rex_r;
5045        mod = (modrm >> 6) & 3;
5046        if (mod == 3) {
5047            rm = (modrm & 7) | REX_B(s);
5048            gen_op_mov_v_reg(ot, cpu_T0, reg);
5049            gen_op_mov_v_reg(ot, cpu_T1, rm);
5050            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5051            gen_op_mov_reg_v(ot, reg, cpu_T1);
5052            gen_op_mov_reg_v(ot, rm, cpu_T0);
5053        } else {
5054            gen_lea_modrm(env, s, modrm);
5055            gen_op_mov_v_reg(ot, cpu_T0, reg);
5056            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5057            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5058            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5059            gen_op_mov_reg_v(ot, reg, cpu_T1);
5060        }
5061        gen_op_update2_cc();
5062        set_cc_op(s, CC_OP_ADDB + ot);
5063        break;
5064    case 0x1b0:
5065    case 0x1b1: /* cmpxchg Ev, Gv */
5066        {
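            /* cmpxchg: compare rAX with the destination; if equal, store
               the source into it (ZF set), otherwise load the destination
               into rAX.  Flags are those of the comparison.  */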
5067            TCGLabel *label1, *label2;
5068            TCGv t0, t1, t2, a0;
5069
5070            ot = mo_b_d(b, dflag);
5071            modrm = cpu_ldub_code(env, s->pc++);
5072            reg = ((modrm >> 3) & 7) | rex_r;
5073            mod = (modrm >> 6) & 3;
5074            t0 = tcg_temp_local_new();
5075            t1 = tcg_temp_local_new();
5076            t2 = tcg_temp_local_new();
5077            a0 = tcg_temp_local_new();
5078            gen_op_mov_v_reg(ot, t1, reg);
5079            if (mod == 3) {
5080                rm = (modrm & 7) | REX_B(s);
5081                gen_op_mov_v_reg(ot, t0, rm);
5082            } else {
5083                gen_lea_modrm(env, s, modrm);
5084                tcg_gen_mov_tl(a0, cpu_A0);
5085                gen_op_ld_v(s, ot, t0, a0);
5086                rm = 0; /* avoid warning */
5087            }
5088            label1 = gen_new_label();
5089            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
5090            gen_extu(ot, t0);
5091            gen_extu(ot, t2);
5092            tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
5093            label2 = gen_new_label();
5094            if (mod == 3) {
5095                gen_op_mov_reg_v(ot, R_EAX, t0);
5096                tcg_gen_br(label2);
5097                gen_set_label(label1);
5098                gen_op_mov_reg_v(ot, rm, t1);
5099            } else {
5100                /* perform no-op store cycle like physical cpu; must be
5101                   before changing accumulator to ensure idempotency if
5102                   the store faults and the instruction is restarted */
5103                gen_op_st_v(s, ot, t0, a0);
5104                gen_op_mov_reg_v(ot, R_EAX, t0);
5105                tcg_gen_br(label2);
5106                gen_set_label(label1);
5107                gen_op_st_v(s, ot, t1, a0);
5108            }
5109            gen_set_label(label2);
5110            tcg_gen_mov_tl(cpu_cc_src, t0);
5111            tcg_gen_mov_tl(cpu_cc_srcT, t2);
5112            tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
5113            set_cc_op(s, CC_OP_SUBB + ot);
5114            tcg_temp_free(t0);
5115            tcg_temp_free(t1);
5116            tcg_temp_free(t2);
5117            tcg_temp_free(a0);
5118        }
5119        break;
5120    case 0x1c7: /* cmpxchg8b */
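        /* cmpxchg8b compares EDX:EAX with the m64 operand; with REX.W it
           becomes cmpxchg16b, which is gated on CPUID.CX16.  */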
5121        modrm = cpu_ldub_code(env, s->pc++);
5122        mod = (modrm >> 6) & 3;
5123        if ((mod == 3) || ((modrm & 0x38) != 0x8))
5124            goto illegal_op;
5125#ifdef TARGET_X86_64
5126        if (dflag == MO_64) {
5127            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5128                goto illegal_op;
5129            gen_lea_modrm(env, s, modrm);
5130            gen_helper_cmpxchg16b(cpu_env, cpu_A0);
5131        } else
5132#endif
5133        {
5134            if (!(s->cpuid_features & CPUID_CX8))
5135                goto illegal_op;
5136            gen_lea_modrm(env, s, modrm);
5137            gen_helper_cmpxchg8b(cpu_env, cpu_A0);
5138        }
5139        set_cc_op(s, CC_OP_EFLAGS);
5140        break;
5141
5142        /**************************/
5143        /* push/pop */
5144    case 0x50 ... 0x57: /* push */
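        /* gen_op_mov_v_reg copies the whole register except for 8-bit
           high-byte registers, so the MO_32 read here also covers
           64-bit pushes.  */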
5145        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
5146        gen_push_v(s, cpu_T0);
5147        break;
5148    case 0x58 ... 0x5f: /* pop */
5149        ot = gen_pop_T0(s);
5150        /* NOTE: order is important for pop %sp */
5151        gen_pop_update(s, ot);
5152        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
5153        break;
5154    case 0x60: /* pusha */
5155        if (CODE64(s))
5156            goto illegal_op;
5157        gen_pusha(s);
5158        break;
5159    case 0x61: /* popa */
5160        if (CODE64(s))
5161            goto illegal_op;
5162        gen_popa(s);
5163        break;
5164    case 0x68: /* push Iv */
5165    case 0x6a:
5166        ot = mo_pushpop(s, dflag);
5167        if (b == 0x68)
5168            val = insn_get(env, s, ot);
5169        else
5170            val = (int8_t)insn_get(env, s, MO_8);
5171        tcg_gen_movi_tl(cpu_T0, val);
5172        gen_push_v(s, cpu_T0);
5173        break;
5174    case 0x8f: /* pop Ev */
5175        modrm = cpu_ldub_code(env, s->pc++);
5176        mod = (modrm >> 6) & 3;
5177        ot = gen_pop_T0(s);
5178        if (mod == 3) {
5179            /* NOTE: order is important for pop %sp */
5180            gen_pop_update(s, ot);
5181            rm = (modrm & 7) | REX_B(s);
5182            gen_op_mov_reg_v(ot, rm, cpu_T0);
5183        } else {
5184            /* NOTE: order is important too for MMU exceptions */
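            /* popl_esp_hack makes ESP-relative addressing use the value
               of ESP after the pop: the SDM specifies that 'pop mem'
               computes its effective address after incrementing ESP.  */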
5185            s->popl_esp_hack = 1 << ot;
5186            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5187            s->popl_esp_hack = 0;
5188            gen_pop_update(s, ot);
5189        }
5190        break;
5191    case 0xc8: /* enter */
5192        {
5193            int level;
5194            val = cpu_lduw_code(env, s->pc);
5195            s->pc += 2;
5196            level = cpu_ldub_code(env, s->pc++);
5197            gen_enter(s, val, level);
5198        }
5199        break;
5200    case 0xc9: /* leave */
5201        gen_leave(s);
5202        break;
5203    case 0x06: /* push es */
5204    case 0x0e: /* push cs */
5205    case 0x16: /* push ss */
5206    case 0x1e: /* push ds */
5207        if (CODE64(s))
5208            goto illegal_op;
5209        gen_op_movl_T0_seg(b >> 3);
5210        gen_push_v(s, cpu_T0);
5211        break;
5212    case 0x1a0: /* push fs */
5213    case 0x1a8: /* push gs */
5214        gen_op_movl_T0_seg((b >> 3) & 7);
5215        gen_push_v(s, cpu_T0);
5216        break;
5217    case 0x07: /* pop es */
5218    case 0x17: /* pop ss */
5219    case 0x1f: /* pop ds */
5220        if (CODE64(s))
5221            goto illegal_op;
5222        reg = b >> 3;
5223        ot = gen_pop_T0(s);
5224        gen_movl_seg_T0(s, reg);
5225        gen_pop_update(s, ot);
5226        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5227        if (s->is_jmp) {
5228            gen_jmp_im(s->pc - s->cs_base);
5229            if (reg == R_SS) {
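                /* Loading SS inhibits interrupts and single-step traps
                   until after the next instruction.  */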
5230                s->tf = 0;
5231                gen_eob_inhibit_irq(s, true);
5232            } else {
5233                gen_eob(s);
5234            }
5235        }
5236        break;
5237    case 0x1a1: /* pop fs */
5238    case 0x1a9: /* pop gs */
5239        ot = gen_pop_T0(s);
5240        gen_movl_seg_T0(s, (b >> 3) & 7);
5241        gen_pop_update(s, ot);
5242        if (s->is_jmp) {
5243            gen_jmp_im(s->pc - s->cs_base);
5244            gen_eob(s);
5245        }
5246        break;
5247
5248        /**************************/
5249        /* mov */
5250    case 0x88:
5251    case 0x89: /* mov Gv, Ev */
5252        ot = mo_b_d(b, dflag);
5253        modrm = cpu_ldub_code(env, s->pc++);
5254        reg = ((modrm >> 3) & 7) | rex_r;
5255
5256        /* generate a generic store */
5257        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5258        break;
5259    case 0xc6:
5260    case 0xc7: /* mov Ev, Iv */
5261        ot = mo_b_d(b, dflag);
5262        modrm = cpu_ldub_code(env, s->pc++);
5263        mod = (modrm >> 6) & 3;
5264        if (mod != 3) {
5265            s->rip_offset = insn_const_size(ot);
5266            gen_lea_modrm(env, s, modrm);
5267        }
5268        val = insn_get(env, s, ot);
5269        tcg_gen_movi_tl(cpu_T0, val);
5270        if (mod != 3) {
5271            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5272        } else {
5273            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
5274        }
5275        break;
5276    case 0x8a:
5277    case 0x8b: /* mov Ev, Gv */
5278        ot = mo_b_d(b, dflag);
5279        modrm = cpu_ldub_code(env, s->pc++);
5280        reg = ((modrm >> 3) & 7) | rex_r;
5281
5282        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5283        gen_op_mov_reg_v(ot, reg, cpu_T0);
5284        break;
5285    case 0x8e: /* mov seg, Gv */
5286        modrm = cpu_ldub_code(env, s->pc++);
5287        reg = (modrm >> 3) & 7;
5288        if (reg >= 6 || reg == R_CS)
5289            goto illegal_op;
5290        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5291        gen_movl_seg_T0(s, reg);
5292        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5293        if (s->is_jmp) {
5294            gen_jmp_im(s->pc - s->cs_base);
5295            if (reg == R_SS) {
5296                s->tf = 0;
5297                gen_eob_inhibit_irq(s, true);
5298            } else {
5299                gen_eob(s);
5300            }
5301        }
5302        break;
5303    case 0x8c: /* mov Gv, seg */
5304        modrm = cpu_ldub_code(env, s->pc++);
5305        reg = (modrm >> 3) & 7;
5306        mod = (modrm >> 6) & 3;
5307        if (reg >= 6)
5308            goto illegal_op;
5309        gen_op_movl_T0_seg(reg);
5310        ot = mod == 3 ? dflag : MO_16;
5311        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5312        break;
5313
5314    case 0x1b6: /* movzbS Gv, Eb */
5315    case 0x1b7: /* movzwS Gv, Ew */
5316    case 0x1be: /* movsbS Gv, Eb */
5317    case 0x1bf: /* movswS Gv, Ew */
5318        {
5319            TCGMemOp d_ot;
5320            TCGMemOp s_ot;
5321
5322            /* d_ot is the size of destination */
5323            d_ot = dflag;
5324            /* ot is the size of source */
5325            ot = (b & 1) + MO_8;
5326            /* s_ot is the sign+size of source */
5327            s_ot = b & 8 ? MO_SIGN | ot : ot;
5328
5329            modrm = cpu_ldub_code(env, s->pc++);
5330            reg = ((modrm >> 3) & 7) | rex_r;
5331            mod = (modrm >> 6) & 3;
5332            rm = (modrm & 7) | REX_B(s);
5333
5334            if (mod == 3) {
5335                gen_op_mov_v_reg(ot, cpu_T0, rm);
5336                switch (s_ot) {
5337                case MO_UB:
5338                    tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
5339                    break;
5340                case MO_SB:
5341                    tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5342                    break;
5343                case MO_UW:
5344                    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5345                    break;
5346                default:
5347                case MO_SW:
5348                    tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5349                    break;
5350                }
5351                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5352            } else {
5353                gen_lea_modrm(env, s, modrm);
5354                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
5355                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5356            }
5357        }
5358        break;
5359
5360    case 0x8d: /* lea */
5361        modrm = cpu_ldub_code(env, s->pc++);
5362        mod = (modrm >> 6) & 3;
5363        if (mod == 3)
5364            goto illegal_op;
5365        reg = ((modrm >> 3) & 7) | rex_r;
5366        {
5367            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5368            TCGv ea = gen_lea_modrm_1(a);
5369            gen_op_mov_reg_v(dflag, reg, ea);
5370        }
5371        break;
5372
5373    case 0xa0: /* mov EAX, Ov */
5374    case 0xa1:
5375    case 0xa2: /* mov Ov, EAX */
5376    case 0xa3:
5377        {
5378            target_ulong offset_addr;
5379
5380            ot = mo_b_d(b, dflag);
5381            switch (s->aflag) {
5382#ifdef TARGET_X86_64
5383            case MO_64:
5384                offset_addr = cpu_ldq_code(env, s->pc);
5385                s->pc += 8;
5386                break;
5387#endif
5388            default:
5389                offset_addr = insn_get(env, s, s->aflag);
5390                break;
5391            }
5392            tcg_gen_movi_tl(cpu_A0, offset_addr);
5393            gen_add_A0_ds_seg(s);
5394            if ((b & 2) == 0) {
5395                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
5396                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
5397            } else {
5398                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
5399                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5400            }
5401        }
5402        break;
5403    case 0xd7: /* xlat */
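        /* xlat: AL = [seg:rBX + zero-extended AL], DS by default.  */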
5404        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
5405        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
5406        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
5407        gen_extu(s->aflag, cpu_A0);
5408        gen_add_A0_ds_seg(s);
5409        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
5410        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
5411        break;
5412    case 0xb0 ... 0xb7: /* mov R, Ib */
5413        val = insn_get(env, s, MO_8);
5414        tcg_gen_movi_tl(cpu_T0, val);
5415        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
5416        break;
5417    case 0xb8 ... 0xbf: /* mov R, Iv */
5418#ifdef TARGET_X86_64
5419        if (dflag == MO_64) {
5420            uint64_t tmp;
5421            /* 64 bit case */
5422            tmp = cpu_ldq_code(env, s->pc);
5423            s->pc += 8;
5424            reg = (b & 7) | REX_B(s);
5425            tcg_gen_movi_tl(cpu_T0, tmp);
5426            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
5427        } else
5428#endif
5429        {
5430            ot = dflag;
5431            val = insn_get(env, s, ot);
5432            reg = (b & 7) | REX_B(s);
5433            tcg_gen_movi_tl(cpu_T0, val);
5434            gen_op_mov_reg_v(ot, reg, cpu_T0);
5435        }
5436        break;
5437
5438    case 0x91 ... 0x97: /* xchg R, EAX */
5439    do_xchg_reg_eax:
5440        ot = dflag;
5441        reg = (b & 7) | REX_B(s);
5442        rm = R_EAX;
5443        goto do_xchg_reg;
5444    case 0x86:
5445    case 0x87: /* xchg Ev, Gv */
5446        ot = mo_b_d(b, dflag);
5447        modrm = cpu_ldub_code(env, s->pc++);
5448        reg = ((modrm >> 3) & 7) | rex_r;
5449        mod = (modrm >> 6) & 3;
5450        if (mod == 3) {
5451            rm = (modrm & 7) | REX_B(s);
5452        do_xchg_reg:
5453            gen_op_mov_v_reg(ot, cpu_T0, reg);
5454            gen_op_mov_v_reg(ot, cpu_T1, rm);
5455            gen_op_mov_reg_v(ot, rm, cpu_T0);
5456            gen_op_mov_reg_v(ot, reg, cpu_T1);
5457        } else {
5458            gen_lea_modrm(env, s, modrm);
5459            gen_op_mov_v_reg(ot, cpu_T0, reg);
5460            /* for xchg, lock is implicit */
5461            if (!(prefixes & PREFIX_LOCK))
5462                gen_helper_lock();
5463            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5464            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5465            if (!(prefixes & PREFIX_LOCK))
5466                gen_helper_unlock();
5467            gen_op_mov_reg_v(ot, reg, cpu_T1);
5468        }
5469        break;
5470    case 0xc4: /* les Gv */
5471        /* In CODE64 this is VEX3; see above.  */
5472        op = R_ES;
5473        goto do_lxx;
5474    case 0xc5: /* lds Gv */
5475        /* In CODE64 this is VEX2; see above.  */
5476        op = R_DS;
5477        goto do_lxx;
5478    case 0x1b2: /* lss Gv */
5479        op = R_SS;
5480        goto do_lxx;
5481    case 0x1b4: /* lfs Gv */
5482        op = R_FS;
5483        goto do_lxx;
5484    case 0x1b5: /* lgs Gv */
5485        op = R_GS;
5486    do_lxx:
5487        ot = dflag != MO_16 ? MO_32 : MO_16;
5488        modrm = cpu_ldub_code(env, s->pc++);
5489        reg = ((modrm >> 3) & 7) | rex_r;
5490        mod = (modrm >> 6) & 3;
5491        if (mod == 3)
5492            goto illegal_op;
5493        gen_lea_modrm(env, s, modrm);
5494        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5495        gen_add_A0_im(s, 1 << ot);
5496        /* load the segment first to handle exceptions properly */
5497        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5498        gen_movl_seg_T0(s, op);
5499        /* then put the data */
5500        gen_op_mov_reg_v(ot, reg, cpu_T1);
5501        if (s->is_jmp) {
5502            gen_jmp_im(s->pc - s->cs_base);
5503            gen_eob(s);
5504        }
5505        break;
5506
5507        /************************/
5508        /* shifts */
5509    case 0xc0:
5510    case 0xc1:
5511        /* shift Ev,Ib */
5512        shift = 2;
5513    grp2:
5514        {
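            /* Rotate/shift group 2: modrm bits 5:3 select
               rol/ror/rcl/rcr/shl/shr/sal/sar; 'shift' encodes where the
               count comes from (0: CL, 1: constant 1, 2: imm8).  */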
5515            ot = mo_b_d(b, dflag);
5516            modrm = cpu_ldub_code(env, s->pc++);
5517            mod = (modrm >> 6) & 3;
5518            op = (modrm >> 3) & 7;
5519
5520            if (mod != 3) {
5521                if (shift == 2) {
5522                    s->rip_offset = 1;
5523                }
5524                gen_lea_modrm(env, s, modrm);
5525                opreg = OR_TMP0;
5526            } else {
5527                opreg = (modrm & 7) | REX_B(s);
5528            }
5529
5530            /* emit the shift with the selected count source */
5531            if (shift == 0) {
5532                gen_shift(s, op, ot, opreg, OR_ECX);
5533            } else {
5534                if (shift == 2) {
5535                    shift = cpu_ldub_code(env, s->pc++);
5536                }
5537                gen_shifti(s, op, ot, opreg, shift);
5538            }
5539        }
5540        break;
5541    case 0xd0:
5542    case 0xd1:
5543        /* shift Ev,1 */
5544        shift = 1;
5545        goto grp2;
5546    case 0xd2:
5547    case 0xd3:
5548        /* shift Ev,cl */
5549        shift = 0;
5550        goto grp2;
5551
5552    case 0x1a4: /* shld imm */
5553        op = 0;
5554        shift = 1;
5555        goto do_shiftd;
5556    case 0x1a5: /* shld cl */
5557        op = 0;
5558        shift = 0;
5559        goto do_shiftd;
5560    case 0x1ac: /* shrd imm */
5561        op = 1;
5562        shift = 1;
5563        goto do_shiftd;
5564    case 0x1ad: /* shrd cl */
5565        op = 1;
5566        shift = 0;
5567    do_shiftd:
5568        ot = dflag;
5569        modrm = cpu_ldub_code(env, s->pc++);
5570        mod = (modrm >> 6) & 3;
5571        rm = (modrm & 7) | REX_B(s);
5572        reg = ((modrm >> 3) & 7) | rex_r;
5573        if (mod != 3) {
5574            gen_lea_modrm(env, s, modrm);
5575            opreg = OR_TMP0;
5576        } else {
5577            opreg = rm;
5578        }
5579        gen_op_mov_v_reg(ot, cpu_T1, reg);
5580
5581        if (shift) {
5582            TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
5583            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5584            tcg_temp_free(imm);
5585        } else {
5586            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5587        }
5588        break;
5589
5590        /************************/
5591        /* floats */
5592    case 0xd8 ... 0xdf:
5593        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5594            /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5595            /* XXX: what to do on an illegal op? */
5596            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5597            break;
5598        }
5599        modrm = cpu_ldub_code(env, s->pc++);
5600        mod = (modrm >> 6) & 3;
5601        rm = modrm & 7;
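        /* Pack the low three opcode bits (0xd8-0xdf) with the modrm reg
           field into a 6-bit index identifying the FPU operation.  */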
5602        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5603        if (mod != 3) {
5604            /* memory op */
5605            gen_lea_modrm(env, s, modrm);
5606            switch(op) {
5607            case 0x00 ... 0x07: /* fxxxs */
5608            case 0x10 ... 0x17: /* fixxxl */
5609            case 0x20 ... 0x27: /* fxxxl */
5610            case 0x30 ... 0x37: /* fixxx */
5611                {
5612                    int op1;
5613                    op1 = op & 7;
5614
5615                    switch(op >> 4) {
5616                    case 0:
5617                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5618                                            s->mem_index, MO_LEUL);
5619                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5620                        break;
5621                    case 1:
5622                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5623                                            s->mem_index, MO_LEUL);
5624                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5625                        break;
5626                    case 2:
5627                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5628                                            s->mem_index, MO_LEQ);
5629                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5630                        break;
5631                    case 3:
5632                    default:
5633                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5634                                            s->mem_index, MO_LESW);
5635                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5636                        break;
5637                    }
5638
5639                    gen_helper_fp_arith_ST0_FT0(op1);
5640                    if (op1 == 3) {
5641                        /* fcomp needs pop */
5642                        gen_helper_fpop(cpu_env);
5643                    }
5644                }
5645                break;
5646            case 0x08: /* flds */
5647            case 0x0a: /* fsts */
5648            case 0x0b: /* fstps */
5649            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5650            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5651            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5652                switch(op & 7) {
5653                case 0:
5654                    switch(op >> 4) {
5655                    case 0:
5656                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5657                                            s->mem_index, MO_LEUL);
5658                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5659                        break;
5660                    case 1:
5661                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5662                                            s->mem_index, MO_LEUL);
5663                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5664                        break;
5665                    case 2:
5666                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5667                                            s->mem_index, MO_LEQ);
5668                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5669                        break;
5670                    case 3:
5671                    default:
5672                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5673                                            s->mem_index, MO_LESW);
5674                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5675                        break;
5676                    }
5677                    break;
5678                case 1:
5679                    /* XXX: the corresponding CPUID bit (SSE3, for fisttp)
                           must be tested! */
5680                    switch(op >> 4) {
5681                    case 1:
5682                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5683                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5684                                            s->mem_index, MO_LEUL);
5685                        break;
5686                    case 2:
5687                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5688                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5689                                            s->mem_index, MO_LEQ);
5690                        break;
5691                    case 3:
5692                    default:
5693                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5694                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5695                                            s->mem_index, MO_LEUW);
5696                        break;
5697                    }
5698                    gen_helper_fpop(cpu_env);
5699                    break;
5700                default:
5701                    switch(op >> 4) {
5702                    case 0:
5703                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5704                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5705                                            s->mem_index, MO_LEUL);
5706                        break;
5707                    case 1:
5708                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5709                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5710                                            s->mem_index, MO_LEUL);
5711                        break;
5712                    case 2:
5713                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5714                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5715                                            s->mem_index, MO_LEQ);
5716                        break;
5717                    case 3:
5718                    default:
5719                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5720                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5721                                            s->mem_index, MO_LEUW);
5722                        break;
5723                    }
5724                    if ((op & 7) == 3)
5725                        gen_helper_fpop(cpu_env);
5726                    break;
5727                }
5728                break;
5729            case 0x0c: /* fldenv mem */
5730                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5731                break;
5732            case 0x0d: /* fldcw mem */
5733                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5734                                    s->mem_index, MO_LEUW);
5735                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5736                break;
5737            case 0x0e: /* fnstenv mem */
5738                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5739                break;
5740            case 0x0f: /* fnstcw mem */
5741                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5742                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5743                                    s->mem_index, MO_LEUW);
5744                break;
5745            case 0x1d: /* fldt mem */
5746                gen_helper_fldt_ST0(cpu_env, cpu_A0);
5747                break;
5748            case 0x1f: /* fstpt mem */
5749                gen_helper_fstt_ST0(cpu_env, cpu_A0);
5750                gen_helper_fpop(cpu_env);
5751                break;
5752            case 0x2c: /* frstor mem */
5753                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5754                break;
5755            case 0x2e: /* fnsave mem */
5756                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5757                break;
5758            case 0x2f: /* fnstsw mem */
5759                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5760                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5761                                    s->mem_index, MO_LEUW);
5762                break;
5763            case 0x3c: /* fbld */
5764                gen_helper_fbld_ST0(cpu_env, cpu_A0);
5765                break;
5766            case 0x3e: /* fbstp */
5767                gen_helper_fbst_ST0(cpu_env, cpu_A0);
5768                gen_helper_fpop(cpu_env);
5769                break;
5770            case 0x3d: /* fildll */
5771                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5772                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5773                break;
5774            case 0x3f: /* fistpll */
5775                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5776                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5777                gen_helper_fpop(cpu_env);
5778                break;
5779            default:
5780                goto unknown_op;
5781            }
5782        } else {
5783            /* register float ops */
5784            opreg = rm;
5785
5786            switch(op) {
5787            case 0x08: /* fld sti */
5788                gen_helper_fpush(cpu_env);
5789                gen_helper_fmov_ST0_STN(cpu_env,
5790                                        tcg_const_i32((opreg + 1) & 7));
5791                break;
5792            case 0x09: /* fxchg sti */
5793            case 0x29: /* fxchg4 sti, undocumented op */
5794            case 0x39: /* fxchg7 sti, undocumented op */
5795                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5796                break;
5797            case 0x0a: /* grp d9/2 */
5798                switch(rm) {
5799                case 0: /* fnop */
5800                    /* check exceptions (FreeBSD FPU probe) */
5801                    gen_helper_fwait(cpu_env);
5802                    break;
5803                default:
5804                    goto unknown_op;
5805                }
5806                break;
5807            case 0x0c: /* grp d9/4 */
5808                switch(rm) {
5809                case 0: /* fchs */
5810                    gen_helper_fchs_ST0(cpu_env);
5811                    break;
5812                case 1: /* fabs */
5813                    gen_helper_fabs_ST0(cpu_env);
5814                    break;
5815                case 4: /* ftst */
5816                    gen_helper_fldz_FT0(cpu_env);
5817                    gen_helper_fcom_ST0_FT0(cpu_env);
5818                    break;
5819                case 5: /* fxam */
5820                    gen_helper_fxam_ST0(cpu_env);
5821                    break;
5822                default:
5823                    goto unknown_op;
5824                }
5825                break;
5826            case 0x0d: /* grp d9/5 */
5827                {
5828                    switch(rm) {
5829                    case 0:
5830                        gen_helper_fpush(cpu_env);
5831                        gen_helper_fld1_ST0(cpu_env);
5832                        break;
5833                    case 1:
5834                        gen_helper_fpush(cpu_env);
5835                        gen_helper_fldl2t_ST0(cpu_env);
5836                        break;
5837                    case 2:
5838                        gen_helper_fpush(cpu_env);
5839                        gen_helper_fldl2e_ST0(cpu_env);
5840                        break;
5841                    case 3:
5842                        gen_helper_fpush(cpu_env);
5843                        gen_helper_fldpi_ST0(cpu_env);
5844                        break;
5845                    case 4:
5846                        gen_helper_fpush(cpu_env);
5847                        gen_helper_fldlg2_ST0(cpu_env);
5848                        break;
5849                    case 5:
5850                        gen_helper_fpush(cpu_env);
5851                        gen_helper_fldln2_ST0(cpu_env);
5852                        break;
5853                    case 6:
5854                        gen_helper_fpush(cpu_env);
5855                        gen_helper_fldz_ST0(cpu_env);
5856                        break;
5857                    default:
5858                        goto unknown_op;
5859                    }
5860                }
5861                break;
5862            case 0x0e: /* grp d9/6 */
5863                switch(rm) {
5864                case 0: /* f2xm1 */
5865                    gen_helper_f2xm1(cpu_env);
5866                    break;
5867                case 1: /* fyl2x */
5868                    gen_helper_fyl2x(cpu_env);
5869                    break;
5870                case 2: /* fptan */
5871                    gen_helper_fptan(cpu_env);
5872                    break;
5873                case 3: /* fpatan */
5874                    gen_helper_fpatan(cpu_env);
5875                    break;
5876                case 4: /* fxtract */
5877                    gen_helper_fxtract(cpu_env);
5878                    break;
5879                case 5: /* fprem1 */
5880                    gen_helper_fprem1(cpu_env);
5881                    break;
5882                case 6: /* fdecstp */
5883                    gen_helper_fdecstp(cpu_env);
5884                    break;
5885                default:
5886                case 7: /* fincstp */
5887                    gen_helper_fincstp(cpu_env);
5888                    break;
5889                }
5890                break;
5891            case 0x0f: /* grp d9/7 */
5892                switch(rm) {
5893                case 0: /* fprem */
5894                    gen_helper_fprem(cpu_env);
5895                    break;
5896                case 1: /* fyl2xp1 */
5897                    gen_helper_fyl2xp1(cpu_env);
5898                    break;
5899                case 2: /* fsqrt */
5900                    gen_helper_fsqrt(cpu_env);
5901                    break;
5902                case 3: /* fsincos */
5903                    gen_helper_fsincos(cpu_env);
5904                    break;
5905                case 5: /* fscale */
5906                    gen_helper_fscale(cpu_env);
5907                    break;
5908                case 4: /* frndint */
5909                    gen_helper_frndint(cpu_env);
5910                    break;
5911                case 6: /* fsin */
5912                    gen_helper_fsin(cpu_env);
5913                    break;
5914                default:
5915                case 7: /* fcos */
5916                    gen_helper_fcos(cpu_env);
5917                    break;
5918                }
5919                break;
5920            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
5921            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
                {
                    int op1;

                    op1 = op & 7;
                    if (op >= 0x20) {
                        gen_helper_fp_arith_STN_ST0(op1, opreg);
                        if (op >= 0x30)
                            gen_helper_fpop(cpu_env);
                    } else {
                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                        gen_helper_fp_arith_ST0_FT0(op1);
                    }
                }
                break;
            case 0x02: /* fcom */
            case 0x22: /* fcom2, undocumented op */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcom_ST0_FT0(cpu_env);
                break;
            case 0x03: /* fcomp */
            case 0x23: /* fcomp3, undocumented op */
            case 0x32: /* fcomp5, undocumented op */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcom_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                break;
            case 0x15: /* da/5 */
                switch(rm) {
                case 1: /* fucompp */
                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
                    gen_helper_fucom_ST0_FT0(cpu_env);
                    gen_helper_fpop(cpu_env);
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x1c:
                switch(rm) {
                case 0: /* feni (287 only, just do nop here) */
                    break;
                case 1: /* fdisi (287 only, just do nop here) */
                    break;
                case 2: /* fclex */
                    gen_helper_fclex(cpu_env);
                    break;
                case 3: /* fninit */
                    gen_helper_fninit(cpu_env);
                    break;
                case 4: /* fsetpm (287 only, just do nop here) */
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x1d: /* fucomi */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucomi_ST0_FT0(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x1e: /* fcomi */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcomi_ST0_FT0(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x28: /* ffree sti */
                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x2a: /* fst sti */
                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x2b: /* fstp sti */
            case 0x0b: /* fstp1 sti, undocumented op */
            case 0x3a: /* fstp8 sti, undocumented op */
            case 0x3b: /* fstp9 sti, undocumented op */
                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                gen_helper_fpop(cpu_env);
                break;
            case 0x2c: /* fucom st(i) */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucom_ST0_FT0(cpu_env);
                break;
            case 0x2d: /* fucomp st(i) */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucom_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                break;
            case 0x33: /* de/3 */
                switch(rm) {
                case 1: /* fcompp */
                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
                    gen_helper_fcom_ST0_FT0(cpu_env);
                    gen_helper_fpop(cpu_env);
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x38: /* ffreep sti, undocumented op */
                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fpop(cpu_env);
                break;
            case 0x3c: /* df/4 */
                switch(rm) {
                case 0:
                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x3d: /* fucomip */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucomi_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x3e: /* fcomip */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcomi_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x10 ... 0x13: /* fcmovxx */
            case 0x18 ... 0x1b:
                {
                    int op1;
                    TCGLabel *l1;
                    static const uint8_t fcmov_cc[8] = {
                        (JCC_B << 1),
                        (JCC_Z << 1),
                        (JCC_BE << 1),
                        (JCC_P << 1),
                    };

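                    /* The low two bits of the opcode select the base
                       condition (B, Z, BE, P); bit 3 picks the negated
                       form, hence the XOR with 1 below.  The move is
                       not predicated: gen_jcc1_noeob simply branches
                       over the fmov when the opposite condition
                       holds.  */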
                    if (!(s->cpuid_features & CPUID_CMOV)) {
                        goto illegal_op;
                    }
                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                    l1 = gen_new_label();
                    gen_jcc1_noeob(s, op1, l1);
                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
                    gen_set_label(l1);
                }
                break;
            default:
                goto unknown_op;
            }
        }
        break;
        /************************/
        /* string ops */

    case 0xa4: /* movsS */
    case 0xa5:
        ot = mo_b_d(b, dflag);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_movs(s, ot);
        }
        break;

    case 0xaa: /* stosS */
    case 0xab:
        ot = mo_b_d(b, dflag);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_stos(s, ot);
        }
        break;
    case 0xac: /* lodsS */
    case 0xad:
        ot = mo_b_d(b, dflag);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_lods(s, ot);
        }
        break;
    case 0xae: /* scasS */
    case 0xaf:
        ot = mo_b_d(b, dflag);
        if (prefixes & PREFIX_REPNZ) {
            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
        } else if (prefixes & PREFIX_REPZ) {
            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
        } else {
            gen_scas(s, ot);
        }
        break;

    case 0xa6: /* cmpsS */
    case 0xa7:
        ot = mo_b_d(b, dflag);
        if (prefixes & PREFIX_REPNZ) {
            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
        } else if (prefixes & PREFIX_REPZ) {
            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
        } else {
            gen_cmps(s, ot);
        }
        break;
    case 0x6c: /* insS */
    case 0x6d:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
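        /* The IOIO intercept info passed down combines the access
           type (SVM_IOIO_TYPE_MASK = IN), the REP bit reported by
           svm_is_rep(), and bit 2 (the constant 4), which marks a
           string operation in the SVM exit information.  */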
        gen_check_io(s, ot, pc_start - s->cs_base,
                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_ins(s, ot);
            if (s->tb->cflags & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;
    case 0x6e: /* outsS */
    case 0x6f:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     svm_is_rep(prefixes) | 4);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_outs(s, ot);
            if (s->tb->cflags & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;

        /************************/
        /* port I/O */

    case 0xe4:
    case 0xe5:
        ot = mo_b_d32(b, dflag);
        val = cpu_ldub_code(env, s->pc++);
        tcg_gen_movi_tl(cpu_T0, val);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
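        /* Under icount, an I/O access must be the last thing in the
           TB: the access is bracketed with gen_io_start()/gen_io_end()
           and followed by a jump to the next insn so the TB ends
           immediately afterwards.  The same pattern recurs for the
           other in/out cases below.  */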
        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_start();
        }
        tcg_gen_movi_i32(cpu_tmp2_i32, val);
        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
        gen_bpt_io(s, cpu_tmp2_i32, ot);
        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_end();
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xe6:
    case 0xe7:
        ot = mo_b_d32(b, dflag);
        val = cpu_ldub_code(env, s->pc++);
        tcg_gen_movi_tl(cpu_T0, val);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     svm_is_rep(prefixes));
        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);

        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_start();
        }
        tcg_gen_movi_i32(cpu_tmp2_i32, val);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
        gen_bpt_io(s, cpu_tmp2_i32, ot);
        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_end();
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xec:
    case 0xed:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_start();
        }
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
        gen_bpt_io(s, cpu_tmp2_i32, ot);
        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_end();
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xee:
    case 0xef:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     svm_is_rep(prefixes));
        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);

        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_start();
        }
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
        gen_bpt_io(s, cpu_tmp2_i32, ot);
        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_end();
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;

        /************************/
        /* control */
    case 0xc2: /* ret im */
        val = cpu_ldsw_code(env, s->pc);
        s->pc += 2;
        ot = gen_pop_T0(s);
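        /* Drop the popped return address (1 << ot bytes) plus the
           extra byte count taken from the immediate.  */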
        gen_stack_update(s, val + (1 << ot));
        /* Note that gen_pop_T0 uses a zero-extending load.  */
        gen_op_jmp_v(cpu_T0);
        gen_bnd_jmp(s);
        gen_eob(s);
        break;
    case 0xc3: /* ret */
        ot = gen_pop_T0(s);
        gen_pop_update(s, ot);
        /* Note that gen_pop_T0 uses a zero-extending load.  */
        gen_op_jmp_v(cpu_T0);
        gen_bnd_jmp(s);
        gen_eob(s);
        break;
    case 0xca: /* lret im */
        val = cpu_ldsw_code(env, s->pc);
        s->pc += 2;
    do_lret:
        if (s->pe && !s->vm86) {
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                      tcg_const_i32(val));
        } else {
            gen_stack_A0(s);
            /* pop offset */
            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
            /* NOTE: keeping EIP updated here is not a problem even if
               an exception is raised */
            gen_op_jmp_v(cpu_T0);
            /* pop selector */
            gen_add_A0_im(s, 1 << dflag);
            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
            gen_op_movl_seg_T0_vm(R_CS);
            /* add stack offset */
            gen_stack_update(s, val + (2 << dflag));
        }
        gen_eob(s);
        break;
    case 0xcb: /* lret */
        val = 0;
        goto do_lret;
    case 0xcf: /* iret */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
        if (!s->pe) {
            /* real mode */
            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
            set_cc_op(s, CC_OP_EFLAGS);
        } else if (s->vm86) {
            if (s->iopl != 3) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            } else {
                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
                set_cc_op(s, CC_OP_EFLAGS);
            }
        } else {
            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                      tcg_const_i32(s->pc - s->cs_base));
            set_cc_op(s, CC_OP_EFLAGS);
        }
        gen_eob(s);
        break;
    case 0xe8: /* call im */
        {
            if (dflag != MO_16) {
                tval = (int32_t)insn_get(env, s, MO_32);
            } else {
                tval = (int16_t)insn_get(env, s, MO_16);
            }
            next_eip = s->pc - s->cs_base;
            tval += next_eip;
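            /* Outside 64-bit code the target wraps to the operand
               size: a 16-bit wrap for dflag == MO_16, otherwise the
               32-bit EIP wrap.  */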
            if (dflag == MO_16) {
                tval &= 0xffff;
            } else if (!CODE64(s)) {
                tval &= 0xffffffff;
            }
            tcg_gen_movi_tl(cpu_T0, next_eip);
            gen_push_v(s, cpu_T0);
            gen_bnd_jmp(s);
            gen_jmp(s, tval);
        }
        break;
    case 0x9a: /* lcall im */
        {
            unsigned int selector, offset;

            if (CODE64(s))
                goto illegal_op;
            ot = dflag;
            offset = insn_get(env, s, ot);
            selector = insn_get(env, s, MO_16);

            tcg_gen_movi_tl(cpu_T0, selector);
            tcg_gen_movi_tl(cpu_T1, offset);
        }
        goto do_lcall;
    case 0xe9: /* jmp im */
        if (dflag != MO_16) {
            tval = (int32_t)insn_get(env, s, MO_32);
        } else {
            tval = (int16_t)insn_get(env, s, MO_16);
        }
        tval += s->pc - s->cs_base;
        if (dflag == MO_16) {
            tval &= 0xffff;
        } else if (!CODE64(s)) {
            tval &= 0xffffffff;
        }
        gen_bnd_jmp(s);
        gen_jmp(s, tval);
        break;
    case 0xea: /* ljmp im */
        {
            unsigned int selector, offset;

            if (CODE64(s))
                goto illegal_op;
            ot = dflag;
            offset = insn_get(env, s, ot);
            selector = insn_get(env, s, MO_16);

            tcg_gen_movi_tl(cpu_T0, selector);
            tcg_gen_movi_tl(cpu_T1, offset);
        }
        goto do_ljmp;
    case 0xeb: /* jmp Jb */
        tval = (int8_t)insn_get(env, s, MO_8);
        tval += s->pc - s->cs_base;
        if (dflag == MO_16) {
            tval &= 0xffff;
        }
        gen_jmp(s, tval);
        break;
    case 0x70 ... 0x7f: /* jcc Jb */
        tval = (int8_t)insn_get(env, s, MO_8);
        goto do_jcc;
    case 0x180 ... 0x18f: /* jcc Jv */
        if (dflag != MO_16) {
            tval = (int32_t)insn_get(env, s, MO_32);
        } else {
            tval = (int16_t)insn_get(env, s, MO_16);
        }
    do_jcc:
        next_eip = s->pc - s->cs_base;
        tval += next_eip;
        if (dflag == MO_16) {
            tval &= 0xffff;
        }
        gen_bnd_jmp(s);
        gen_jcc(s, b, tval, next_eip);
        break;

    case 0x190 ... 0x19f: /* setcc Gv */
        modrm = cpu_ldub_code(env, s->pc++);
        gen_setcc1(s, b, cpu_T0);
        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
        break;
    case 0x140 ... 0x14f: /* cmov Gv, Ev */
        if (!(s->cpuid_features & CPUID_CMOV)) {
            goto illegal_op;
        }
        ot = dflag;
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        gen_cmovcc1(env, s, ot, b, modrm, reg);
        break;

        /************************/
        /* flags */
    case 0x9c: /* pushf */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
        if (s->vm86 && s->iopl != 3) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_update_cc_op(s);
            gen_helper_read_eflags(cpu_T0, cpu_env);
            gen_push_v(s, cpu_T0);
        }
        break;
    case 0x9d: /* popf */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
        if (s->vm86 && s->iopl != 3) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            ot = gen_pop_T0(s);
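            /* The set of writable flags depends on privilege: CPL 0
               may also change IOPL; CPL <= IOPL may still change IF;
               otherwise neither IOPL nor IF is touched.  TF, AC, ID
               and NT are writable in all three cases.  */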
            if (s->cpl == 0) {
                if (dflag != MO_16) {
                    gen_helper_write_eflags(cpu_env, cpu_T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK |
                                                           IF_MASK |
                                                           IOPL_MASK)));
                } else {
                    gen_helper_write_eflags(cpu_env, cpu_T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK |
                                                           IF_MASK | IOPL_MASK)
                                                          & 0xffff));
                }
            } else {
                if (s->cpl <= s->iopl) {
                    if (dflag != MO_16) {
                        gen_helper_write_eflags(cpu_env, cpu_T0,
                                                tcg_const_i32((TF_MASK |
                                                               AC_MASK |
                                                               ID_MASK |
                                                               NT_MASK |
                                                               IF_MASK)));
                    } else {
                        gen_helper_write_eflags(cpu_env, cpu_T0,
                                                tcg_const_i32((TF_MASK |
                                                               AC_MASK |
                                                               ID_MASK |
                                                               NT_MASK |
                                                               IF_MASK)
                                                              & 0xffff));
                    }
                } else {
                    if (dflag != MO_16) {
                        gen_helper_write_eflags(cpu_env, cpu_T0,
                                           tcg_const_i32((TF_MASK | AC_MASK |
                                                          ID_MASK | NT_MASK)));
                    } else {
                        gen_helper_write_eflags(cpu_env, cpu_T0,
                                           tcg_const_i32((TF_MASK | AC_MASK |
                                                          ID_MASK | NT_MASK)
                                                         & 0xffff));
                    }
                }
            }
            gen_pop_update(s, ot);
            set_cc_op(s, CC_OP_EFLAGS);
            /* abort translation because TF/AC flag may change */
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
    case 0x9e: /* sahf */
        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
            goto illegal_op;
        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
        gen_compute_eflags(s);
        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
        break;
    case 0x9f: /* lahf */
        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
            goto illegal_op;
        gen_compute_eflags(s);
        /* Note: gen_compute_eflags() only gives the condition codes */
        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
        break;
    case 0xf5: /* cmc */
        gen_compute_eflags(s);
        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
        break;
    case 0xf8: /* clc */
        gen_compute_eflags(s);
        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
        break;
    case 0xf9: /* stc */
        gen_compute_eflags(s);
        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
        break;
    case 0xfc: /* cld */
        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
        break;
    case 0xfd: /* std */
        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
        break;

        /************************/
        /* bit operations */
    case 0x1ba: /* bt/bts/btr/btc Gv, im */
        ot = dflag;
        modrm = cpu_ldub_code(env, s->pc++);
        op = (modrm >> 3) & 7;
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        if (mod != 3) {
            s->rip_offset = 1;
            gen_lea_modrm(env, s, modrm);
            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
        } else {
            gen_op_mov_v_reg(ot, cpu_T0, rm);
        }
        /* load shift */
        val = cpu_ldub_code(env, s->pc++);
        tcg_gen_movi_tl(cpu_T1, val);
        if (op < 4)
            goto unknown_op;
        op -= 4;
        goto bt_op;
    case 0x1a3: /* bt Gv, Ev */
        op = 0;
        goto do_btx;
    case 0x1ab: /* bts */
        op = 1;
        goto do_btx;
    case 0x1b3: /* btr */
        op = 2;
        goto do_btx;
    case 0x1bb: /* btc */
        op = 3;
    do_btx:
        ot = dflag;
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
        if (mod != 3) {
            gen_lea_modrm(env, s, modrm);
            /* specific case: we need to add a displacement */
            gen_exts(ot, cpu_T1);
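            /* Add (bit_offset / operand_width) words to the address:
               the sign-extended bit index is divided by the width
               (arithmetic shift right by 3 + ot) and scaled back to
               bytes (shift left by ot).  */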
            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
        } else {
            gen_op_mov_v_reg(ot, cpu_T0, rm);
        }
    bt_op:
        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
        tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
        switch(op) {
        case 0:
            break;
        case 1:
            tcg_gen_movi_tl(cpu_tmp0, 1);
            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
            break;
        case 2:
            tcg_gen_movi_tl(cpu_tmp0, 1);
            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
            tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
            break;
        default:
        case 3:
            tcg_gen_movi_tl(cpu_tmp0, 1);
            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
            break;
        }
        if (op != 0) {
            if (mod != 3) {
                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
            } else {
                gen_op_mov_reg_v(ot, rm, cpu_T0);
            }
        }

        /* Delay all CC updates until after the store above.  Note that
           C is the result of the test, Z is unchanged, and the others
           are all undefined.  */
        switch (s->cc_op) {
        case CC_OP_MULB ... CC_OP_MULQ:
        case CC_OP_ADDB ... CC_OP_ADDQ:
        case CC_OP_ADCB ... CC_OP_ADCQ:
        case CC_OP_SUBB ... CC_OP_SUBQ:
        case CC_OP_SBBB ... CC_OP_SBBQ:
        case CC_OP_LOGICB ... CC_OP_LOGICQ:
        case CC_OP_INCB ... CC_OP_INCQ:
        case CC_OP_DECB ... CC_OP_DECQ:
        case CC_OP_SHLB ... CC_OP_SHLQ:
        case CC_OP_SARB ... CC_OP_SARQ:
        case CC_OP_BMILGB ... CC_OP_BMILGQ:
            /* Z was going to be computed from the non-zero status of CC_DST.
               We can get that same Z value (and the new C value) by leaving
               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
               same width.  */
            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
            break;
        default:
            /* Otherwise, generate EFLAGS and replace the C bit.  */
            gen_compute_eflags(s);
            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
                               ctz32(CC_C), 1);
            break;
        }
        break;
    case 0x1bc: /* bsf / tzcnt */
    case 0x1bd: /* bsr / lzcnt */
        ot = dflag;
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        gen_extu(ot, cpu_T0);

        /* Note that lzcnt and tzcnt are in different extensions.  */
        if ((prefixes & PREFIX_REPZ)
            && (b & 1
                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
            int size = 8 << ot;
            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
            if (b & 1) {
                /* For lzcnt, reduce the target_ulong result by the
                   number of zeros that we expect to find at the top.  */
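                /* e.g. a 16-bit lzcnt on a 64-bit target sees
                   TARGET_LONG_BITS - 16 = 48 extra leading zeros,
                   which the subtraction removes.  */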
                gen_helper_clz(cpu_T0, cpu_T0);
                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
            } else {
                /* For tzcnt, a zero input must return the operand size:
                   force all bits outside the operand size to 1.  */
                target_ulong mask = (target_ulong)-2 << (size - 1);
                tcg_gen_ori_tl(cpu_T0, cpu_T0, mask);
                gen_helper_ctz(cpu_T0, cpu_T0);
            }
            /* For lzcnt/tzcnt, C and Z bits are defined and are
               related to the result.  */
            gen_op_update1_cc();
            set_cc_op(s, CC_OP_BMILGB + ot);
        } else {
            /* For bsr/bsf, only the Z bit is defined and it is related
               to the input and not the result.  */
            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
            set_cc_op(s, CC_OP_LOGICB + ot);
            if (b & 1) {
                /* For bsr, return the bit index of the first 1 bit,
                   not the count of leading zeros.  */
                gen_helper_clz(cpu_T0, cpu_T0);
                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
            } else {
                gen_helper_ctz(cpu_T0, cpu_T0);
            }
            /* ??? The manual says that the output is undefined when the
               input is zero, but real hardware leaves it unchanged, and
               real programs appear to depend on that.  */
            tcg_gen_movi_tl(cpu_tmp0, 0);
            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T0, cpu_cc_dst, cpu_tmp0,
                               cpu_regs[reg], cpu_T0);
        }
        gen_op_mov_reg_v(ot, reg, cpu_T0);
        break;
        /************************/
        /* bcd */
    case 0x27: /* daa */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_helper_daa(cpu_env);
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0x2f: /* das */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_helper_das(cpu_env);
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0x37: /* aaa */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_helper_aaa(cpu_env);
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0x3f: /* aas */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_helper_aas(cpu_env);
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0xd4: /* aam */
        if (CODE64(s))
            goto illegal_op;
        val = cpu_ldub_code(env, s->pc++);
        if (val == 0) {
            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
        } else {
            gen_helper_aam(cpu_env, tcg_const_i32(val));
            set_cc_op(s, CC_OP_LOGICB);
        }
        break;
    case 0xd5: /* aad */
        if (CODE64(s))
            goto illegal_op;
        val = cpu_ldub_code(env, s->pc++);
        gen_helper_aad(cpu_env, tcg_const_i32(val));
        set_cc_op(s, CC_OP_LOGICB);
        break;
        /************************/
        /* misc */
    case 0x90: /* nop */
        /* XXX: correct lock test for all insns */
        if (prefixes & PREFIX_LOCK) {
            goto illegal_op;
        }
        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
        if (REX_B(s)) {
            goto do_xchg_reg_eax;
        }
        if (prefixes & PREFIX_REPZ) {
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
            s->is_jmp = DISAS_TB_JUMP;
        }
        break;
    case 0x9b: /* fwait */
        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
            (HF_MP_MASK | HF_TS_MASK)) {
            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
        } else {
            gen_helper_fwait(cpu_env);
        }
        break;
    case 0xcc: /* int3 */
        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
        break;
    case 0xcd: /* int N */
        val = cpu_ldub_code(env, s->pc++);
        if (s->vm86 && s->iopl != 3) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
        }
        break;
    case 0xce: /* into */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_jmp_im(pc_start - s->cs_base);
        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
        break;
#ifdef WANT_ICEBP
    case 0xf1: /* icebp (undocumented, exits to external debugger) */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
#if 1
        gen_debug(s, pc_start - s->cs_base);
#else
        /* start debug */
        tb_flush(CPU(x86_env_get_cpu(env)));
        qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
#endif
        break;
#endif
    case 0xfa: /* cli */
        if (!s->vm86) {
            if (s->cpl <= s->iopl) {
                gen_helper_cli(cpu_env);
            } else {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            }
        } else {
            if (s->iopl == 3) {
                gen_helper_cli(cpu_env);
            } else {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            }
        }
        break;
    case 0xfb: /* sti */
        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
            gen_helper_sti(cpu_env);
            /* interrupts are not recognized until after the insn
               following STI */
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob_inhibit_irq(s, true);
        } else {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        }
        break;
    case 0x62: /* bound */
        if (CODE64(s))
            goto illegal_op;
        ot = dflag;
        modrm = cpu_ldub_code(env, s->pc++);
        reg = (modrm >> 3) & 7;
        mod = (modrm >> 6) & 3;
        if (mod == 3)
            goto illegal_op;
        gen_op_mov_v_reg(ot, cpu_T0, reg);
        gen_lea_modrm(env, s, modrm);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        if (ot == MO_16) {
            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
        } else {
            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
        }
        break;
    case 0x1c8 ... 0x1cf: /* bswap reg */
        reg = (b & 7) | REX_B(s);
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
        } else
#endif
        {
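            /* Zero-extend before the 32-bit swap so the high part of
               the target_ulong is well defined on 64-bit targets; the
               swapped value is then written back as a 32-bit
               register update.  */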
            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
        }
        break;
    case 0xd6: /* salc */
        if (CODE64(s))
            goto illegal_op;
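        /* AL = CF ? 0xff : 0x00: the computed carry (0 or 1) is
           negated to 0 or -1 and its low byte stored into AL.  */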
        gen_compute_eflags_c(s, cpu_T0);
        tcg_gen_neg_tl(cpu_T0, cpu_T0);
        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
        break;
    case 0xe0: /* loopnz */
    case 0xe1: /* loopz */
    case 0xe2: /* loop */
    case 0xe3: /* jecxz */
        {
            TCGLabel *l1, *l2, *l3;

            tval = (int8_t)insn_get(env, s, MO_8);
            next_eip = s->pc - s->cs_base;
            tval += next_eip;
            if (dflag == MO_16) {
                tval &= 0xffff;
            }

            l1 = gen_new_label();
            l2 = gen_new_label();
            l3 = gen_new_label();
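            /* l1: branch taken (jump to tval); l2: common exit;
               l3: loopz/loopnz condition failed, fall through to
               next_eip.  */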
            b &= 3;
            switch(b) {
            case 0: /* loopnz */
            case 1: /* loopz */
                gen_op_add_reg_im(s->aflag, R_ECX, -1);
                gen_op_jz_ecx(s->aflag, l3);
                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
                break;
            case 2: /* loop */
                gen_op_add_reg_im(s->aflag, R_ECX, -1);
                gen_op_jnz_ecx(s->aflag, l1);
                break;
            default:
            case 3: /* jcxz */
                gen_op_jz_ecx(s->aflag, l1);
                break;
            }

            gen_set_label(l3);
            gen_jmp_im(next_eip);
            tcg_gen_br(l2);

            gen_set_label(l1);
            gen_jmp_im(tval);
            gen_set_label(l2);
            gen_eob(s);
        }
        break;
    case 0x130: /* wrmsr */
    case 0x132: /* rdmsr */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            if (b & 2) {
                gen_helper_rdmsr(cpu_env);
            } else {
                gen_helper_wrmsr(cpu_env);
            }
        }
        break;
    case 0x131: /* rdtsc */
        gen_update_cc_op(s);
        gen_jmp_im(pc_start - s->cs_base);
        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_start();
        }
        gen_helper_rdtsc(cpu_env);
        if (s->tb->cflags & CF_USE_ICOUNT) {
            gen_io_end();
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0x133: /* rdpmc */
        gen_update_cc_op(s);
        gen_jmp_im(pc_start - s->cs_base);
        gen_helper_rdpmc(cpu_env);
        break;
    case 0x134: /* sysenter */
        /* On Intel CPUs, SYSENTER is also valid in 64-bit mode */
        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
            goto illegal_op;
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysenter(cpu_env);
            gen_eob(s);
        }
        break;
    case 0x135: /* sysexit */
        /* On Intel CPUs, SYSEXIT is also valid in 64-bit mode */
        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
            goto illegal_op;
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
            gen_eob(s);
        }
        break;
#ifdef TARGET_X86_64
    case 0x105: /* syscall */
        /* XXX: is it usable in real mode? */
        gen_update_cc_op(s);
        gen_jmp_im(pc_start - s->cs_base);
        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
        gen_eob(s);
        break;
    case 0x107: /* sysret */
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
            /* condition codes are modified only in long mode */
            if (s->lma) {
                set_cc_op(s, CC_OP_EFLAGS);
            }
            gen_eob(s);
        }
        break;
#endif
    case 0x1a2: /* cpuid */
        gen_update_cc_op(s);
        gen_jmp_im(pc_start - s->cs_base);
        gen_helper_cpuid(cpu_env);
        break;
    case 0xf4: /* hlt */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
            s->is_jmp = DISAS_TB_JUMP;
        }
        break;
    case 0x100:
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        op = (modrm >> 3) & 7;
        switch(op) {
        case 0: /* sldt */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
                             offsetof(CPUX86State, ldt.selector));
            ot = mod == 3 ? dflag : MO_16;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 2: /* lldt */
            if (!s->pe || s->vm86)
                goto illegal_op;
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            } else {
                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
            }
            break;
        case 1: /* str */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
                             offsetof(CPUX86State, tr.selector));
            ot = mod == 3 ? dflag : MO_16;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 3: /* ltr */
            if (!s->pe || s->vm86)
                goto illegal_op;
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            } else {
                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
            }
            break;
        case 4: /* verr */
        case 5: /* verw */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            gen_update_cc_op(s);
            if (op == 4) {
                gen_helper_verr(cpu_env, cpu_T0);
            } else {
                gen_helper_verw(cpu_env, cpu_T0);
            }
            set_cc_op(s, CC_OP_EFLAGS);
            break;
        default:
            goto unknown_op;
        }
        break;

    case 0x101:
        modrm = cpu_ldub_code(env, s->pc++);
        switch (modrm) {
        CASE_MODRM_MEM_OP(0): /* sgdt */
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
            gen_lea_modrm(env, s, modrm);
            tcg_gen_ld32u_tl(cpu_T0,
                             cpu_env, offsetof(CPUX86State, gdt.limit));
            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
            gen_add_A0_im(s, 2);
            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
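            /* With a 16-bit operand size only 24 bits of the base are
               stored; the same truncation recurs below for sidt, lgdt
               and lidt.  */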
            if (dflag == MO_16) {
                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
            }
            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
            break;

        case 0xc8: /* monitor */
            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
            gen_extu(s->aflag, cpu_A0);
            gen_add_A0_ds_seg(s);
            gen_helper_monitor(cpu_env, cpu_A0);
            break;

        case 0xc9: /* mwait */
            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
            gen_eob(s);
            break;

        case 0xca: /* clac */
            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
                || s->cpl != 0) {
                goto illegal_op;
            }
            gen_helper_clac(cpu_env);
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
            break;

        case 0xcb: /* stac */
            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
                || s->cpl != 0) {
                goto illegal_op;
            }
            gen_helper_stac(cpu_env);
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
            break;

        CASE_MODRM_MEM_OP(1): /* sidt */
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
            gen_lea_modrm(env, s, modrm);
            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
            gen_add_A0_im(s, 2);
            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
            if (dflag == MO_16) {
                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
            }
            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
            break;

        case 0xd0: /* xgetbv */
            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
                goto illegal_op;
            }
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
            break;

        case 0xd1: /* xsetbv */
            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
                goto illegal_op;
            }
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                  cpu_regs[R_EDX]);
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
            /* End TB because translation flags may change.  */
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
            break;

        case 0xd8: /* VMRUN */
            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
                goto illegal_op;
            }
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
                             tcg_const_i32(s->pc - pc_start));
            tcg_gen_exit_tb(0);
            s->is_jmp = DISAS_TB_JUMP;
            break;

        case 0xd9: /* VMMCALL */
            if (!(s->flags & HF_SVME_MASK)) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_vmmcall(cpu_env);
            break;

        case 0xda: /* VMLOAD */
            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
                goto illegal_op;
            }
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
            break;

        case 0xdb: /* VMSAVE */
            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
                goto illegal_op;
            }
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
            break;

        case 0xdc: /* STGI */
            if ((!(s->flags & HF_SVME_MASK)
                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
                || !s->pe) {
                goto illegal_op;
            }
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_stgi(cpu_env);
            break;

        case 0xdd: /* CLGI */
            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
                goto illegal_op;
            }
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_clgi(cpu_env);
            break;

        case 0xde: /* SKINIT */
            if ((!(s->flags & HF_SVME_MASK)
                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
                || !s->pe) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_skinit(cpu_env);
            break;

        case 0xdf: /* INVLPGA */
            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
                goto illegal_op;
            }
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
            break;

        CASE_MODRM_MEM_OP(2): /* lgdt */
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
            gen_lea_modrm(env, s, modrm);
            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
            gen_add_A0_im(s, 2);
            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
            if (dflag == MO_16) {
                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
            }
            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
            break;

        CASE_MODRM_MEM_OP(3): /* lidt */
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
            gen_lea_modrm(env, s, modrm);
            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
            gen_add_A0_im(s, 2);
            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
            if (dflag == MO_16) {
                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
            }
            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
            break;

        CASE_MODRM_OP(4): /* smsw */
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
            if (CODE64(s)) {
                mod = (modrm >> 6) & 3;
                ot = (mod != 3 ? MO_16 : s->dflag);
            } else {
                ot = MO_16;
            }
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 0xee: /* rdpkru */
            if (prefixes & PREFIX_LOCK) {
                goto illegal_op;
            }
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
            break;
        case 0xef: /* wrpkru */
            if (prefixes & PREFIX_LOCK) {
                goto illegal_op;
            }
            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                  cpu_regs[R_EDX]);
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
            break;
        CASE_MODRM_OP(6): /* lmsw */
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            gen_helper_lmsw(cpu_env, cpu_T0);
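            /* A CR0 update can change bits (PE, MP, EM, TS) that
               affect translation, so the TB is ended here.  */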
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
            break;

        CASE_MODRM_MEM_OP(7): /* invlpg */
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            gen_lea_modrm(env, s, modrm);
            gen_helper_invlpg(cpu_env, cpu_A0);
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
            break;

        case 0xf8: /* swapgs */
#ifdef TARGET_X86_64
            if (CODE64(s)) {
                if (s->cpl != 0) {
                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                } else {
                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
                                  offsetof(CPUX86State, kernelgsbase));
                    tcg_gen_st_tl(cpu_T0, cpu_env,
                                  offsetof(CPUX86State, kernelgsbase));
                }
                break;
            }
#endif
            goto illegal_op;

        case 0xf9: /* rdtscp */
            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(pc_start - s->cs_base);
            if (s->tb->cflags & CF_USE_ICOUNT) {
                gen_io_start();
            }
            gen_helper_rdtscp(cpu_env);
            if (s->tb->cflags & CF_USE_ICOUNT) {
                gen_io_end();
                gen_jmp(s, s->pc - s->cs_base);
            }
            break;

        default:
            goto unknown_op;
        }
        break;

    case 0x108: /* invd */
    case 0x109: /* wbinvd */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
            /* nothing to do */
        }
        break;
    case 0x63: /* arpl or movslS (x86_64) */
#ifdef TARGET_X86_64
        if (CODE64(s)) {
            int d_ot;
            /* d_ot is the size of destination */
            d_ot = dflag;

            modrm = cpu_ldub_code(env, s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);

            if (mod == 3) {
                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
                /* sign extend */
                if (d_ot == MO_64) {
                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
                }
                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
            } else {
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
            }
        } else
#endif
        {
            TCGLabel *label1;
            TCGv t0, t1, t2, a0;

            if (!s->pe || s->vm86)
                goto illegal_op;
            t0 = tcg_temp_local_new();
            t1 = tcg_temp_local_new();
            t2 = tcg_temp_local_new();
            ot = MO_16;
            modrm = cpu_ldub_code(env, s->pc++);
            reg = (modrm >> 3) & 7;
            mod = (modrm >> 6) & 3;
            rm = modrm & 7;
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, ot, t0, cpu_A0);
                a0 = tcg_temp_local_new();
                tcg_gen_mov_tl(a0, cpu_A0);
            } else {
                gen_op_mov_v_reg(ot, t0, rm);
                TCGV_UNUSED(a0);
            }
            gen_op_mov_v_reg(ot, t1, reg);
            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
            tcg_gen_andi_tl(t1, t1, 3);
            tcg_gen_movi_tl(t2, 0);
7467            label1 = gen_new_label();
7468            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
7469            tcg_gen_andi_tl(t0, t0, ~3);
7470            tcg_gen_or_tl(t0, t0, t1);
7471            tcg_gen_movi_tl(t2, CC_Z);
7472            gen_set_label(label1);
7473            if (mod != 3) {
7474                gen_op_st_v(s, ot, t0, a0);
7475                tcg_temp_free(a0);
7476            } else {
7477                gen_op_mov_reg_v(ot, rm, t0);
7478            }
7479            gen_compute_eflags(s);
7480            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7481            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7482            tcg_temp_free(t0);
7483            tcg_temp_free(t1);
7484            tcg_temp_free(t2);
7485        }
7486        break;
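        /* LAR/LSL load the access rights or the segment limit of the
           source selector.  The helpers set CC_Z in cc_src only on
           success, so the destination register is written behind a
           conditional branch on that flag.  */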
7487    case 0x102: /* lar */
7488    case 0x103: /* lsl */
7489        {
7490            TCGLabel *label1;
7491            TCGv t0;
7492            if (!s->pe || s->vm86)
7493                goto illegal_op;
7494            ot = dflag != MO_16 ? MO_32 : MO_16;
7495            modrm = cpu_ldub_code(env, s->pc++);
7496            reg = ((modrm >> 3) & 7) | rex_r;
7497            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7498            t0 = tcg_temp_local_new();
7499            gen_update_cc_op(s);
7500            if (b == 0x102) {
7501                gen_helper_lar(t0, cpu_env, cpu_T0);
7502            } else {
7503                gen_helper_lsl(t0, cpu_env, cpu_T0);
7504            }
7505            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
7506            label1 = gen_new_label();
7507            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
7508            gen_op_mov_reg_v(ot, reg, t0);
7509            gen_set_label(label1);
7510            set_cc_op(s, CC_OP_EFLAGS);
7511            tcg_temp_free(t0);
7512        }
7513        break;
7514    case 0x118:
7515        modrm = cpu_ldub_code(env, s->pc++);
7516        mod = (modrm >> 6) & 3;
7517        op = (modrm >> 3) & 7;
7518        switch(op) {
7519        case 0: /* prefetchnta */
7520        case 1: /* prefetcht0 */
7521        case 2: /* prefetcht1 */
7522        case 3: /* prefetcht2 */
7523            if (mod == 3)
7524                goto illegal_op;
7525            gen_nop_modrm(env, s, modrm);
7526            /* nothing more to do */
7527            break;
7528        default: /* nop (multi byte) */
7529            gen_nop_modrm(env, s, modrm);
7530            break;
7531        }
7532        break;
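        /* 0f 1a is the first MPX group: the mandatory prefix selects the
           insn (f3 = bndcl, f2 = bndcu, 66 = bndmov load form, no prefix
           with a memory operand = bndldx).  When MPX is not enabled in
           hflags, the group falls through to the multi-byte NOP.  */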
7533    case 0x11a:
7534        modrm = cpu_ldub_code(env, s->pc++);
7535        if (s->flags & HF_MPX_EN_MASK) {
7536            mod = (modrm >> 6) & 3;
7537            reg = ((modrm >> 3) & 7) | rex_r;
7538            if (prefixes & PREFIX_REPZ) {
7539                /* bndcl */
7540                if (reg >= 4
7541                    || (prefixes & PREFIX_LOCK)
7542                    || s->aflag == MO_16) {
7543                    goto illegal_op;
7544                }
7545                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7546            } else if (prefixes & PREFIX_REPNZ) {
7547                /* bndcu */
7548                if (reg >= 4
7549                    || (prefixes & PREFIX_LOCK)
7550                    || s->aflag == MO_16) {
7551                    goto illegal_op;
7552                }
7553                TCGv_i64 notu = tcg_temp_new_i64();
7554                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7555                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7556                tcg_temp_free_i64(notu);
7557            } else if (prefixes & PREFIX_DATA) {
7558                /* bndmov -- from reg/mem */
7559                if (reg >= 4 || s->aflag == MO_16) {
7560                    goto illegal_op;
7561                }
7562                if (mod == 3) {
7563                    int reg2 = (modrm & 7) | REX_B(s);
7564                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7565                        goto illegal_op;
7566                    }
7567                    if (s->flags & HF_MPX_IU_MASK) {
7568                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7569                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7570                    }
7571                } else {
7572                    gen_lea_modrm(env, s, modrm);
7573                    if (CODE64(s)) {
7574                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7575                                            s->mem_index, MO_LEQ);
7576                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7577                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7578                                            s->mem_index, MO_LEQ);
7579                    } else {
7580                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7581                                            s->mem_index, MO_LEUL);
7582                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7583                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7584                                            s->mem_index, MO_LEUL);
7585                    }
7586                    /* bnd registers are now in-use */
7587                    gen_set_hflag(s, HF_MPX_IU_MASK);
7588                }
7589            } else if (mod != 3) {
7590                /* bndldx */
7591                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7592                if (reg >= 4
7593                    || (prefixes & PREFIX_LOCK)
7594                    || s->aflag == MO_16
7595                    || a.base < -1) {
7596                    goto illegal_op;
7597                }
7598                if (a.base >= 0) {
7599                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7600                } else {
7601                    tcg_gen_movi_tl(cpu_A0, 0);
7602                }
7603                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7604                if (a.index >= 0) {
7605                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7606                } else {
7607                    tcg_gen_movi_tl(cpu_T0, 0);
7608                }
7609                if (CODE64(s)) {
7610                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
7611                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7612                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7613                } else {
7614                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
7615                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7616                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7617                }
7618                gen_set_hflag(s, HF_MPX_IU_MASK);
7619            }
7620        }
7621        gen_nop_modrm(env, s, modrm);
7622        break;
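        /* 0f 1b is the second MPX group, decoded the same way: f3 =
           bndmk, f2 = bndcn, 66 = bndmov store form, no prefix with a
           memory operand = bndstx.  It too degrades to a NOP when MPX
           is disabled.  */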
7623    case 0x11b:
7624        modrm = cpu_ldub_code(env, s->pc++);
7625        if (s->flags & HF_MPX_EN_MASK) {
7626            mod = (modrm >> 6) & 3;
7627            reg = ((modrm >> 3) & 7) | rex_r;
7628            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7629                /* bndmk */
7630                if (reg >= 4
7631                    || (prefixes & PREFIX_LOCK)
7632                    || s->aflag == MO_16) {
7633                    goto illegal_op;
7634                }
7635                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7636                if (a.base >= 0) {
7637                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7638                    if (!CODE64(s)) {
7639                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7640                    }
7641                } else if (a.base == -1) {
7642                    /* with no base register, the lower bound is 0 */
7643                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7644                } else {
7645                    /* rip-relative generates #ud */
7646                    goto illegal_op;
7647                }
7648                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
7649                if (!CODE64(s)) {
7650                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
7651                }
7652                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
7653                /* bnd registers are now in-use */
7654                gen_set_hflag(s, HF_MPX_IU_MASK);
7655                break;
7656            } else if (prefixes & PREFIX_REPNZ) {
7657                /* bndcn */
7658                if (reg >= 4
7659                    || (prefixes & PREFIX_LOCK)
7660                    || s->aflag == MO_16) {
7661                    goto illegal_op;
7662                }
7663                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7664            } else if (prefixes & PREFIX_DATA) {
7665                /* bndmov -- to reg/mem */
7666                if (reg >= 4 || s->aflag == MO_16) {
7667                    goto illegal_op;
7668                }
7669                if (mod == 3) {
7670                    int reg2 = (modrm & 7) | REX_B(s);
7671                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7672                        goto illegal_op;
7673                    }
7674                    if (s->flags & HF_MPX_IU_MASK) {
7675                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7676                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7677                    }
7678                } else {
7679                    gen_lea_modrm(env, s, modrm);
7680                    if (CODE64(s)) {
7681                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7682                                            s->mem_index, MO_LEQ);
7683                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7684                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7685                                            s->mem_index, MO_LEQ);
7686                    } else {
7687                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7688                                            s->mem_index, MO_LEUL);
7689                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7690                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7691                                            s->mem_index, MO_LEUL);
7692                    }
7693                }
7694            } else if (mod != 3) {
7695                /* bndstx */
7696                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7697                if (reg >= 4
7698                    || (prefixes & PREFIX_LOCK)
7699                    || s->aflag == MO_16
7700                    || a.base < -1) {
7701                    goto illegal_op;
7702                }
7703                if (a.base >= 0) {
7704                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7705                } else {
7706                    tcg_gen_movi_tl(cpu_A0, 0);
7707                }
7708                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7709                if (a.index >= 0) {
7710                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7711                } else {
7712                    tcg_gen_movi_tl(cpu_T0, 0);
7713                }
7714                if (CODE64(s)) {
7715                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
7716                                        cpu_bndl[reg], cpu_bndu[reg]);
7717                } else {
7718                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
7719                                        cpu_bndl[reg], cpu_bndu[reg]);
7720                }
7721            }
7722        }
7723        gen_nop_modrm(env, s, modrm);
7724        break;
7725    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7726        modrm = cpu_ldub_code(env, s->pc++);
7727        gen_nop_modrm(env, s, modrm);
7728        break;
7729    case 0x120: /* mov reg, crN */
7730    case 0x122: /* mov crN, reg */
7731        if (s->cpl != 0) {
7732            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7733        } else {
7734            modrm = cpu_ldub_code(env, s->pc++);
7735            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7736             * AMD documentation (24594.pdf) and testing of
7737             * Intel 386 and 486 processors all show that the mod bits
7738             * are assumed to be 1's, regardless of actual values.
7739             */
7740            rm = (modrm & 7) | REX_B(s);
7741            reg = ((modrm >> 3) & 7) | rex_r;
7742            if (CODE64(s))
7743                ot = MO_64;
7744            else
7745                ot = MO_32;
7746            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7747                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7748                reg = 8;
7749            }
7750            switch(reg) {
7751            case 0:
7752            case 2:
7753            case 3:
7754            case 4:
7755            case 8:
7756                gen_update_cc_op(s);
7757                gen_jmp_im(pc_start - s->cs_base);
7758                if (b & 2) {
7759                    gen_op_mov_v_reg(ot, cpu_T0, rm);
7760                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
7761                                         cpu_T0);
7762                    gen_jmp_im(s->pc - s->cs_base);
7763                    gen_eob(s);
7764                } else {
7765                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
7766                    gen_op_mov_reg_v(ot, rm, cpu_T0);
7767                }
7768                break;
7769            default:
7770                goto unknown_op;
7771            }
7772        }
7773        break;
7774    case 0x121: /* mov reg, drN */
7775    case 0x123: /* mov drN, reg */
7776        if (s->cpl != 0) {
7777            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7778        } else {
7779            modrm = cpu_ldub_code(env, s->pc++);
7780            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7781             * AMD documentation (24594.pdf) and testing of
7782             * Intel 386 and 486 processors all show that the mod bits
7783             * are assumed to be 1's, regardless of actual values.
7784             */
7785            rm = (modrm & 7) | REX_B(s);
7786            reg = ((modrm >> 3) & 7) | rex_r;
7787            if (CODE64(s))
7788                ot = MO_64;
7789            else
7790                ot = MO_32;
7791            if (reg >= 8) {
7792                goto illegal_op;
7793            }
7794            if (b & 2) {
7795                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
7796                gen_op_mov_v_reg(ot, cpu_T0, rm);
7797                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7798                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
7799                gen_jmp_im(s->pc - s->cs_base);
7800                gen_eob(s);
7801            } else {
7802                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
7803                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7804                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
7805                gen_op_mov_reg_v(ot, rm, cpu_T0);
7806            }
7807        }
7808        break;
7809    case 0x106: /* clts */
7810        if (s->cpl != 0) {
7811            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7812        } else {
7813            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7814            gen_helper_clts(cpu_env);
7815            /* abort block because static cpu state changed */
7816            gen_jmp_im(s->pc - s->cs_base);
7817            gen_eob(s);
7818        }
7819        break;
7820    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
7821    case 0x1c3: /* MOVNTI reg, mem */
7822        if (!(s->cpuid_features & CPUID_SSE2))
7823            goto illegal_op;
7824        ot = mo_64_32(dflag);
7825        modrm = cpu_ldub_code(env, s->pc++);
7826        mod = (modrm >> 6) & 3;
7827        if (mod == 3)
7828            goto illegal_op;
7829        reg = ((modrm >> 3) & 7) | rex_r;
7830        /* generate a generic store */
7831        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
7832        break;
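        /* 0f ae is group 15.  The switch below is indexed by the whole
           modrm byte: CASE_MODRM_MEM_OP matches the memory forms
           (fxsave, ldmxcsr, xsave, ...) while the explicit 0xc0-0xff
           ranges match the register forms (fsgsbase, fences).  */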
7833    case 0x1ae:
7834        modrm = cpu_ldub_code(env, s->pc++);
7835        switch (modrm) {
7836        CASE_MODRM_MEM_OP(0): /* fxsave */
7837            if (!(s->cpuid_features & CPUID_FXSR)
7838                || (prefixes & PREFIX_LOCK)) {
7839                goto illegal_op;
7840            }
7841            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7842                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7843                break;
7844            }
7845            gen_lea_modrm(env, s, modrm);
7846            gen_helper_fxsave(cpu_env, cpu_A0);
7847            break;
7848
7849        CASE_MODRM_MEM_OP(1): /* fxrstor */
7850            if (!(s->cpuid_features & CPUID_FXSR)
7851                || (prefixes & PREFIX_LOCK)) {
7852                goto illegal_op;
7853            }
7854            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7855                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7856                break;
7857            }
7858            gen_lea_modrm(env, s, modrm);
7859            gen_helper_fxrstor(cpu_env, cpu_A0);
7860            break;
7861
7862        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
7863            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
7864                goto illegal_op;
7865            }
7866            if (s->flags & HF_TS_MASK) {
7867                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7868                break;
7869            }
7870            gen_lea_modrm(env, s, modrm);
7871            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
7872            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
7873            break;
7874
7875        CASE_MODRM_MEM_OP(3): /* stmxcsr */
7876            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
7877                goto illegal_op;
7878            }
7879            if (s->flags & HF_TS_MASK) {
7880                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7881                break;
7882            }
7883            gen_lea_modrm(env, s, modrm);
7884            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
7885            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
7886            break;
7887
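            /* XSAVE takes its requested-feature bitmap in EDX:EAX; the
               two 32-bit halves are concatenated into a single i64 for
               the helper.  XRSTOR and XSAVEOPT below use the same
               pattern.  */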
7888        CASE_MODRM_MEM_OP(4): /* xsave */
7889            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7890                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
7891                                | PREFIX_REPZ | PREFIX_REPNZ))) {
7892                goto illegal_op;
7893            }
7894            gen_lea_modrm(env, s, modrm);
7895            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7896                                  cpu_regs[R_EDX]);
7897            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
7898            break;
7899
7900        CASE_MODRM_MEM_OP(5): /* xrstor */
7901            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7902                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
7903                                | PREFIX_REPZ | PREFIX_REPNZ))) {
7904                goto illegal_op;
7905            }
7906            gen_lea_modrm(env, s, modrm);
7907            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7908                                  cpu_regs[R_EDX]);
7909            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
7910            /* XRSTOR is how MPX is enabled, which changes how
7911               we translate.  Thus we need to end the TB.  */
7912            gen_update_cc_op(s);
7913            gen_jmp_im(s->pc - s->cs_base);
7914            gen_eob(s);
7915            break;
7916
7917        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
7918            if (prefixes & PREFIX_LOCK) {
7919                goto illegal_op;
7920            }
7921            if (prefixes & PREFIX_DATA) {
7922                /* clwb */
7923                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
7924                    goto illegal_op;
7925                }
7926                gen_nop_modrm(env, s, modrm);
7927            } else {
7928                /* xsaveopt */
7929                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7930                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
7931                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
7932                    goto illegal_op;
7933                }
7934                gen_lea_modrm(env, s, modrm);
7935                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7936                                      cpu_regs[R_EDX]);
7937                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
7938            }
7939            break;
7940
7941        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
7942            if (prefixes & PREFIX_LOCK) {
7943                goto illegal_op;
7944            }
7945            if (prefixes & PREFIX_DATA) {
7946                /* clflushopt */
7947                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
7948                    goto illegal_op;
7949                }
7950            } else {
7951                /* clflush */
7952                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
7953                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
7954                    goto illegal_op;
7955                }
7956            }
7957            gen_nop_modrm(env, s, modrm);
7958            break;
7959
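            /* For the fsgsbase insns, modrm bit 3 selects FS vs GS and
               bit 4 selects write vs read, so the four /r ranges below
               share one implementation.  */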
7960        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
7961        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
7962        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
7963        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
7964            if (CODE64(s)
7965                && (prefixes & PREFIX_REPZ)
7966                && !(prefixes & PREFIX_LOCK)
7967                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
7968                TCGv base, treg, src, dst;
7969
7970                /* Preserve hflags bits by testing CR4 at runtime.  */
7971                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
7972                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
7973
7974                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
7975                treg = cpu_regs[(modrm & 7) | REX_B(s)];
7976
7977                if (modrm & 0x10) {
7978                    /* wr*base */
7979                    dst = base, src = treg;
7980                } else {
7981                    /* rd*base */
7982                    dst = treg, src = base;
7983                }
7984
7985                if (s->dflag == MO_32) {
7986                    tcg_gen_ext32u_tl(dst, src);
7987                } else {
7988                    tcg_gen_mov_tl(dst, src);
7989                }
7990                break;
7991            }
7992            goto unknown_op;
7993
7994        case 0xf8: /* sfence / pcommit */
7995            if (prefixes & PREFIX_DATA) {
7996                /* pcommit */
7997                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
7998                    || (prefixes & PREFIX_LOCK)) {
7999                    goto illegal_op;
8000                }
8001                break;
8002            }
8003            /* fallthru */
8004        case 0xf9 ... 0xff: /* sfence */
8005            if (!(s->cpuid_features & CPUID_SSE)
8006                || (prefixes & PREFIX_LOCK)) {
8007                goto illegal_op;
8008            }
8009            break;
8010        case 0xe8 ... 0xef: /* lfence */
8011        case 0xf0 ... 0xf7: /* mfence */
8012            if (!(s->cpuid_features & CPUID_SSE2)
8013                || (prefixes & PREFIX_LOCK)) {
8014                goto illegal_op;
8015            }
8016            break;
8017
8018        default:
8019            goto unknown_op;
8020        }
8021        break;
8022
8023    case 0x10d: /* 3DNow! prefetch(w) */
8024        modrm = cpu_ldub_code(env, s->pc++);
8025        mod = (modrm >> 6) & 3;
8026        if (mod == 3)
8027            goto illegal_op;
8028        gen_nop_modrm(env, s, modrm);
8029        break;
8030    case 0x1aa: /* rsm */
8031        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8032        if (!(s->flags & HF_SMM_MASK))
8033            goto illegal_op;
8034        gen_update_cc_op(s);
8035        gen_jmp_im(s->pc - s->cs_base);
8036        gen_helper_rsm(cpu_env);
8037        gen_eob(s);
8038        break;
8039    case 0x1b8: /* SSE4.2 popcnt */
8040        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8041             PREFIX_REPZ)
8042            goto illegal_op;
8043        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8044            goto illegal_op;
8045
8046        modrm = cpu_ldub_code(env, s->pc++);
8047        reg = ((modrm >> 3) & 7) | rex_r;
8048
8049        if (s->prefix & PREFIX_DATA) {
8050            ot = MO_16;
8051        } else {
8052            ot = mo_64_32(dflag);
8053        }
8054
8055        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8056        gen_helper_popcnt(cpu_T0, cpu_env, cpu_T0, tcg_const_i32(ot));
8057        gen_op_mov_reg_v(ot, reg, cpu_T0);
8058
8059        set_cc_op(s, CC_OP_EFLAGS);
8060        break;
8061    case 0x10e ... 0x10f:
8062        /* 3DNow! instructions, ignore prefixes; fall through to gen_sse */
8063        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8064    case 0x110 ... 0x117:
8065    case 0x128 ... 0x12f:
8066    case 0x138 ... 0x13a:
8067    case 0x150 ... 0x179:
8068    case 0x17c ... 0x17f:
8069    case 0x1c2:
8070    case 0x1c4 ... 0x1c6:
8071    case 0x1d0 ... 0x1fe:
8072        gen_sse(env, s, b, pc_start, rex_r);
8073        break;
8074    default:
8075        goto unknown_op;
8076    }
8077    /* lock generation */
8078    if (s->prefix & PREFIX_LOCK)
8079        gen_helper_unlock();
8080    return s->pc;
8081 illegal_op:
8082    if (s->prefix & PREFIX_LOCK)
8083        gen_helper_unlock();
8084    /* XXX: ensure that no lock was generated */
8085    gen_illegal_opcode(s);
8086    return s->pc;
8087 unknown_op:
8088    if (s->prefix & PREFIX_LOCK)
8089        gen_helper_unlock();
8090    /* XXX: ensure that no lock was generated */
8091    gen_unknown_opcode(env, s);
8092    return s->pc;
8093}
8094
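    /* Allocate the TCG globals that mirror CPUX86State fields: cc_op
       and the condition-code staging values, the general-purpose
       registers, the segment bases and the MPX bound registers.  These
       back the cpu_* variables declared at the top of this file.  */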
8095void tcg_x86_init(void)
8096{
8097    static const char reg_names[CPU_NB_REGS][4] = {
8098#ifdef TARGET_X86_64
8099        [R_EAX] = "rax",
8100        [R_EBX] = "rbx",
8101        [R_ECX] = "rcx",
8102        [R_EDX] = "rdx",
8103        [R_ESI] = "rsi",
8104        [R_EDI] = "rdi",
8105        [R_EBP] = "rbp",
8106        [R_ESP] = "rsp",
8107        [8]  = "r8",
8108        [9]  = "r9",
8109        [10] = "r10",
8110        [11] = "r11",
8111        [12] = "r12",
8112        [13] = "r13",
8113        [14] = "r14",
8114        [15] = "r15",
8115#else
8116        [R_EAX] = "eax",
8117        [R_EBX] = "ebx",
8118        [R_ECX] = "ecx",
8119        [R_EDX] = "edx",
8120        [R_ESI] = "esi",
8121        [R_EDI] = "edi",
8122        [R_EBP] = "ebp",
8123        [R_ESP] = "esp",
8124#endif
8125    };
8126    static const char seg_base_names[6][8] = {
8127        [R_CS] = "cs_base",
8128        [R_DS] = "ds_base",
8129        [R_ES] = "es_base",
8130        [R_FS] = "fs_base",
8131        [R_GS] = "gs_base",
8132        [R_SS] = "ss_base",
8133    };
8134    static const char bnd_regl_names[4][8] = {
8135        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8136    };
8137    static const char bnd_regu_names[4][8] = {
8138        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8139    };
8140    int i;
8141
8142    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
8143    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8144                                       offsetof(CPUX86State, cc_op), "cc_op");
8145    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8146                                    "cc_dst");
8147    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8148                                    "cc_src");
8149    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8150                                     "cc_src2");
8151
8152    for (i = 0; i < CPU_NB_REGS; ++i) {
8153        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8154                                         offsetof(CPUX86State, regs[i]),
8155                                         reg_names[i]);
8156    }
8157
8158    for (i = 0; i < 6; ++i) {
8159        cpu_seg_base[i]
8160            = tcg_global_mem_new(cpu_env,
8161                                 offsetof(CPUX86State, segs[i].base),
8162                                 seg_base_names[i]);
8163    }
8164
8165    for (i = 0; i < 4; ++i) {
8166        cpu_bndl[i]
8167            = tcg_global_mem_new_i64(cpu_env,
8168                                     offsetof(CPUX86State, bnd_regs[i].lb),
8169                                     bnd_regl_names[i]);
8170        cpu_bndu[i]
8171            = tcg_global_mem_new_i64(cpu_env,
8172                                     offsetof(CPUX86State, bnd_regs[i].ub),
8173                                     bnd_regu_names[i]);
8174    }
8175
8176    helper_lock_init();
8177}
8178
8179/* generate intermediate code for basic block 'tb'.  */
8180void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
8181{
8182    X86CPU *cpu = x86_env_get_cpu(env);
8183    CPUState *cs = CPU(cpu);
8184    DisasContext dc1, *dc = &dc1;
8185    target_ulong pc_ptr;
8186    uint64_t flags;
8187    target_ulong pc_start;
8188    target_ulong cs_base;
8189    int num_insns;
8190    int max_insns;
8191
8192    /* generate intermediate code */
8193    pc_start = tb->pc;
8194    cs_base = tb->cs_base;
8195    flags = tb->flags;
8196
8197    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8198    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8199    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8200    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8201    dc->f_st = 0;
8202    dc->vm86 = (flags >> VM_SHIFT) & 1;
8203    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8204    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8205    dc->tf = (flags >> TF_SHIFT) & 1;
8206    dc->singlestep_enabled = cs->singlestep_enabled;
8207    dc->cc_op = CC_OP_DYNAMIC;
8208    dc->cc_op_dirty = false;
8209    dc->cs_base = cs_base;
8210    dc->tb = tb;
8211    dc->popl_esp_hack = 0;
8212    /* select memory access functions */
8213    dc->mem_index = 0;
8214    if (flags & HF_SOFTMMU_MASK) {
8215        dc->mem_index = cpu_mmu_index(env, false);
8216    }
8217    dc->cpuid_features = env->features[FEAT_1_EDX];
8218    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8219    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8220    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8221    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8222    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8223#ifdef TARGET_X86_64
8224    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8225    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8226#endif
8227    dc->flags = flags;
8228    dc->jmp_opt = !(dc->tf || cs->singlestep_enabled ||
8229                    (flags & HF_INHIBIT_IRQ_MASK)
8230#ifndef CONFIG_SOFTMMU
8231                    || (flags & HF_SOFTMMU_MASK)
8232#endif
8233                    );
8234    /* Do not optimize repz jumps at all in icount mode, because
8235       rep movsS instructions are executed on different paths in the
8236       !repz_opt and repz_opt modes.  The optimized path used to be
8237       taken everywhere except in single-step mode; disabling the
8238       jump optimization here makes the control paths identical in
8239       normal and single-step execution.
8240       As a result there is no repz jump optimization in record/replay
8241       modes, and there is always one additional step for ecx=0 when
8242       icount is enabled.
8243     */
8244    dc->repz_opt = !dc->jmp_opt && !(tb->cflags & CF_USE_ICOUNT);
8245#if 0
8246    /* check addseg logic */
8247    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8248        printf("ERROR addseg\n");
8249#endif
8250
8251    cpu_T0 = tcg_temp_new();
8252    cpu_T1 = tcg_temp_new();
8253    cpu_A0 = tcg_temp_new();
8254
8255    cpu_tmp0 = tcg_temp_new();
8256    cpu_tmp1_i64 = tcg_temp_new_i64();
8257    cpu_tmp2_i32 = tcg_temp_new_i32();
8258    cpu_tmp3_i32 = tcg_temp_new_i32();
8259    cpu_tmp4 = tcg_temp_new();
8260    cpu_ptr0 = tcg_temp_new_ptr();
8261    cpu_ptr1 = tcg_temp_new_ptr();
8262    cpu_cc_srcT = tcg_temp_local_new();
8263
8264    dc->is_jmp = DISAS_NEXT;
8265    pc_ptr = pc_start;
8266    num_insns = 0;
8267    max_insns = tb->cflags & CF_COUNT_MASK;
8268    if (max_insns == 0) {
8269        max_insns = CF_COUNT_MASK;
8270    }
8271    if (max_insns > TCG_MAX_INSNS) {
8272        max_insns = TCG_MAX_INSNS;
8273    }
8274
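        /* Main translation loop: one iteration per guest instruction.
           It ends when the instruction itself stops translation
           (dc->is_jmp), on single-step or inhibited IRQs, when an
           icount page boundary would be crossed, or when the op
           buffer, TB size or instruction budget runs out.  */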
8275    gen_tb_start(tb);
8276    for(;;) {
8277        tcg_gen_insn_start(pc_ptr, dc->cc_op);
8278        num_insns++;
8279
8280        /* If RF is set, suppress an internally generated breakpoint.  */
8281        if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
8282                                         tb->flags & HF_RF_MASK
8283                                         ? BP_GDB : BP_ANY))) {
8284            gen_debug(dc, pc_ptr - dc->cs_base);
8285            /* The address covered by the breakpoint must be included in
8286               [tb->pc, tb->pc + tb->size) in order for it to be
8287               properly cleared -- thus we increment the PC here so that
8288               the logic setting tb->size below does the right thing.  */
8289            pc_ptr += 1;
8290            goto done_generating;
8291        }
8292        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
8293            gen_io_start();
8294        }
8295
8296        pc_ptr = disas_insn(env, dc, pc_ptr);
8297        /* stop translation if indicated */
8298        if (dc->is_jmp)
8299            break;
8300        /* In single-step mode, we generate only one instruction and
8301           then raise an exception.  */
8302        /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8303           the flag and abort the translation to give the irqs a
8304           chance to happen.  */
8305        if (dc->tf || dc->singlestep_enabled ||
8306            (flags & HF_INHIBIT_IRQ_MASK)) {
8307            gen_jmp_im(pc_ptr - dc->cs_base);
8308            gen_eob(dc);
8309            break;
8310        }
8311        /* Do not cross a page boundary in icount mode, since doing so
8312           can raise an exception.  Stop only when the boundary would
8313           be crossed by the first instruction in the block; if the
8314           current instruction has already crossed it, that is fine,
8315           because no exception stopped this code.
8316         */
8317        if ((tb->cflags & CF_USE_ICOUNT)
8318            && ((pc_ptr & TARGET_PAGE_MASK)
8319                != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK)
8320                || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) {
8321            gen_jmp_im(pc_ptr - dc->cs_base);
8322            gen_eob(dc);
8323            break;
8324        }
8325        /* if the translation is too long, stop generation as well */
8326        if (tcg_op_buf_full() ||
8327            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
8328            num_insns >= max_insns) {
8329            gen_jmp_im(pc_ptr - dc->cs_base);
8330            gen_eob(dc);
8331            break;
8332        }
8333        if (singlestep) {
8334            gen_jmp_im(pc_ptr - dc->cs_base);
8335            gen_eob(dc);
8336            break;
8337        }
8338    }
8339    if (tb->cflags & CF_LAST_IO)
8340        gen_io_end();
8341done_generating:
8342    gen_tb_end(tb, num_insns);
8343
8344#ifdef DEBUG_DISAS
8345    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
8346        int disas_flags;
8347        qemu_log("----------------\n");
8348        qemu_log("IN: %s\n", lookup_symbol(pc_start));
8349#ifdef TARGET_X86_64
8350        if (dc->code64)
8351            disas_flags = 2;
8352        else
8353#endif
8354            disas_flags = !dc->code32;
8355        log_target_disas(cs, pc_start, pc_ptr - pc_start, disas_flags);
8356        qemu_log("\n");
8357    }
8358#endif
8359
8360    tb->size = pc_ptr - pc_start;
8361    tb->icount = num_insns;
8362}
8363
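    /* Recover eip and cc_op for an instruction from the values recorded
       by tcg_gen_insn_start(): data[0] is the pc (eip + cs_base) and
       data[1] the cc_op, restored only when it is not CC_OP_DYNAMIC.  */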
8364void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8365                          target_ulong *data)
8366{
8367    int cc_op = data[1];
8368    env->eip = data[0] - tb->cs_base;
8369    if (cc_op != CC_OP_DYNAMIC) {
8370        env->cc_op = cc_op;
8371    }
8372}
8373