qemu/target/i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "exec/cpu_ldst.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"


#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
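
/* A ModRM byte decomposes as mod(2) | reg-or-opcode(3) | rm(3); mod == 3
   selects a register operand.  So CASE_MODRM_MEM_OP matches exactly the
   memory forms, while CASE_MODRM_OP also accepts the register forms.  */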

//#define MACRO_TEST   1

/* global register indexes */
static TCGv_env cpu_env;
static TCGv cpu_A0;
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];
/* local temps */
static TCGv cpu_T0, cpu_T1;
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;

#include "exec/gen-icount.h"

#ifdef TARGET_X86_64
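/* Set while a REX prefix is in effect: byte-register encodings 4..7 then
   name SPL/BPL/SIL/DIL rather than AH/CH/DH/BH (see byte_reg_is_xH).  */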
static int x86_64_hregs;
#endif

typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    TCGMemOp aflag;
    TCGMemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
    int addseg; /* nonzero if any of DS/ES/SS has a nonzero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};
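
/* Example of the lazy-flags scheme this table supports: a byte SUB sets
   CC_OP_SUBB, leaving the operands in cc_srcT/cc_src and the result in
   cc_dst; CF is only materialized later, as
   (uint8_t)CC_SRCT < (uint8_t)CC_SRC, if something actually reads it
   (see gen_prepare_eflags_c below).  */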

static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(cpu_cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

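/* Byte offsets of the 8/16/32-bit views of a guest register within its
   host target_ulong slot, for both host endiannesses.  REG_H_OFFSET is
   the AH/CH/DH/BH byte, i.e. bits 15..8.  */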
#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline TCGMemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline TCGMemOp mo_64_32(TCGMemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
{
    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_reg_v(idx, d, cpu_T0);
    }
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    gen_op_jmp_v(cpu_tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(cpu_A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(cpu_A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(cpu_A0, a0);
        a0 = cpu_A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(cpu_A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(cpu_A0, a0);
            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
        } else {
            tcg_gen_add_tl(cpu_A0, a0, seg);
            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

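/* Load the direction flag (kept in env->df as +1/-1) scaled by the operand
   size: T0 becomes +/-(1 << ot), the per-iteration step for string insns.  */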
static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
{
    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

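/* When the access must be validated (protected mode with CPL > IOPL, or
   vm86 mode), call a helper that checks the TSS I/O permission bitmap and
   raises #GP on failure; then give SVM a chance to intercept the I/O
   instruction if the guest hypervisor requested it.  */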
static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_SVMI_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static void gen_op_update1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static void gen_op_update3_cc(TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
    tcg_gen_movi_tl(cpu_cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    TCGV_UNUSED(zero);
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

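/* Recipe for evaluating a condition code: and REG with MASK (-1 for no
   masking), then compare it against IMM, or against REG2 when USE_REG2 is
   set, using COND.  NO_SETCOND means REG already holds the truth value
   and the final setcond can be skipped.  */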
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
        tcg_gen_mov_tl(t0, cpu_cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_extu(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_exts(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

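    /* Fast path: testing a single flag bit for non-zero reduces to
       extracting that bit, so shift it down to bit 0 and mask.  */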
    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
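/* Emits "if (ECX != 0) goto l1; l2: goto next_insn; l1:" and returns l2,
   so that a REP body can later branch back to l2 to terminate the loop.  */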
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

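/* If I/O breakpoints are armed (HF_IOBPT_MASK), notify the helper after
   each port access so it can raise a debug exception on a matching I/O
   breakpoint; t_next is the EIP to resume at.  */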
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}


static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(cpu_T0, 0);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);

    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

/* Same method as Valgrind: we generate jumps to the current or next
   instruction. */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        if (s1->prefix & PREFIX_LOCK) {
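            /* Locked SBB: x - (y + c) == x + -(y + c), so negate the
               borrow-adjusted subtrahend and use an atomic add.  */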
            tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
            tcg_gen_neg_tl(cpu_T0, cpu_T0);
            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
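            /* Locked SUB: atomically add -T1, keeping the fetched old
               value in cc_srcT, which the SUB flags computation needs.  */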
            tcg_gen_neg_tl(cpu_T0, cpu_T1);
            tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
        } else {
            tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(ot, cpu_T0, d);
        } else {
            gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
        }
        tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
        oldop = cpu_tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);

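    /* Shift by count-1 as well: the last bit shifted out ends up in bit 0
       of cpu_tmp0 (right shifts) or in its MSB (left shifts), from which
       gen_shift_flags derives CF/OF.  */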
    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T0);
            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            gen_extu(ot, cpu_T0);
            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
        }
    } else {
        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T0);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
            } else {
                gen_extu(ot, cpu_T0);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
        if (is_right) {
            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        } else {
            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        }
        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
1581       exactly as we computed above.  */
1582    t0 = tcg_const_i32(0);
1583    t1 = tcg_temp_new_i32();
1584    tcg_gen_trunc_tl_i32(t1, cpu_T1);
1585    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
1586    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
1587    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1588                        cpu_tmp2_i32, cpu_tmp3_i32);
1589    tcg_temp_free_i32(t0);
1590    tcg_temp_free_i32(t1);
1591
1592    /* The CC_OP value is no longer predictable.  */ 
1593    set_cc_op(s, CC_OP_DYNAMIC);
1594}
1595
1596static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1597                          int is_right)
1598{
1599    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1600    int shift;
1601
1602    /* load */
1603    if (op1 == OR_TMP0) {
1604        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1605    } else {
1606        gen_op_mov_v_reg(ot, cpu_T0, op1);
1607    }
1608
1609    op2 &= mask;
1610    if (op2 != 0) {
1611        switch (ot) {
1612#ifdef TARGET_X86_64
1613        case MO_32:
1614            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1615            if (is_right) {
1616                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1617            } else {
1618                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1619            }
1620            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1621            break;
1622#endif
1623        default:
1624            if (is_right) {
1625                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
1626            } else {
1627                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
1628            }
1629            break;
1630        case MO_8:
1631            mask = 7;
1632            goto do_shifts;
1633        case MO_16:
1634            mask = 15;
1635        do_shifts:
1636            shift = op2 & mask;
1637            if (is_right) {
1638                shift = mask + 1 - shift;
1639            }
1640            gen_extu(ot, cpu_T0);
1641            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
1642            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
1643            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
1644            break;
1645        }
1646    }
1647
1648    /* store */
1649    gen_op_st_rm_T0_A0(s, ot, op1);
1650
1651    if (op2 != 0) {
1652        /* Compute the flags into CC_SRC.  */
1653        gen_compute_eflags(s);
1654
1655        /* The value that was "rotated out" is now present at the other end
1656           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1657           since we've computed the flags into CC_SRC, these variables are
1658           currently dead.  */
1659        if (is_right) {
1660            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1661            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1662            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1663        } else {
1664            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1665            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1666        }
1667        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1668        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1669        set_cc_op(s, CC_OP_ADCOX);
1670    }
1671}
1672
1673/* XXX: add faster immediate = 1 case */
1674static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1675                           int is_right)
1676{
1677    gen_compute_eflags(s);
1678    assert(s->cc_op == CC_OP_EFLAGS);
1679
1680    /* load */
1681    if (op1 == OR_TMP0)
1682        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1683    else
1684        gen_op_mov_v_reg(ot, cpu_T0, op1);
1685
1686    if (is_right) {
1687        switch (ot) {
1688        case MO_8:
1689            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1690            break;
1691        case MO_16:
1692            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1693            break;
1694        case MO_32:
1695            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1696            break;
1697#ifdef TARGET_X86_64
1698        case MO_64:
1699            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1700            break;
1701#endif
1702        default:
1703            tcg_abort();
1704        }
1705    } else {
1706        switch (ot) {
1707        case MO_8:
1708            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1709            break;
1710        case MO_16:
1711            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1712            break;
1713        case MO_32:
1714            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1715            break;
1716#ifdef TARGET_X86_64
1717        case MO_64:
1718            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1719            break;
1720#endif
1721        default:
1722            tcg_abort();
1723        }
1724    }
1725    /* store */
1726    gen_op_st_rm_T0_A0(s, ot, op1);
1727}
1728
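/* Note: RCL and RCR rotate through the carry flag, so an N-bit operand
   behaves as an (N+1)-bit rotate of CF:operand, with the count reduced
   mod 9/17 for the narrow widths.  That is why this is left to helpers
   instead of being open-coded in TCG.  For example, RCL AL,1 with
   CF = 1 and AL = 0x80 gives AL = ((0x80 << 1) | 1) & 0xff = 0x01 and
   a new CF of 1 (the old MSB).  */
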
1729/* XXX: add faster immediate case */
1730static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1731                             bool is_right, TCGv count_in)
1732{
1733    target_ulong mask = (ot == MO_64 ? 63 : 31);
1734    TCGv count;
1735
1736    /* load */
1737    if (op1 == OR_TMP0) {
1738        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1739    } else {
1740        gen_op_mov_v_reg(ot, cpu_T0, op1);
1741    }
1742
1743    count = tcg_temp_new();
1744    tcg_gen_andi_tl(count, count_in, mask);
1745
1746    switch (ot) {
1747    case MO_16:
1748        /* Note: we implement the Intel behaviour for shift count > 16.
1749           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1750           portion by constructing it as a 32-bit value.  */
1751        if (is_right) {
1752            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
1753            tcg_gen_mov_tl(cpu_T1, cpu_T0);
1754            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
1755        } else {
1756            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
1757        }
1758        /* FALLTHRU */
1759#ifdef TARGET_X86_64
1760    case MO_32:
1761        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1762        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1763        if (is_right) {
1764            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
1765            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1766            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
1767        } else {
1768            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
1769            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1770            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
1771            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1772            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
1773        }
1774        break;
1775#endif
1776    default:
1777        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1778        if (is_right) {
1779            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1780
1781            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1782            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
1783            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
1784        } else {
1785            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1786            if (ot == MO_16) {
1787                /* Only needed if count > 16, for Intel behaviour.  */
1788                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1789                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
1790                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1791            }
1792
1793            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1794            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
1795            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
1796        }
1797        tcg_gen_movi_tl(cpu_tmp4, 0);
1798        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
1799                           cpu_tmp4, cpu_T1);
1800        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1801        break;
1802    }
1803
1804    /* store */
1805    gen_op_st_rm_T0_A0(s, ot, op1);
1806
1807    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
1808    tcg_temp_free(count);
1809}
1810
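/* Worked example of the MO_16 Intel behaviour above: for
   "shrd ax, bx, 20" with AX = 0x1234 and BX = 0x5678, the code builds
   the 64-bit value A:B:A = 0x0000123456781234 and shifts it right by
   20, so AX receives 0x4567.  Counts larger than the operand size are
   architecturally undefined for 16-bit SHLD/SHRD; this follows the
   Intel behaviour noted in the comment above.  */
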
1811static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1812{
1813    if (s != OR_TMP1)
1814        gen_op_mov_v_reg(ot, cpu_T1, s);
1815    switch(op) {
1816    case OP_ROL:
1817        gen_rot_rm_T1(s1, ot, d, 0);
1818        break;
1819    case OP_ROR:
1820        gen_rot_rm_T1(s1, ot, d, 1);
1821        break;
1822    case OP_SHL:
1823    case OP_SHL1:
1824        gen_shift_rm_T1(s1, ot, d, 0, 0);
1825        break;
1826    case OP_SHR:
1827        gen_shift_rm_T1(s1, ot, d, 1, 0);
1828        break;
1829    case OP_SAR:
1830        gen_shift_rm_T1(s1, ot, d, 1, 1);
1831        break;
1832    case OP_RCL:
1833        gen_rotc_rm_T1(s1, ot, d, 0);
1834        break;
1835    case OP_RCR:
1836        gen_rotc_rm_T1(s1, ot, d, 1);
1837        break;
1838    }
1839}
1840
1841static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1842{
1843    switch(op) {
1844    case OP_ROL:
1845        gen_rot_rm_im(s1, ot, d, c, 0);
1846        break;
1847    case OP_ROR:
1848        gen_rot_rm_im(s1, ot, d, c, 1);
1849        break;
1850    case OP_SHL:
1851    case OP_SHL1:
1852        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1853        break;
1854    case OP_SHR:
1855        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1856        break;
1857    case OP_SAR:
1858        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1859        break;
1860    default:
1861        /* currently not optimized */
1862        tcg_gen_movi_tl(cpu_T1, c);
1863        gen_shift(s1, op, ot, d, OR_TMP1);
1864        break;
1865    }
1866}
1867
1868/* Decompose an address.  */
1869
1870typedef struct AddressParts {
1871    int def_seg;
1872    int base;
1873    int index;
1874    int scale;
1875    target_long disp;
1876} AddressParts;
1877
1878static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1879                                    int modrm)
1880{
1881    int def_seg, base, index, scale, mod, rm;
1882    target_long disp;
1883    bool havesib;
1884
1885    def_seg = R_DS;
1886    index = -1;
1887    scale = 0;
1888    disp = 0;
1889
1890    mod = (modrm >> 6) & 3;
1891    rm = modrm & 7;
1892    base = rm | REX_B(s);
1893
1894    if (mod == 3) {
1895        /* Normally filtered out earlier, but including this path
1896           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1897        goto done;
1898    }
1899
1900    switch (s->aflag) {
1901    case MO_64:
1902    case MO_32:
1903        havesib = 0;
1904        if (rm == 4) {
1905            int code = cpu_ldub_code(env, s->pc++);
1906            scale = (code >> 6) & 3;
1907            index = ((code >> 3) & 7) | REX_X(s);
1908            if (index == 4) {
1909                index = -1;  /* no index */
1910            }
1911            base = (code & 7) | REX_B(s);
1912            havesib = 1;
1913        }
1914
1915        switch (mod) {
1916        case 0:
1917            if ((base & 7) == 5) {
1918                base = -1;
1919                disp = (int32_t)cpu_ldl_code(env, s->pc);
1920                s->pc += 4;
1921                if (CODE64(s) && !havesib) {
1922                    base = -2;
1923                    disp += s->pc + s->rip_offset;
1924                }
1925            }
1926            break;
1927        case 1:
1928            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1929            break;
1930        default:
1931        case 2:
1932            disp = (int32_t)cpu_ldl_code(env, s->pc);
1933            s->pc += 4;
1934            break;
1935        }
1936
1937        /* For correct popl handling with esp.  */
1938        if (base == R_ESP && s->popl_esp_hack) {
1939            disp += s->popl_esp_hack;
1940        }
1941        if (base == R_EBP || base == R_ESP) {
1942            def_seg = R_SS;
1943        }
1944        break;
1945
1946    case MO_16:
1947        if (mod == 0) {
1948            if (rm == 6) {
1949                base = -1;
1950                disp = cpu_lduw_code(env, s->pc);
1951                s->pc += 2;
1952                break;
1953            }
1954        } else if (mod == 1) {
1955            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1956        } else {
1957            disp = (int16_t)cpu_lduw_code(env, s->pc);
1958            s->pc += 2;
1959        }
1960
1961        switch (rm) {
1962        case 0:
1963            base = R_EBX;
1964            index = R_ESI;
1965            break;
1966        case 1:
1967            base = R_EBX;
1968            index = R_EDI;
1969            break;
1970        case 2:
1971            base = R_EBP;
1972            index = R_ESI;
1973            def_seg = R_SS;
1974            break;
1975        case 3:
1976            base = R_EBP;
1977            index = R_EDI;
1978            def_seg = R_SS;
1979            break;
1980        case 4:
1981            base = R_ESI;
1982            break;
1983        case 5:
1984            base = R_EDI;
1985            break;
1986        case 6:
1987            base = R_EBP;
1988            def_seg = R_SS;
1989            break;
1990        default:
1991        case 7:
1992            base = R_EBX;
1993            break;
1994        }
1995        break;
1996
1997    default:
1998        tcg_abort();
1999    }
2000
2001 done:
2002    return (AddressParts){ def_seg, base, index, scale, disp };
2003}
2004
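/* Decoding example: for the 32-bit instruction
   "lea eax, [esi + ebx*4 + 8]" (bytes 8d 44 9e 08), modrm = 0x44 gives
   mod = 1 and rm = 4, so a SIB byte follows; sib = 0x9e yields
   scale = 2, index = 3 (EBX) and base = 6 (ESI), and the mod = 1
   displacement byte is 8.  The function therefore returns
   { .def_seg = R_DS, .base = R_ESI, .index = R_EBX, .scale = 2,
     .disp = 8 }.  */
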
2005/* Compute the address, with a minimum number of TCG ops.  */
2006static TCGv gen_lea_modrm_1(AddressParts a)
2007{
2008    TCGv ea;
2009
2010    TCGV_UNUSED(ea);
2011    if (a.index >= 0) {
2012        if (a.scale == 0) {
2013            ea = cpu_regs[a.index];
2014        } else {
2015            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
2016            ea = cpu_A0;
2017        }
2018        if (a.base >= 0) {
2019            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
2020            ea = cpu_A0;
2021        }
2022    } else if (a.base >= 0) {
2023        ea = cpu_regs[a.base];
2024    }
2025    if (TCGV_IS_UNUSED(ea)) {
2026        tcg_gen_movi_tl(cpu_A0, a.disp);
2027        ea = cpu_A0;
2028    } else if (a.disp != 0) {
2029        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
2030        ea = cpu_A0;
2031    }
2032
2033    return ea;
2034}
2035
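/* Note: in the common cases this emits no TCG ops at all: a bare
   register operand such as { .base = R_EAX, .index = -1, .disp = 0 }
   simply returns cpu_regs[R_EAX].  The worst case (scaled index plus
   base plus displacement) costs three ops: shli, add and addi.  */
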
2036static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2037{
2038    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2039    TCGv ea = gen_lea_modrm_1(a);
2040    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2041}
2042
2043static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2044{
2045    (void)gen_lea_modrm_0(env, s, modrm);
2046}
2047
2048/* Used for BNDCL, BNDCU, BNDCN.  */
2049static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2050                      TCGCond cond, TCGv_i64 bndv)
2051{
2052    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
2053
2054    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
2055    if (!CODE64(s)) {
2056        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
2057    }
2058    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
2059    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
2060    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
2061}
2062
2063/* used for LEA and MOV AX, mem */
2064static void gen_add_A0_ds_seg(DisasContext *s)
2065{
2066    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2067}
2068
2069/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2070   OR_TMP0 */
2071static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2072                           TCGMemOp ot, int reg, int is_store)
2073{
2074    int mod, rm;
2075
2076    mod = (modrm >> 6) & 3;
2077    rm = (modrm & 7) | REX_B(s);
2078    if (mod == 3) {
2079        if (is_store) {
2080            if (reg != OR_TMP0)
2081                gen_op_mov_v_reg(ot, cpu_T0, reg);
2082            gen_op_mov_reg_v(ot, rm, cpu_T0);
2083        } else {
2084            gen_op_mov_v_reg(ot, cpu_T0, rm);
2085            if (reg != OR_TMP0)
2086                gen_op_mov_reg_v(ot, reg, cpu_T0);
2087        }
2088    } else {
2089        gen_lea_modrm(env, s, modrm);
2090        if (is_store) {
2091            if (reg != OR_TMP0)
2092                gen_op_mov_v_reg(ot, cpu_T0, reg);
2093            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2094        } else {
2095            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2096            if (reg != OR_TMP0)
2097                gen_op_mov_reg_v(ot, reg, cpu_T0);
2098        }
2099    }
2100}
2101
2102static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2103{
2104    uint32_t ret;
2105
2106    switch (ot) {
2107    case MO_8:
2108        ret = cpu_ldub_code(env, s->pc);
2109        s->pc++;
2110        break;
2111    case MO_16:
2112        ret = cpu_lduw_code(env, s->pc);
2113        s->pc += 2;
2114        break;
2115    case MO_32:
2116#ifdef TARGET_X86_64
2117    case MO_64:
2118#endif
2119        ret = cpu_ldl_code(env, s->pc);
2120        s->pc += 4;
2121        break;
2122    default:
2123        tcg_abort();
2124    }
2125    return ret;
2126}
2127
2128static inline int insn_const_size(TCGMemOp ot)
2129{
2130    if (ot <= MO_32) {
2131        return 1 << ot;
2132    } else {
2133        return 4;
2134    }
2135}
2136
2137static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2138{
2139#ifndef CONFIG_USER_ONLY
2140    return (pc & TARGET_PAGE_MASK) == (s->tb->pc & TARGET_PAGE_MASK) ||
2141           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2142#else
2143    return true;
2144#endif
2145}
2146
2147static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2148{
2149    target_ulong pc = s->cs_base + eip;
2150
2151    if (use_goto_tb(s, pc)) {
2152        /* jump to same page: we can use a direct jump */
2153        tcg_gen_goto_tb(tb_num);
2154        gen_jmp_im(eip);
2155        tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
2156    } else {
2157        /* jump to another page */
2158        gen_jmp_im(eip);
2159        gen_jr(s, cpu_tmp0);
2160    }
2161}
2162
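/* Note: direct block chaining via goto_tb is only safe when the
   destination lies on the same guest page as the TB itself (or as the
   current instruction), which is exactly the test in use_goto_tb()
   above.  Cross-page jumps go through gen_jr() instead, which ends up
   in tcg_gen_lookup_and_goto_ptr() for a run-time lookup of the
   destination TB.  */
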
2163static inline void gen_jcc(DisasContext *s, int b,
2164                           target_ulong val, target_ulong next_eip)
2165{
2166    TCGLabel *l1, *l2;
2167
2168    if (s->jmp_opt) {
2169        l1 = gen_new_label();
2170        gen_jcc1(s, b, l1);
2171
2172        gen_goto_tb(s, 0, next_eip);
2173
2174        gen_set_label(l1);
2175        gen_goto_tb(s, 1, val);
2176        s->is_jmp = DISAS_TB_JUMP;
2177    } else {
2178        l1 = gen_new_label();
2179        l2 = gen_new_label();
2180        gen_jcc1(s, b, l1);
2181
2182        gen_jmp_im(next_eip);
2183        tcg_gen_br(l2);
2184
2185        gen_set_label(l1);
2186        gen_jmp_im(val);
2187        gen_set_label(l2);
2188        gen_eob(s);
2189    }
2190}
2191
2192static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2193                        int modrm, int reg)
2194{
2195    CCPrepare cc;
2196
2197    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2198
2199    cc = gen_prepare_cc(s, b, cpu_T1);
2200    if (cc.mask != -1) {
2201        TCGv t0 = tcg_temp_new();
2202        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2203        cc.reg = t0;
2204    }
2205    if (!cc.use_reg2) {
2206        cc.reg2 = tcg_const_tl(cc.imm);
2207    }
2208
2209    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2210                       cpu_T0, cpu_regs[reg]);
2211    gen_op_mov_reg_v(ot, reg, cpu_T0);
2212
2213    if (cc.mask != -1) {
2214        tcg_temp_free(cc.reg);
2215    }
2216    if (!cc.use_reg2) {
2217        tcg_temp_free(cc.reg2);
2218    }
2219}
2220
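/* Note: as on hardware, the source operand of CMOVcc is loaded (and
   may fault) even when the condition is false; gen_ldst_modrm above
   runs unconditionally, and the movcond then selects between the
   loaded value and the previous register contents.  */
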
2221static inline void gen_op_movl_T0_seg(int seg_reg)
2222{
2223    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2224                     offsetof(CPUX86State,segs[seg_reg].selector));
2225}
2226
2227static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2228{
2229    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2230    tcg_gen_st32_tl(cpu_T0, cpu_env,
2231                    offsetof(CPUX86State,segs[seg_reg].selector));
2232    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2233}
2234
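/* Example: in real or vm86 mode the segment base is simply
   selector * 16, so "mov es, ax" with AX = 0x1234 sets
   cpu_seg_base[R_ES] = 0x12340; no descriptor table lookup or
   permission check is performed.  */
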
2235/* Move T0 to seg_reg and determine whether the CPU state may change.
2236   Never call this function with seg_reg == R_CS.  */
2237static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2238{
2239    if (s->pe && !s->vm86) {
2240        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2241        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2242        /* abort translation because the addseg value may change or
2243           because ss32 may change.  For R_SS, translation must always
2244           stop, since special handling is needed to inhibit hardware
2245           interrupts for the next instruction */
2246        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2247            s->is_jmp = DISAS_TB_JUMP;
2248    } else {
2249        gen_op_movl_seg_T0_vm(seg_reg);
2250        if (seg_reg == R_SS)
2251            s->is_jmp = DISAS_TB_JUMP;
2252    }
2253}
2254
2255static inline int svm_is_rep(int prefixes)
2256{
2257    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2258}
2259
2260static inline void
2261gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2262                              uint32_t type, uint64_t param)
2263{
2264    /* no SVM activated; fast case */
2265    if (likely(!(s->flags & HF_SVMI_MASK)))
2266        return;
2267    gen_update_cc_op(s);
2268    gen_jmp_im(pc_start - s->cs_base);
2269    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2270                                         tcg_const_i64(param));
2271}
2272
2273static inline void
2274gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2275{
2276    gen_svm_check_intercept_param(s, pc_start, type, 0);
2277}
2278
2279static inline void gen_stack_update(DisasContext *s, int addend)
2280{
2281    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2282}
2283
2284/* Generate a push. It depends on ss32, addseg and dflag.  */
2285static void gen_push_v(DisasContext *s, TCGv val)
2286{
2287    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2288    TCGMemOp a_ot = mo_stacksize(s);
2289    int size = 1 << d_ot;
2290    TCGv new_esp = cpu_A0;
2291
2292    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2293
2294    if (!CODE64(s)) {
2295        if (s->addseg) {
2296            new_esp = cpu_tmp4;
2297            tcg_gen_mov_tl(new_esp, cpu_A0);
2298        }
2299        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2300    }
2301
2302    gen_op_st_v(s, d_ot, val, cpu_A0);
2303    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2304}
2305
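/* Note: the store to the new stack top is emitted before ESP is
   written back, so a faulting push leaves ESP unchanged, as required
   for precise exceptions.  The new_esp temporary preserves the
   decremented pointer when addseg is set, because gen_lea_v_seg() may
   rewrite cpu_A0 with the segment base added in.  */
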
2306/* A two-step pop is necessary for precise exceptions; ESP is updated only after the load.  */
2307static TCGMemOp gen_pop_T0(DisasContext *s)
2308{
2309    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2310
2311    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2312    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2313
2314    return d_ot;
2315}
2316
2317static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2318{
2319    gen_stack_update(s, 1 << ot);
2320}
2321
2322static inline void gen_stack_A0(DisasContext *s)
2323{
2324    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2325}
2326
2327static void gen_pusha(DisasContext *s)
2328{
2329    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2330    TCGMemOp d_ot = s->dflag;
2331    int size = 1 << d_ot;
2332    int i;
2333
2334    for (i = 0; i < 8; i++) {
2335        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2336        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2337        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2338    }
2339
2340    gen_stack_update(s, -8 * size);
2341}
2342
2343static void gen_popa(DisasContext *s)
2344{
2345    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2346    TCGMemOp d_ot = s->dflag;
2347    int size = 1 << d_ot;
2348    int i;
2349
2350    for (i = 0; i < 8; i++) {
2351        /* ESP is not reloaded */
2352        if (7 - i == R_ESP) {
2353            continue;
2354        }
2355        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2356        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2357        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2358        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2359    }
2360
2361    gen_stack_update(s, 8 * size);
2362}
2363
2364static void gen_enter(DisasContext *s, int esp_addend, int level)
2365{
2366    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2367    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2368    int size = 1 << d_ot;
2369
2370    /* Push BP; compute FrameTemp into T1.  */
2371    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2372    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2373    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2374
2375    level &= 31;
2376    if (level != 0) {
2377        int i;
2378
2379        /* Copy level-1 pointers from the previous frame.  */
2380        for (i = 1; i < level; ++i) {
2381            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2382            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2383            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2384
2385            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2386            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2387            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2388        }
2389
2390        /* Push the current FrameTemp as the last level.  */
2391        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2392        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2393        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2394    }
2395
2396    /* Copy the FrameTemp value to EBP.  */
2397    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2398
2399    /* Compute the final value of ESP.  */
2400    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2401    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2402}
2403
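/* Worked example: "enter 16, 2" with 32-bit operands pushes EBP,
   records FrameTemp = ESP - 4 in T1, copies one saved frame pointer
   from [EBP - 4] to [FrameTemp - 4], pushes FrameTemp itself at
   [FrameTemp - 8], then sets EBP = FrameTemp and
   ESP = FrameTemp - (16 + 4 * 2).  With level = 0 the copy loop is
   skipped entirely.  */
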
2404static void gen_leave(DisasContext *s)
2405{
2406    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2407    TCGMemOp a_ot = mo_stacksize(s);
2408
2409    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2410    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2411
2412    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2413
2414    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2415    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2416}
2417
2418static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2419{
2420    gen_update_cc_op(s);
2421    gen_jmp_im(cur_eip);
2422    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2423    s->is_jmp = DISAS_TB_JUMP;
2424}
2425
2426/* Generate #UD for the current instruction.  The assumption here is that
2427   the instruction is known, but it isn't allowed in the current cpu mode.  */
2428static void gen_illegal_opcode(DisasContext *s)
2429{
2430    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
2431}
2432
2433/* Similarly, except that the assumption here is that we don't decode
2434   the instruction at all -- either a missing opcode, an unimplemented
2435   feature, or just a bogus instruction stream.  */
2436static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2437{
2438    gen_illegal_opcode(s);
2439
2440    if (qemu_loglevel_mask(LOG_UNIMP)) {
2441        target_ulong pc = s->pc_start, end = s->pc;
2442        qemu_log_lock();
2443        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2444        for (; pc < end; ++pc) {
2445            qemu_log(" %02x", cpu_ldub_code(env, pc));
2446        }
2447        qemu_log("\n");
2448        qemu_log_unlock();
2449    }
2450}
2451
2452/* an interrupt is different from an exception because of the
2453   privilege checks */
2454static void gen_interrupt(DisasContext *s, int intno,
2455                          target_ulong cur_eip, target_ulong next_eip)
2456{
2457    gen_update_cc_op(s);
2458    gen_jmp_im(cur_eip);
2459    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2460                               tcg_const_i32(next_eip - cur_eip));
2461    s->is_jmp = DISAS_TB_JUMP;
2462}
2463
2464static void gen_debug(DisasContext *s, target_ulong cur_eip)
2465{
2466    gen_update_cc_op(s);
2467    gen_jmp_im(cur_eip);
2468    gen_helper_debug(cpu_env);
2469    s->is_jmp = DISAS_TB_JUMP;
2470}
2471
2472static void gen_set_hflag(DisasContext *s, uint32_t mask)
2473{
2474    if ((s->flags & mask) == 0) {
2475        TCGv_i32 t = tcg_temp_new_i32();
2476        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2477        tcg_gen_ori_i32(t, t, mask);
2478        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2479        tcg_temp_free_i32(t);
2480        s->flags |= mask;
2481    }
2482}
2483
2484static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2485{
2486    if (s->flags & mask) {
2487        TCGv_i32 t = tcg_temp_new_i32();
2488        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2489        tcg_gen_andi_i32(t, t, ~mask);
2490        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2491        tcg_temp_free_i32(t);
2492        s->flags &= ~mask;
2493    }
2494}
2495
2496/* Clear BND registers during legacy branches.  */
2497static void gen_bnd_jmp(DisasContext *s)
2498{
2499    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2500       and if the BNDREGs are known to be in use (non-zero) already.
2501       The helper itself will check BNDPRESERVE at runtime.  */
2502    if ((s->prefix & PREFIX_REPNZ) == 0
2503        && (s->flags & HF_MPX_EN_MASK) != 0
2504        && (s->flags & HF_MPX_IU_MASK) != 0) {
2505        gen_helper_bnd_jmp(cpu_env);
2506    }
2507}
2508
2509/* Generate an end of block. Trace exception is also generated if needed.
2510   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2511   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2512   S->TF.  This is used by the syscall/sysret insns.  */
2513static void
2514do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, TCGv jr)
2515{
2516    gen_update_cc_op(s);
2517
2518    /* If several instructions disable interrupts, only the first does it.  */
2519    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2520        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2521    } else {
2522        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2523    }
2524
2525    if (s->tb->flags & HF_RF_MASK) {
2526        gen_helper_reset_rf(cpu_env);
2527    }
2528    if (s->singlestep_enabled) {
2529        gen_helper_debug(cpu_env);
2530    } else if (recheck_tf) {
2531        gen_helper_rechecking_single_step(cpu_env);
2532        tcg_gen_exit_tb(0);
2533    } else if (s->tf) {
2534        gen_helper_single_step(cpu_env);
2535    } else if (!TCGV_IS_UNUSED(jr)) {
2536        TCGv vaddr = tcg_temp_new();
2537
2538        tcg_gen_add_tl(vaddr, jr, cpu_seg_base[R_CS]);
2539        tcg_gen_lookup_and_goto_ptr(vaddr);
2540        tcg_temp_free(vaddr);
2541    } else {
2542        tcg_gen_exit_tb(0);
2543    }
2544    s->is_jmp = DISAS_TB_JUMP;
2545}
2546
2547static inline void
2548gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2549{
2550    TCGv unused;
2551
2552    TCGV_UNUSED(unused);
2553    do_gen_eob_worker(s, inhibit, recheck_tf, unused);
2554}
2555
2556/* End of block.
2557   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2558static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2559{
2560    gen_eob_worker(s, inhibit, false);
2561}
2562
2563/* End of block, resetting the inhibit irq flag.  */
2564static void gen_eob(DisasContext *s)
2565{
2566    gen_eob_worker(s, false, false);
2567}
2568
2569/* Jump to register */
2570static void gen_jr(DisasContext *s, TCGv dest)
2571{
2572    do_gen_eob_worker(s, false, false, dest);
2573}
2574
2575/* Generate a jump to eip.  No segment change may happen beforehand,
2576   because a direct jump (block chaining) to the next block may occur */
2577static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2578{
2579    gen_update_cc_op(s);
2580    set_cc_op(s, CC_OP_DYNAMIC);
2581    if (s->jmp_opt) {
2582        gen_goto_tb(s, tb_num, eip);
2583        s->is_jmp = DISAS_TB_JUMP;
2584    } else {
2585        gen_jmp_im(eip);
2586        gen_eob(s);
2587    }
2588}
2589
2590static void gen_jmp(DisasContext *s, target_ulong eip)
2591{
2592    gen_jmp_tb(s, eip, 0);
2593}
2594
2595static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2596{
2597    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2598    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2599}
2600
2601static inline void gen_stq_env_A0(DisasContext *s, int offset)
2602{
2603    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2604    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2605}
2606
2607static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2608{
2609    int mem_index = s->mem_index;
2610    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2611    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2612    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2613    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2614    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2615}
2616
2617static inline void gen_sto_env_A0(DisasContext *s, int offset)
2618{
2619    int mem_index = s->mem_index;
2620    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2621    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2622    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2623    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2624    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2625}
2626
2627static inline void gen_op_movo(int d_offset, int s_offset)
2628{
2629    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2630    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2631    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2632    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2633}
2634
2635static inline void gen_op_movq(int d_offset, int s_offset)
2636{
2637    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2638    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2639}
2640
2641static inline void gen_op_movl(int d_offset, int s_offset)
2642{
2643    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2644    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2645}
2646
2647static inline void gen_op_movq_env_0(int d_offset)
2648{
2649    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2650    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2651}
2652
2653typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2654typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2655typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2656typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2657typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2658typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2659                               TCGv_i32 val);
2660typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2661typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2662                               TCGv val);
2663
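/* The SSEFunc typedef names encode the signature: the character after
   the first underscore is the return type and the remaining characters
   are the argument types, with 0 = void, e = pointer to env,
   p = TCGv_ptr, i = TCGv_i32, l = TCGv_i64 and t = TCGv.  Thus
   SSEFunc_0_eppi is "void fn(env, ptr, ptr, i32)", the shape used for
   the imm8-taking entries of sse_op_table7 below.  */
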
2664#define SSE_SPECIAL ((void *)1)
2665#define SSE_DUMMY ((void *)2)
2666
2667#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2668#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2669                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2670
2671static const SSEFunc_0_epp sse_op_table1[256][4] = {
2672    /* 3DNow! extensions */
2673    [0x0e] = { SSE_DUMMY }, /* femms */
2674    [0x0f] = { SSE_DUMMY }, /* pf... */
2675    /* pure SSE operations */
2676    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2677    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2678    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2679    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2680    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2681    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2682    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2683    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2684
2685    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2686    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2687    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2688    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2689    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2690    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2691    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2692    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2693    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2694    [0x51] = SSE_FOP(sqrt),
2695    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2696    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2697    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2698    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2699    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2700    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2701    [0x58] = SSE_FOP(add),
2702    [0x59] = SSE_FOP(mul),
2703    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2704               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2705    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2706    [0x5c] = SSE_FOP(sub),
2707    [0x5d] = SSE_FOP(min),
2708    [0x5e] = SSE_FOP(div),
2709    [0x5f] = SSE_FOP(max),
2710
2711    [0xc2] = SSE_FOP(cmpeq),
2712    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2713               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2714
2715    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2716    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2717    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2718
2719    /* MMX ops and their SSE extensions */
2720    [0x60] = MMX_OP2(punpcklbw),
2721    [0x61] = MMX_OP2(punpcklwd),
2722    [0x62] = MMX_OP2(punpckldq),
2723    [0x63] = MMX_OP2(packsswb),
2724    [0x64] = MMX_OP2(pcmpgtb),
2725    [0x65] = MMX_OP2(pcmpgtw),
2726    [0x66] = MMX_OP2(pcmpgtl),
2727    [0x67] = MMX_OP2(packuswb),
2728    [0x68] = MMX_OP2(punpckhbw),
2729    [0x69] = MMX_OP2(punpckhwd),
2730    [0x6a] = MMX_OP2(punpckhdq),
2731    [0x6b] = MMX_OP2(packssdw),
2732    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2733    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2734    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2735    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2736    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2737               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2738               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2739               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2740    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2741    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2742    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2743    [0x74] = MMX_OP2(pcmpeqb),
2744    [0x75] = MMX_OP2(pcmpeqw),
2745    [0x76] = MMX_OP2(pcmpeql),
2746    [0x77] = { SSE_DUMMY }, /* emms */
2747    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2748    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2749    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2750    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2751    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2752    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2753    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2754    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2755    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2756    [0xd1] = MMX_OP2(psrlw),
2757    [0xd2] = MMX_OP2(psrld),
2758    [0xd3] = MMX_OP2(psrlq),
2759    [0xd4] = MMX_OP2(paddq),
2760    [0xd5] = MMX_OP2(pmullw),
2761    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2762    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2763    [0xd8] = MMX_OP2(psubusb),
2764    [0xd9] = MMX_OP2(psubusw),
2765    [0xda] = MMX_OP2(pminub),
2766    [0xdb] = MMX_OP2(pand),
2767    [0xdc] = MMX_OP2(paddusb),
2768    [0xdd] = MMX_OP2(paddusw),
2769    [0xde] = MMX_OP2(pmaxub),
2770    [0xdf] = MMX_OP2(pandn),
2771    [0xe0] = MMX_OP2(pavgb),
2772    [0xe1] = MMX_OP2(psraw),
2773    [0xe2] = MMX_OP2(psrad),
2774    [0xe3] = MMX_OP2(pavgw),
2775    [0xe4] = MMX_OP2(pmulhuw),
2776    [0xe5] = MMX_OP2(pmulhw),
2777    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2778    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2779    [0xe8] = MMX_OP2(psubsb),
2780    [0xe9] = MMX_OP2(psubsw),
2781    [0xea] = MMX_OP2(pminsw),
2782    [0xeb] = MMX_OP2(por),
2783    [0xec] = MMX_OP2(paddsb),
2784    [0xed] = MMX_OP2(paddsw),
2785    [0xee] = MMX_OP2(pmaxsw),
2786    [0xef] = MMX_OP2(pxor),
2787    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2788    [0xf1] = MMX_OP2(psllw),
2789    [0xf2] = MMX_OP2(pslld),
2790    [0xf3] = MMX_OP2(psllq),
2791    [0xf4] = MMX_OP2(pmuludq),
2792    [0xf5] = MMX_OP2(pmaddwd),
2793    [0xf6] = MMX_OP2(psadbw),
2794    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2795               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2796    [0xf8] = MMX_OP2(psubb),
2797    [0xf9] = MMX_OP2(psubw),
2798    [0xfa] = MMX_OP2(psubl),
2799    [0xfb] = MMX_OP2(psubq),
2800    [0xfc] = MMX_OP2(paddb),
2801    [0xfd] = MMX_OP2(paddw),
2802    [0xfe] = MMX_OP2(paddl),
2803};
2804
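/* sse_op_table1 is indexed by the second opcode byte and then by the
   mandatory prefix: column 0 = no prefix (MMX), 1 = 0x66, 2 = 0xf3,
   3 = 0xf2, matching the b1 computation at the top of gen_sse() below.
   SSE_SPECIAL entries are decoded by hand in gen_sse(); a NULL column
   means the prefix/opcode combination does not exist.  */
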
2805static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2806    [0 + 2] = MMX_OP2(psrlw),
2807    [0 + 4] = MMX_OP2(psraw),
2808    [0 + 6] = MMX_OP2(psllw),
2809    [8 + 2] = MMX_OP2(psrld),
2810    [8 + 4] = MMX_OP2(psrad),
2811    [8 + 6] = MMX_OP2(pslld),
2812    [16 + 2] = MMX_OP2(psrlq),
2813    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2814    [16 + 6] = MMX_OP2(psllq),
2815    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2816};
2817
2818static const SSEFunc_0_epi sse_op_table3ai[] = {
2819    gen_helper_cvtsi2ss,
2820    gen_helper_cvtsi2sd
2821};
2822
2823#ifdef TARGET_X86_64
2824static const SSEFunc_0_epl sse_op_table3aq[] = {
2825    gen_helper_cvtsq2ss,
2826    gen_helper_cvtsq2sd
2827};
2828#endif
2829
2830static const SSEFunc_i_ep sse_op_table3bi[] = {
2831    gen_helper_cvttss2si,
2832    gen_helper_cvtss2si,
2833    gen_helper_cvttsd2si,
2834    gen_helper_cvtsd2si
2835};
2836
2837#ifdef TARGET_X86_64
2838static const SSEFunc_l_ep sse_op_table3bq[] = {
2839    gen_helper_cvttss2sq,
2840    gen_helper_cvtss2sq,
2841    gen_helper_cvttsd2sq,
2842    gen_helper_cvtsd2sq
2843};
2844#endif
2845
2846static const SSEFunc_0_epp sse_op_table4[8][4] = {
2847    SSE_FOP(cmpeq),
2848    SSE_FOP(cmplt),
2849    SSE_FOP(cmple),
2850    SSE_FOP(cmpunord),
2851    SSE_FOP(cmpneq),
2852    SSE_FOP(cmpnlt),
2853    SSE_FOP(cmpnle),
2854    SSE_FOP(cmpord),
2855};
2856
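/* sse_op_table4 maps the imm8 comparison predicate of
   CMPPS/CMPPD/CMPSS/CMPSD: 0 = eq, 1 = lt, 2 = le, 3 = unord, 4 = neq,
   5 = nlt, 6 = nle, 7 = ord.  */
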
2857static const SSEFunc_0_epp sse_op_table5[256] = {
2858    [0x0c] = gen_helper_pi2fw,
2859    [0x0d] = gen_helper_pi2fd,
2860    [0x1c] = gen_helper_pf2iw,
2861    [0x1d] = gen_helper_pf2id,
2862    [0x8a] = gen_helper_pfnacc,
2863    [0x8e] = gen_helper_pfpnacc,
2864    [0x90] = gen_helper_pfcmpge,
2865    [0x94] = gen_helper_pfmin,
2866    [0x96] = gen_helper_pfrcp,
2867    [0x97] = gen_helper_pfrsqrt,
2868    [0x9a] = gen_helper_pfsub,
2869    [0x9e] = gen_helper_pfadd,
2870    [0xa0] = gen_helper_pfcmpgt,
2871    [0xa4] = gen_helper_pfmax,
2872    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2873    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2874    [0xaa] = gen_helper_pfsubr,
2875    [0xae] = gen_helper_pfacc,
2876    [0xb0] = gen_helper_pfcmpeq,
2877    [0xb4] = gen_helper_pfmul,
2878    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2879    [0xb7] = gen_helper_pmulhrw_mmx,
2880    [0xbb] = gen_helper_pswapd,
2881    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2882};
2883
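/* sse_op_table5 dispatches 3DNow! instructions (opcode 0f 0f), which
   encode the actual operation in a trailing suffix byte; that byte is
   the index used here.  */
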
2884struct SSEOpHelper_epp {
2885    SSEFunc_0_epp op[2];
2886    uint32_t ext_mask;
2887};
2888
2889struct SSEOpHelper_eppi {
2890    SSEFunc_0_eppi op[2];
2891    uint32_t ext_mask;
2892};
2893
2894#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2895#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2896#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2897#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2898#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2899        CPUID_EXT_PCLMULQDQ }
2900#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2901
2902static const struct SSEOpHelper_epp sse_op_table6[256] = {
2903    [0x00] = SSSE3_OP(pshufb),
2904    [0x01] = SSSE3_OP(phaddw),
2905    [0x02] = SSSE3_OP(phaddd),
2906    [0x03] = SSSE3_OP(phaddsw),
2907    [0x04] = SSSE3_OP(pmaddubsw),
2908    [0x05] = SSSE3_OP(phsubw),
2909    [0x06] = SSSE3_OP(phsubd),
2910    [0x07] = SSSE3_OP(phsubsw),
2911    [0x08] = SSSE3_OP(psignb),
2912    [0x09] = SSSE3_OP(psignw),
2913    [0x0a] = SSSE3_OP(psignd),
2914    [0x0b] = SSSE3_OP(pmulhrsw),
2915    [0x10] = SSE41_OP(pblendvb),
2916    [0x14] = SSE41_OP(blendvps),
2917    [0x15] = SSE41_OP(blendvpd),
2918    [0x17] = SSE41_OP(ptest),
2919    [0x1c] = SSSE3_OP(pabsb),
2920    [0x1d] = SSSE3_OP(pabsw),
2921    [0x1e] = SSSE3_OP(pabsd),
2922    [0x20] = SSE41_OP(pmovsxbw),
2923    [0x21] = SSE41_OP(pmovsxbd),
2924    [0x22] = SSE41_OP(pmovsxbq),
2925    [0x23] = SSE41_OP(pmovsxwd),
2926    [0x24] = SSE41_OP(pmovsxwq),
2927    [0x25] = SSE41_OP(pmovsxdq),
2928    [0x28] = SSE41_OP(pmuldq),
2929    [0x29] = SSE41_OP(pcmpeqq),
2930    [0x2a] = SSE41_SPECIAL, /* movntdqa */
2931    [0x2b] = SSE41_OP(packusdw),
2932    [0x30] = SSE41_OP(pmovzxbw),
2933    [0x31] = SSE41_OP(pmovzxbd),
2934    [0x32] = SSE41_OP(pmovzxbq),
2935    [0x33] = SSE41_OP(pmovzxwd),
2936    [0x34] = SSE41_OP(pmovzxwq),
2937    [0x35] = SSE41_OP(pmovzxdq),
2938    [0x37] = SSE42_OP(pcmpgtq),
2939    [0x38] = SSE41_OP(pminsb),
2940    [0x39] = SSE41_OP(pminsd),
2941    [0x3a] = SSE41_OP(pminuw),
2942    [0x3b] = SSE41_OP(pminud),
2943    [0x3c] = SSE41_OP(pmaxsb),
2944    [0x3d] = SSE41_OP(pmaxsd),
2945    [0x3e] = SSE41_OP(pmaxuw),
2946    [0x3f] = SSE41_OP(pmaxud),
2947    [0x40] = SSE41_OP(pmulld),
2948    [0x41] = SSE41_OP(phminposuw),
2949    [0xdb] = AESNI_OP(aesimc),
2950    [0xdc] = AESNI_OP(aesenc),
2951    [0xdd] = AESNI_OP(aesenclast),
2952    [0xde] = AESNI_OP(aesdec),
2953    [0xdf] = AESNI_OP(aesdeclast),
2954};
2955
2956static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2957    [0x08] = SSE41_OP(roundps),
2958    [0x09] = SSE41_OP(roundpd),
2959    [0x0a] = SSE41_OP(roundss),
2960    [0x0b] = SSE41_OP(roundsd),
2961    [0x0c] = SSE41_OP(blendps),
2962    [0x0d] = SSE41_OP(blendpd),
2963    [0x0e] = SSE41_OP(pblendw),
2964    [0x0f] = SSSE3_OP(palignr),
2965    [0x14] = SSE41_SPECIAL, /* pextrb */
2966    [0x15] = SSE41_SPECIAL, /* pextrw */
2967    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2968    [0x17] = SSE41_SPECIAL, /* extractps */
2969    [0x20] = SSE41_SPECIAL, /* pinsrb */
2970    [0x21] = SSE41_SPECIAL, /* insertps */
2971    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2972    [0x40] = SSE41_OP(dpps),
2973    [0x41] = SSE41_OP(dppd),
2974    [0x42] = SSE41_OP(mpsadbw),
2975    [0x44] = PCLMULQDQ_OP(pclmulqdq),
2976    [0x60] = SSE42_OP(pcmpestrm),
2977    [0x61] = SSE42_OP(pcmpestri),
2978    [0x62] = SSE42_OP(pcmpistrm),
2979    [0x63] = SSE42_OP(pcmpistri),
2980    [0xdf] = AESNI_OP(aeskeygenassist),
2981};
2982
2983static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2984                    target_ulong pc_start, int rex_r)
2985{
2986    int b1, op1_offset, op2_offset, is_xmm, val;
2987    int modrm, mod, rm, reg;
2988    SSEFunc_0_epp sse_fn_epp;
2989    SSEFunc_0_eppi sse_fn_eppi;
2990    SSEFunc_0_ppi sse_fn_ppi;
2991    SSEFunc_0_eppt sse_fn_eppt;
2992    TCGMemOp ot;
2993
2994    b &= 0xff;
2995    if (s->prefix & PREFIX_DATA)
2996        b1 = 1;
2997    else if (s->prefix & PREFIX_REPZ)
2998        b1 = 2;
2999    else if (s->prefix & PREFIX_REPNZ)
3000        b1 = 3;
3001    else
3002        b1 = 0;
3003    sse_fn_epp = sse_op_table1[b][b1];
3004    if (!sse_fn_epp) {
3005        goto unknown_op;
3006    }
3007    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3008        is_xmm = 1;
3009    } else {
3010        if (b1 == 0) {
3011            /* MMX case */
3012            is_xmm = 0;
3013        } else {
3014            is_xmm = 1;
3015        }
3016    }
3017    /* simple MMX/SSE operation */
3018    if (s->flags & HF_TS_MASK) {
3019        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3020        return;
3021    }
3022    if (s->flags & HF_EM_MASK) {
3023    illegal_op:
3024        gen_illegal_opcode(s);
3025        return;
3026    }
3027    if (is_xmm
3028        && !(s->flags & HF_OSFXSR_MASK)
3029        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3030        goto unknown_op;
3031    }
3032    if (b == 0x0e) {
3033        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3034            /* If we were fully decoding this we might use illegal_op.  */
3035            goto unknown_op;
3036        }
3037        /* femms */
3038        gen_helper_emms(cpu_env);
3039        return;
3040    }
3041    if (b == 0x77) {
3042        /* emms */
3043        gen_helper_emms(cpu_env);
3044        return;
3045    }
3046    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3047       the static cpu state) */
3048    if (!is_xmm) {
3049        gen_helper_enter_mmx(cpu_env);
3050    }
3051
3052    modrm = cpu_ldub_code(env, s->pc++);
3053    reg = ((modrm >> 3) & 7);
3054    if (is_xmm)
3055        reg |= rex_r;
3056    mod = (modrm >> 6) & 3;
3057    if (sse_fn_epp == SSE_SPECIAL) {
3058        b |= (b1 << 8);
3059        switch(b) {
3060        case 0x0e7: /* movntq */
3061            if (mod == 3) {
3062                goto illegal_op;
3063            }
3064            gen_lea_modrm(env, s, modrm);
3065            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3066            break;
3067        case 0x1e7: /* movntdq */
3068        case 0x02b: /* movntps */
3069        case 0x12b: /* movntpd */
3070            if (mod == 3)
3071                goto illegal_op;
3072            gen_lea_modrm(env, s, modrm);
3073            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3074            break;
3075        case 0x3f0: /* lddqu */
3076            if (mod == 3)
3077                goto illegal_op;
3078            gen_lea_modrm(env, s, modrm);
3079            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3080            break;
3081        case 0x22b: /* movntss */
3082        case 0x32b: /* movntsd */
3083            if (mod == 3)
3084                goto illegal_op;
3085            gen_lea_modrm(env, s, modrm);
3086            if (b1 & 1) {
3087                gen_stq_env_A0(s, offsetof(CPUX86State,
3088                                           xmm_regs[reg].ZMM_Q(0)));
3089            } else {
3090                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3091                    xmm_regs[reg].ZMM_L(0)));
3092                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3093            }
3094            break;
3095        case 0x6e: /* movd mm, ea */
3096#ifdef TARGET_X86_64
3097            if (s->dflag == MO_64) {
3098                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3099                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3100            } else
3101#endif
3102            {
3103                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3104                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3105                                 offsetof(CPUX86State,fpregs[reg].mmx));
3106                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3107                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3108            }
3109            break;
3110        case 0x16e: /* movd xmm, ea */
3111#ifdef TARGET_X86_64
3112            if (s->dflag == MO_64) {
3113                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3114                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3115                                 offsetof(CPUX86State,xmm_regs[reg]));
3116                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3117            } else
3118#endif
3119            {
3120                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3121                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3122                                 offsetof(CPUX86State,xmm_regs[reg]));
3123                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3124                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3125            }
3126            break;
3127        case 0x6f: /* movq mm, ea */
3128            if (mod != 3) {
3129                gen_lea_modrm(env, s, modrm);
3130                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3131            } else {
3132                rm = (modrm & 7);
3133                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3134                               offsetof(CPUX86State,fpregs[rm].mmx));
3135                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3136                               offsetof(CPUX86State,fpregs[reg].mmx));
3137            }
3138            break;
3139        case 0x010: /* movups */
3140        case 0x110: /* movupd */
3141        case 0x028: /* movaps */
3142        case 0x128: /* movapd */
3143        case 0x16f: /* movdqa xmm, ea */
3144        case 0x26f: /* movdqu xmm, ea */
3145            if (mod != 3) {
3146                gen_lea_modrm(env, s, modrm);
3147                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3148            } else {
3149                rm = (modrm & 7) | REX_B(s);
3150                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3151                            offsetof(CPUX86State,xmm_regs[rm]));
3152            }
3153            break;
3154        case 0x210: /* movss xmm, ea */
3155            if (mod != 3) {
3156                gen_lea_modrm(env, s, modrm);
3157                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3158                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3159                tcg_gen_movi_tl(cpu_T0, 0);
3160                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3161                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3162                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3163            } else {
3164                rm = (modrm & 7) | REX_B(s);
3165                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3166                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3167            }
3168            break;
3169        case 0x310: /* movsd xmm, ea */
3170            if (mod != 3) {
3171                gen_lea_modrm(env, s, modrm);
3172                gen_ldq_env_A0(s, offsetof(CPUX86State,
3173                                           xmm_regs[reg].ZMM_Q(0)));
3174                tcg_gen_movi_tl(cpu_T0, 0);
3175                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3176                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3177            } else {
3178                rm = (modrm & 7) | REX_B(s);
3179                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3180                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3181            }
3182            break;
3183        case 0x012: /* movlps */
3184        case 0x112: /* movlpd */
3185            if (mod != 3) {
3186                gen_lea_modrm(env, s, modrm);
3187                gen_ldq_env_A0(s, offsetof(CPUX86State,
3188                                           xmm_regs[reg].ZMM_Q(0)));
3189            } else {
3190                /* movhlps */
3191                rm = (modrm & 7) | REX_B(s);
3192                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3193                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3194            }
3195            break;
3196        case 0x212: /* movsldup */
3197            if (mod != 3) {
3198                gen_lea_modrm(env, s, modrm);
3199                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3200            } else {
3201                rm = (modrm & 7) | REX_B(s);
3202                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3203                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3204                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3205                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3206            }
3207            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3208                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3209            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3210                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3211            break;
3212        case 0x312: /* movddup */
3213            if (mod != 3) {
3214                gen_lea_modrm(env, s, modrm);
3215                gen_ldq_env_A0(s, offsetof(CPUX86State,
3216                                           xmm_regs[reg].ZMM_Q(0)));
3217            } else {
3218                rm = (modrm & 7) | REX_B(s);
3219                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3220                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3221            }
3222            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3223                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3224            break;
3225        case 0x016: /* movhps */
3226        case 0x116: /* movhpd */
3227            if (mod != 3) {
3228                gen_lea_modrm(env, s, modrm);
3229                gen_ldq_env_A0(s, offsetof(CPUX86State,
3230                                           xmm_regs[reg].ZMM_Q(1)));
3231            } else {
3232                /* movlhps */
3233                rm = (modrm & 7) | REX_B(s);
3234                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3235                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3236            }
3237            break;
3238        case 0x216: /* movshdup */
3239            if (mod != 3) {
3240                gen_lea_modrm(env, s, modrm);
3241                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3242            } else {
3243                rm = (modrm & 7) | REX_B(s);
3244                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3245                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3246                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3247                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3248            }
3249            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3250                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3251            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3252                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3253            break;
3254        case 0x178:
3255        case 0x378:
3256            {
3257                int bit_index, field_length;
3258
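                    /* SSE4a EXTRQ/INSERTQ immediate forms: two immediate
                       bytes follow the modrm, the first holding the field
                       length and the second the bit index, 6 bits each.  */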
3259                if (b1 == 1 && reg != 0)
3260                    goto illegal_op;
3261                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3262                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3263                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3264                    offsetof(CPUX86State,xmm_regs[reg]));
3265                if (b1 == 1)
3266                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
3267                                       tcg_const_i32(bit_index),
3268                                       tcg_const_i32(field_length));
3269                else
3270                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
3271                                         tcg_const_i32(bit_index),
3272                                         tcg_const_i32(field_length));
3273            }
3274            break;
3275        case 0x7e: /* movd ea, mm */
3276#ifdef TARGET_X86_64
3277            if (s->dflag == MO_64) {
3278                tcg_gen_ld_i64(cpu_T0, cpu_env,
3279                               offsetof(CPUX86State,fpregs[reg].mmx));
3280                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3281            } else
3282#endif
3283            {
3284                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3285                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3286                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3287            }
3288            break;
3289        case 0x17e: /* movd ea, xmm */
3290#ifdef TARGET_X86_64
3291            if (s->dflag == MO_64) {
3292                tcg_gen_ld_i64(cpu_T0, cpu_env,
3293                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3294                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3295            } else
3296#endif
3297            {
3298                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3299                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3300                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3301            }
3302            break;
3303        case 0x27e: /* movq xmm, ea */
3304            if (mod != 3) {
3305                gen_lea_modrm(env, s, modrm);
3306                gen_ldq_env_A0(s, offsetof(CPUX86State,
3307                                           xmm_regs[reg].ZMM_Q(0)));
3308            } else {
3309                rm = (modrm & 7) | REX_B(s);
3310                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3311                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3312            }
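                /* movq zero-extends: clear the high quadword of the
                   destination.  */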
3313            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3314            break;
3315        case 0x7f: /* movq ea, mm */
3316            if (mod != 3) {
3317                gen_lea_modrm(env, s, modrm);
3318                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3319            } else {
3320                rm = (modrm & 7);
3321                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3322                            offsetof(CPUX86State,fpregs[reg].mmx));
3323            }
3324            break;
3325        case 0x011: /* movups */
3326        case 0x111: /* movupd */
3327        case 0x029: /* movaps */
3328        case 0x129: /* movapd */
3329        case 0x17f: /* movdqa ea, xmm */
3330        case 0x27f: /* movdqu ea, xmm */
3331            if (mod != 3) {
3332                gen_lea_modrm(env, s, modrm);
3333                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3334            } else {
3335                rm = (modrm & 7) | REX_B(s);
3336                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3337                            offsetof(CPUX86State,xmm_regs[reg]));
3338            }
3339            break;
3340        case 0x211: /* movss ea, xmm */
3341            if (mod != 3) {
3342                gen_lea_modrm(env, s, modrm);
3343                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3344                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3345            } else {
3346                rm = (modrm & 7) | REX_B(s);
3347                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
3348                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3349            }
3350            break;
3351        case 0x311: /* movsd ea, xmm */
3352            if (mod != 3) {
3353                gen_lea_modrm(env, s, modrm);
3354                gen_stq_env_A0(s, offsetof(CPUX86State,
3355                                           xmm_regs[reg].ZMM_Q(0)));
3356            } else {
3357                rm = (modrm & 7) | REX_B(s);
3358                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3359                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3360            }
3361            break;
3362        case 0x013: /* movlps */
3363        case 0x113: /* movlpd */
3364            if (mod != 3) {
3365                gen_lea_modrm(env, s, modrm);
3366                gen_stq_env_A0(s, offsetof(CPUX86State,
3367                                           xmm_regs[reg].ZMM_Q(0)));
3368            } else {
3369                goto illegal_op;
3370            }
3371            break;
3372        case 0x017: /* movhps */
3373        case 0x117: /* movhpd */
3374            if (mod != 3) {
3375                gen_lea_modrm(env, s, modrm);
3376                gen_stq_env_A0(s, offsetof(CPUX86State,
3377                                           xmm_regs[reg].ZMM_Q(1)));
3378            } else {
3379                goto illegal_op;
3380            }
3381            break;
3382        case 0x71: /* shift mm, im */
3383        case 0x72:
3384        case 0x73:
3385        case 0x171: /* shift xmm, im */
3386        case 0x172:
3387        case 0x173:
3388            if (b1 >= 2) {
3389                goto unknown_op;
3390            }
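                /* Materialize the immediate count as a 64-bit value in a
                   scratch slot; it is then passed as the shift-count
                   operand to the same helpers the register forms use.  */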
3391            val = cpu_ldub_code(env, s->pc++);
3392            if (is_xmm) {
3393                tcg_gen_movi_tl(cpu_T0, val);
3394                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3395                tcg_gen_movi_tl(cpu_T0, 0);
3396                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
3397                op1_offset = offsetof(CPUX86State,xmm_t0);
3398            } else {
3399                tcg_gen_movi_tl(cpu_T0, val);
3400                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3401                tcg_gen_movi_tl(cpu_T0, 0);
3402                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3403                op1_offset = offsetof(CPUX86State,mmx_t0);
3404            }
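                /* sse_op_table2 is indexed by the opcode byte (0x71..0x73,
                   the element width) and the modrm /r field (the shift
                   kind); b1 selects the MMX or SSE variant.  */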
3405            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3406                                       (((modrm >> 3)) & 7)][b1];
3407            if (!sse_fn_epp) {
3408                goto unknown_op;
3409            }
3410            if (is_xmm) {
3411                rm = (modrm & 7) | REX_B(s);
3412                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3413            } else {
3414                rm = (modrm & 7);
3415                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3416            }
3417            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3418            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3419            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3420            break;
3421        case 0x050: /* movmskps */
3422            rm = (modrm & 7) | REX_B(s);
3423            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3424                             offsetof(CPUX86State,xmm_regs[rm]));
3425            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3426            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3427            break;
3428        case 0x150: /* movmskpd */
3429            rm = (modrm & 7) | REX_B(s);
3430            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3431                             offsetof(CPUX86State,xmm_regs[rm]));
3432            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3433            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3434            break;
3435        case 0x02a: /* cvtpi2ps */
3436        case 0x12a: /* cvtpi2pd */
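                /* Any MMX instruction resets the FPU stack top and marks
                   all tags valid; enter_mmx models that transition.  */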
3437            gen_helper_enter_mmx(cpu_env);
3438            if (mod != 3) {
3439                gen_lea_modrm(env, s, modrm);
3440                op2_offset = offsetof(CPUX86State,mmx_t0);
3441                gen_ldq_env_A0(s, op2_offset);
3442            } else {
3443                rm = (modrm & 7);
3444                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3445            }
3446            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3447            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3448            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3449            switch(b >> 8) {
3450            case 0x0:
3451                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3452                break;
3453            default:
3454            case 0x1:
3455                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3456                break;
3457            }
3458            break;
3459        case 0x22a: /* cvtsi2ss */
3460        case 0x32a: /* cvtsi2sd */
3461            ot = mo_64_32(s->dflag);
3462            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3463            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3464            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3465            if (ot == MO_32) {
3466                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3467                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3468                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3469            } else {
3470#ifdef TARGET_X86_64
3471                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3472                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
3473#else
3474                goto illegal_op;
3475#endif
3476            }
3477            break;
3478        case 0x02c: /* cvttps2pi */
3479        case 0x12c: /* cvttpd2pi */
3480        case 0x02d: /* cvtps2pi */
3481        case 0x12d: /* cvtpd2pi */
3482            gen_helper_enter_mmx(cpu_env);
3483            if (mod != 3) {
3484                gen_lea_modrm(env, s, modrm);
3485                op2_offset = offsetof(CPUX86State,xmm_t0);
3486                gen_ldo_env_A0(s, op2_offset);
3487            } else {
3488                rm = (modrm & 7) | REX_B(s);
3489                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3490            }
3491            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3492            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3493            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3494            switch(b) {
3495            case 0x02c:
3496                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3497                break;
3498            case 0x12c:
3499                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3500                break;
3501            case 0x02d:
3502                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3503                break;
3504            case 0x12d:
3505                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3506                break;
3507            }
3508            break;
3509        case 0x22c: /* cvttss2si */
3510        case 0x32c: /* cvttsd2si */
3511        case 0x22d: /* cvtss2si */
3512        case 0x32d: /* cvtsd2si */
3513            ot = mo_64_32(s->dflag);
3514            if (mod != 3) {
3515                gen_lea_modrm(env, s, modrm);
3516                if ((b >> 8) & 1) {
3517                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3518                } else {
3519                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3520                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3521                }
3522                op2_offset = offsetof(CPUX86State,xmm_t0);
3523            } else {
3524                rm = (modrm & 7) | REX_B(s);
3525                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3526            }
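                /* sse_op_table3b index: bit 0 is set for the non-truncating
                   0x2d forms, bit 1 for the f2-prefixed sd forms.  */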
3527            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3528            if (ot == MO_32) {
3529                SSEFunc_i_ep sse_fn_i_ep =
3530                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3531                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3532                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
3533            } else {
3534#ifdef TARGET_X86_64
3535                SSEFunc_l_ep sse_fn_l_ep =
3536                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3537                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
3538#else
3539                goto illegal_op;
3540#endif
3541            }
3542            gen_op_mov_reg_v(ot, reg, cpu_T0);
3543            break;
3544        case 0xc4: /* pinsrw */
3545        case 0x1c4:
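                /* One immediate byte follows the operand; record it so a
                   RIP-relative address (relative to the end of the insn)
                   is computed correctly.  */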
3546            s->rip_offset = 1;
3547            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3548            val = cpu_ldub_code(env, s->pc++);
3549            if (b1) {
3550                val &= 7;
3551                tcg_gen_st16_tl(cpu_T0, cpu_env,
3552                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3553            } else {
3554                val &= 3;
3555                tcg_gen_st16_tl(cpu_T0, cpu_env,
3556                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3557            }
3558            break;
3559        case 0xc5: /* pextrw */
3560        case 0x1c5:
3561            if (mod != 3)
3562                goto illegal_op;
3563            ot = mo_64_32(s->dflag);
3564            val = cpu_ldub_code(env, s->pc++);
3565            if (b1) {
3566                val &= 7;
3567                rm = (modrm & 7) | REX_B(s);
3568                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3569                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3570            } else {
3571                val &= 3;
3572                rm = (modrm & 7);
3573                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3574                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3575            }
3576            reg = ((modrm >> 3) & 7) | rex_r;
3577            gen_op_mov_reg_v(ot, reg, cpu_T0);
3578            break;
3579        case 0x1d6: /* movq ea, xmm */
3580            if (mod != 3) {
3581                gen_lea_modrm(env, s, modrm);
3582                gen_stq_env_A0(s, offsetof(CPUX86State,
3583                                           xmm_regs[reg].ZMM_Q(0)));
3584            } else {
3585                rm = (modrm & 7) | REX_B(s);
3586                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3587                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3588                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3589            }
3590            break;
3591        case 0x2d6: /* movq2dq */
3592            gen_helper_enter_mmx(cpu_env);
3593            rm = (modrm & 7);
3594            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3595                        offsetof(CPUX86State,fpregs[rm].mmx));
3596            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3597            break;
3598        case 0x3d6: /* movdq2q */
3599            gen_helper_enter_mmx(cpu_env);
3600            rm = (modrm & 7) | REX_B(s);
3601            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3602                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3603            break;
3604        case 0xd7: /* pmovmskb */
3605        case 0x1d7:
3606            if (mod != 3)
3607                goto illegal_op;
3608            if (b1) {
3609                rm = (modrm & 7) | REX_B(s);
3610                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3611                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3612            } else {
3613                rm = (modrm & 7);
3614                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3615                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3616            }
3617            reg = ((modrm >> 3) & 7) | rex_r;
3618            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3619            break;
3620
3621        case 0x138:
3622        case 0x038:
3623            b = modrm;
3624            if ((b & 0xf0) == 0xf0) {
3625                goto do_0f_38_fx;
3626            }
3627            modrm = cpu_ldub_code(env, s->pc++);
3628            rm = modrm & 7;
3629            reg = ((modrm >> 3) & 7) | rex_r;
3630            mod = (modrm >> 6) & 3;
3631            if (b1 >= 2) {
3632                goto unknown_op;
3633            }
3634
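                /* sse_op_table6 maps the 0f 38 opcode byte to the MMX
                   (b1 == 0) and SSE (b1 == 1) helper variants, plus the
                   CPUID feature bit that gates the insn.  */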
3635            sse_fn_epp = sse_op_table6[b].op[b1];
3636            if (!sse_fn_epp) {
3637                goto unknown_op;
3638            }
3639            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3640                goto illegal_op;
3641
3642            if (b1) {
3643                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3644                if (mod == 3) {
3645                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3646                } else {
3647                    op2_offset = offsetof(CPUX86State,xmm_t0);
3648                    gen_lea_modrm(env, s, modrm);
3649                    switch (b) {
3650                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3651                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3652                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3653                        gen_ldq_env_A0(s, op2_offset +
3654                                        offsetof(ZMMReg, ZMM_Q(0)));
3655                        break;
3656                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3657                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3658                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3659                                            s->mem_index, MO_LEUL);
3660                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3661                                        offsetof(ZMMReg, ZMM_L(0)));
3662                        break;
3663                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3664                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3665                                           s->mem_index, MO_LEUW);
3666                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3667                                        offsetof(ZMMReg, ZMM_W(0)));
3668                        break;
3669                    case 0x2a:            /* movntdqa */
3670                        gen_ldo_env_A0(s, op1_offset);
3671                        return;
3672                    default:
3673                        gen_ldo_env_A0(s, op2_offset);
3674                    }
3675                }
3676            } else {
3677                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3678                if (mod == 3) {
3679                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3680                } else {
3681                    op2_offset = offsetof(CPUX86State,mmx_t0);
3682                    gen_lea_modrm(env, s, modrm);
3683                    gen_ldq_env_A0(s, op2_offset);
3684                }
3685            }
3686            if (sse_fn_epp == SSE_SPECIAL) {
3687                goto unknown_op;
3688            }
3689
3690            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3691            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3692            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3693
3694            if (b == 0x17) {
3695                set_cc_op(s, CC_OP_EFLAGS);
3696            }
3697            break;
3698
3699        case 0x238:
3700        case 0x338:
3701        do_0f_38_fx:
3702            /* Various integer extensions at 0f 38 f[0-f].  */
3703            b = modrm | (b1 << 8);
3704            modrm = cpu_ldub_code(env, s->pc++);
3705            reg = ((modrm >> 3) & 7) | rex_r;
3706
3707            switch (b) {
3708            case 0x3f0: /* crc32 Gd,Eb */
3709            case 0x3f1: /* crc32 Gd,Ey */
3710            do_crc32:
3711                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3712                    goto illegal_op;
3713                }
3714                if ((b & 0xff) == 0xf0) {
3715                    ot = MO_8;
3716                } else if (s->dflag != MO_64) {
3717                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3718                } else {
3719                    ot = MO_64;
3720                }
3721
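                    /* The helper folds 8 << ot bits of the source into the
                       32-bit CRC accumulated in the destination register.  */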
3722                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3723                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3724                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
3725                                 cpu_T0, tcg_const_i32(8 << ot));
3726
3727                ot = mo_64_32(s->dflag);
3728                gen_op_mov_reg_v(ot, reg, cpu_T0);
3729                break;
3730
3731            case 0x1f0: /* crc32 or movbe */
3732            case 0x1f1:
3733                /* For these insns, the f2 prefix is supposed to have priority
3734                   over the 66 prefix, but that's not what we implement above
3735                   setting b1.  */
3736                if (s->prefix & PREFIX_REPNZ) {
3737                    goto do_crc32;
3738                }
3739                /* FALLTHRU */
3740            case 0x0f0: /* movbe Gy,My */
3741            case 0x0f1: /* movbe My,Gy */
3742                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3743                    goto illegal_op;
3744                }
3745                if (s->dflag != MO_64) {
3746                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3747                } else {
3748                    ot = MO_64;
3749                }
3750
3751                gen_lea_modrm(env, s, modrm);
3752                if ((b & 1) == 0) {
3753                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
3754                                       s->mem_index, ot | MO_BE);
3755                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3756                } else {
3757                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3758                                       s->mem_index, ot | MO_BE);
3759                }
3760                break;
3761
3762            case 0x0f2: /* andn Gy, By, Ey */
3763                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3764                    || !(s->prefix & PREFIX_VEX)
3765                    || s->vex_l != 0) {
3766                    goto illegal_op;
3767                }
3768                ot = mo_64_32(s->dflag);
3769                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
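                    /* ANDN computes Ey & ~By, with By named by VEX.vvvv.  */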
3770                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
3771                gen_op_mov_reg_v(ot, reg, cpu_T0);
3772                gen_op_update1_cc();
3773                set_cc_op(s, CC_OP_LOGICB + ot);
3774                break;
3775
3776            case 0x0f7: /* bextr Gy, Ey, By */
3777                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3778                    || !(s->prefix & PREFIX_VEX)
3779                    || s->vex_l != 0) {
3780                    goto illegal_op;
3781                }
3782                ot = mo_64_32(s->dflag);
3783                {
3784                    TCGv bound, zero;
3785
3786                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3787                    /* Extract START, and shift the operand.
3788                       Shifts larger than operand size get zeros.  */
3789                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3790                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
3791
3792                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3793                    zero = tcg_const_tl(0);
3794                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
3795                                       cpu_T0, zero);
3796                    tcg_temp_free(zero);
3797
3798                    /* Extract the LEN into a mask.  Lengths larger than
3799                       operand size get all ones.  */
3800                    tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
3801                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3802                                       cpu_A0, bound);
3803                    tcg_temp_free(bound);
3804                    tcg_gen_movi_tl(cpu_T1, 1);
3805                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
3806                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
3807                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3808
3809                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3810                    gen_op_update1_cc();
3811                    set_cc_op(s, CC_OP_LOGICB + ot);
3812                }
3813                break;
3814
3815            case 0x0f5: /* bzhi Gy, Ey, By */
3816                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3817                    || !(s->prefix & PREFIX_VEX)
3818                    || s->vex_l != 0) {
3819                    goto illegal_op;
3820                }
3821                ot = mo_64_32(s->dflag);
3822                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3823                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
3824                {
3825                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3826                    /* Note that since we're using BMILG (in order to get O
3827                       cleared) we need to store the inverse into C.  */
3828                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3829                                       cpu_T1, bound);
3830                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
3831                                       bound, bound, cpu_T1);
3832                    tcg_temp_free(bound);
3833                }
3834                tcg_gen_movi_tl(cpu_A0, -1);
3835                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
3836                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
3837                gen_op_mov_reg_v(ot, reg, cpu_T0);
3838                gen_op_update1_cc();
3839                set_cc_op(s, CC_OP_BMILGB + ot);
3840                break;
3841
3842            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3843                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3844                    || !(s->prefix & PREFIX_VEX)
3845                    || s->vex_l != 0) {
3846                    goto illegal_op;
3847                }
3848                ot = mo_64_32(s->dflag);
3849                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3850                switch (ot) {
3851                default:
3852                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3853                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3854                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3855                                      cpu_tmp2_i32, cpu_tmp3_i32);
3856                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3857                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3858                    break;
3859#ifdef TARGET_X86_64
3860                case MO_64:
3861                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
3862                                      cpu_T0, cpu_regs[R_EDX]);
3863                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
3864                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
3865                    break;
3866#endif
3867                }
3868                break;
3869
3870            case 0x3f5: /* pdep Gy, By, Ey */
3871                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3872                    || !(s->prefix & PREFIX_VEX)
3873                    || s->vex_l != 0) {
3874                    goto illegal_op;
3875                }
3876                ot = mo_64_32(s->dflag);
3877                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3878                /* Note that by zero-extending the mask operand, we
3879                   automatically handle zero-extending the result.  */
3880                if (ot == MO_64) {
3881                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3882                } else {
3883                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3884                }
3885                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
3886                break;
3887
3888            case 0x2f5: /* pext Gy, By, Ey */
3889                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3890                    || !(s->prefix & PREFIX_VEX)
3891                    || s->vex_l != 0) {
3892                    goto illegal_op;
3893                }
3894                ot = mo_64_32(s->dflag);
3895                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3896                /* Note that by zero-extending the mask operand, we
3897                   automatically handle zero-extending the result.  */
3898                if (ot == MO_64) {
3899                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3900                } else {
3901                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3902                }
3903                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
3904                break;
3905
3906            case 0x1f6: /* adcx Gy, Ey */
3907            case 0x2f6: /* adox Gy, Ey */
3908                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3909                    goto illegal_op;
3910                } else {
3911                    TCGv carry_in, carry_out, zero;
3912                    int end_op;
3913
3914                    ot = mo_64_32(s->dflag);
3915                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3916
3917                    /* Re-use the carry-out from a previous round.  */
3918                    TCGV_UNUSED(carry_in);
3919                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
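                        /* ADCX tracks its carry (CF) in cc_dst, ADOX its
                           overflow (OF) in cc_src2; CC_OP_ADCOX means both
                           are live.  */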
3920                    switch (s->cc_op) {
3921                    case CC_OP_ADCX:
3922                        if (b == 0x1f6) {
3923                            carry_in = cpu_cc_dst;
3924                            end_op = CC_OP_ADCX;
3925                        } else {
3926                            end_op = CC_OP_ADCOX;
3927                        }
3928                        break;
3929                    case CC_OP_ADOX:
3930                        if (b == 0x1f6) {
3931                            end_op = CC_OP_ADCOX;
3932                        } else {
3933                            carry_in = cpu_cc_src2;
3934                            end_op = CC_OP_ADOX;
3935                        }
3936                        break;
3937                    case CC_OP_ADCOX:
3938                        end_op = CC_OP_ADCOX;
3939                        carry_in = carry_out;
3940                        break;
3941                    default:
3942                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3943                        break;
3944                    }
3945                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
3946                    if (TCGV_IS_UNUSED(carry_in)) {
3947                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3948                            gen_compute_eflags(s);
3949                        }
3950                        carry_in = cpu_tmp0;
3951                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
3952                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
3953                    }
3954
3955                    switch (ot) {
3956#ifdef TARGET_X86_64
3957                    case MO_32:
3958                        /* If we know TL is 64-bit, and we want a 32-bit
3959                           result, just do everything in 64-bit arithmetic.  */
3960                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3961                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
3962                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
3963                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
3964                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
3965                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
3966                        break;
3967#endif
3968                    default:
3969                        /* Otherwise compute the carry-out in two steps.  */
3970                        zero = tcg_const_tl(0);
3971                        tcg_gen_add2_tl(cpu_T0, carry_out,
3972                                        cpu_T0, zero,
3973                                        carry_in, zero);
3974                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
3975                                        cpu_regs[reg], carry_out,
3976                                        cpu_T0, zero);
3977                        tcg_temp_free(zero);
3978                        break;
3979                    }
3980                    set_cc_op(s, end_op);
3981                }
3982                break;
3983
3984            case 0x1f7: /* shlx Gy, Ey, By */
3985            case 0x2f7: /* sarx Gy, Ey, By */
3986            case 0x3f7: /* shrx Gy, Ey, By */
3987                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3988                    || !(s->prefix & PREFIX_VEX)
3989                    || s->vex_l != 0) {
3990                    goto illegal_op;
3991                }
3992                ot = mo_64_32(s->dflag);
3993                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
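                    /* As in hardware, the shift count is masked to the
                       operand width.  */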
3994                if (ot == MO_64) {
3995                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
3996                } else {
3997                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
3998                }
3999                if (b == 0x1f7) {
4000                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
4001                } else if (b == 0x2f7) {
4002                    if (ot != MO_64) {
4003                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
4004                    }
4005                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
4006                } else {
4007                    if (ot != MO_64) {
4008                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
4009                    }
4010                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
4011                }
4012                gen_op_mov_reg_v(ot, reg, cpu_T0);
4013                break;
4014
4015            case 0x0f3:
4016            case 0x1f3:
4017            case 0x2f3:
4018            case 0x3f3: /* Group 17 */
4019                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4020                    || !(s->prefix & PREFIX_VEX)
4021                    || s->vex_l != 0) {
4022                    goto illegal_op;
4023                }
4024                ot = mo_64_32(s->dflag);
4025                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4026
4027                switch (reg & 7) {
4028                case 1: /* blsr By,Ey: clear the lowest set bit */
4029                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4030                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
4031                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
4032                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4033                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
4034                    set_cc_op(s, CC_OP_BMILGB + ot);
4035                    break;
4036                case 2: /* blsmsk By,Ey: mask up to the lowest set bit */
4037                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4038                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
4039                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
4040                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4041                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
4042                    set_cc_op(s, CC_OP_BMILGB + ot);
4043                    break;
4044                case 3: /* blsi By, Ey: isolate the lowest set bit */
4045                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4046                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
4047                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
4048                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4049                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
4050                    set_cc_op(s, CC_OP_BMILGB + ot);
4051                    break;
4052                default:
4053                    goto unknown_op;
4054                }
4055                break;
4056
4057            default:
4058                goto unknown_op;
4059            }
4060            break;
4061
4062        case 0x03a:
4063        case 0x13a:
4064            b = modrm;
4065            modrm = cpu_ldub_code(env, s->pc++);
4066            rm = modrm & 7;
4067            reg = ((modrm >> 3) & 7) | rex_r;
4068            mod = (modrm >> 6) & 3;
4069            if (b1 >= 2) {
4070                goto unknown_op;
4071            }
4072
4073            sse_fn_eppi = sse_op_table7[b].op[b1];
4074            if (!sse_fn_eppi) {
4075                goto unknown_op;
4076            }
4077            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4078                goto illegal_op;
4079
4080            if (sse_fn_eppi == SSE_SPECIAL) {
4081                ot = mo_64_32(s->dflag);
4082                rm = (modrm & 7) | REX_B(s);
4083                s->rip_offset = 1;
4084                if (mod != 3)
4085                    gen_lea_modrm(env, s, modrm);
4086                reg = ((modrm >> 3) & 7) | rex_r;
4087                val = cpu_ldub_code(env, s->pc++);
4088                switch (b) {
4089                case 0x14: /* pextrb */
4090                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4091                                            xmm_regs[reg].ZMM_B(val & 15)));
4092                    if (mod == 3) {
4093                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4094                    } else {
4095                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4096                                           s->mem_index, MO_UB);
4097                    }
4098                    break;
4099                case 0x15: /* pextrw */
4100                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4101                                            xmm_regs[reg].ZMM_W(val & 7)));
4102                    if (mod == 3) {
4103                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4104                    } else {
4105                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4106                                           s->mem_index, MO_LEUW);
4107                    }
4108                    break;
4109                case 0x16:
4110                    if (ot == MO_32) { /* pextrd */
4111                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4112                                        offsetof(CPUX86State,
4113                                                xmm_regs[reg].ZMM_L(val & 3)));
4114                        if (mod == 3) {
4115                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4116                        } else {
4117                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4118                                                s->mem_index, MO_LEUL);
4119                        }
4120                    } else { /* pextrq */
4121#ifdef TARGET_X86_64
4122                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4123                                        offsetof(CPUX86State,
4124                                                xmm_regs[reg].ZMM_Q(val & 1)));
4125                        if (mod == 3) {
4126                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4127                        } else {
4128                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4129                                                s->mem_index, MO_LEQ);
4130                        }
4131#else
4132                        goto illegal_op;
4133#endif
4134                    }
4135                    break;
4136                case 0x17: /* extractps */
4137                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4138                                            xmm_regs[reg].ZMM_L(val & 3)));
4139                    if (mod == 3) {
4140                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4141                    } else {
4142                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4143                                           s->mem_index, MO_LEUL);
4144                    }
4145                    break;
4146                case 0x20: /* pinsrb */
4147                    if (mod == 3) {
4148                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
4149                    } else {
4150                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
4151                                           s->mem_index, MO_UB);
4152                    }
4153                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4154                                            xmm_regs[reg].ZMM_B(val & 15)));
4155                    break;
4156                case 0x21: /* insertps */
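                        /* imm8: bits 7:6 select the source element, bits
                           5:4 the destination slot, bits 3:0 give a zero
                           mask applied afterwards.  */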
4157                    if (mod == 3) {
4158                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4159                                        offsetof(CPUX86State,xmm_regs[rm]
4160                                                .ZMM_L((val >> 6) & 3)));
4161                    } else {
4162                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4163                                            s->mem_index, MO_LEUL);
4164                    }
4165                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4166                                    offsetof(CPUX86State,xmm_regs[reg]
4167                                            .ZMM_L((val >> 4) & 3)));
4168                    if ((val >> 0) & 1)
4169                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4170                                        cpu_env, offsetof(CPUX86State,
4171                                                xmm_regs[reg].ZMM_L(0)));
4172                    if ((val >> 1) & 1)
4173                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4174                                        cpu_env, offsetof(CPUX86State,
4175                                                xmm_regs[reg].ZMM_L(1)));
4176                    if ((val >> 2) & 1)
4177                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4178                                        cpu_env, offsetof(CPUX86State,
4179                                                xmm_regs[reg].ZMM_L(2)));
4180                    if ((val >> 3) & 1)
4181                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4182                                        cpu_env, offsetof(CPUX86State,
4183                                                xmm_regs[reg].ZMM_L(3)));
4184                    break;
4185                case 0x22:
4186                    if (ot == MO_32) { /* pinsrd */
4187                        if (mod == 3) {
4188                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4189                        } else {
4190                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4191                                                s->mem_index, MO_LEUL);
4192                        }
4193                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4194                                        offsetof(CPUX86State,
4195                                                xmm_regs[reg].ZMM_L(val & 3)));
4196                    } else { /* pinsrq */
4197#ifdef TARGET_X86_64
4198                        if (mod == 3) {
4199                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4200                        } else {
4201                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4202                                                s->mem_index, MO_LEQ);
4203                        }
4204                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4205                                        offsetof(CPUX86State,
4206                                                xmm_regs[reg].ZMM_Q(val & 1)));
4207#else
4208                        goto illegal_op;
4209#endif
4210                    }
4211                    break;
4212                }
4213                return;
4214            }
4215
4216            if (b1) {
4217                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4218                if (mod == 3) {
4219                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4220                } else {
4221                    op2_offset = offsetof(CPUX86State,xmm_t0);
4222                    gen_lea_modrm(env, s, modrm);
4223                    gen_ldo_env_A0(s, op2_offset);
4224                }
4225            } else {
4226                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4227                if (mod == 3) {
4228                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4229                } else {
4230                    op2_offset = offsetof(CPUX86State,mmx_t0);
4231                    gen_lea_modrm(env, s, modrm);
4232                    gen_ldq_env_A0(s, op2_offset);
4233                }
4234            }
4235            val = cpu_ldub_code(env, s->pc++);
4236
4237            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4238                set_cc_op(s, CC_OP_EFLAGS);
4239
4240                if (s->dflag == MO_64) {
4241                    /* The helper must use entire 64-bit gp registers */
4242                    val |= 1 << 8;
4243                }
4244            }
4245
4246            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4247            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4248            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4249            break;
4250
4251        case 0x33a:
4252            /* Various integer extensions at 0f 3a f[0-f].  */
4253            b = modrm | (b1 << 8);
4254            modrm = cpu_ldub_code(env, s->pc++);
4255            reg = ((modrm >> 3) & 7) | rex_r;
4256
4257            switch (b) {
4258            case 0x3f0: /* rorx Gy,Ey, Ib */
4259                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4260                    || !(s->prefix & PREFIX_VEX)
4261                    || s->vex_l != 0) {
4262                    goto illegal_op;
4263                }
4264                ot = mo_64_32(s->dflag);
4265                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4266                b = cpu_ldub_code(env, s->pc++);
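                    /* The count is an immediate byte; unlike ROR, RORX
                       leaves the flags untouched, so no cc update here.  */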
4267                if (ot == MO_64) {
4268                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
4269                } else {
4270                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4271                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4272                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
4273                }
4274                gen_op_mov_reg_v(ot, reg, cpu_T0);
4275                break;
4276
4277            default:
4278                goto unknown_op;
4279            }
4280            break;
4281
4282        default:
4283        unknown_op:
4284            gen_unknown_opcode(env, s);
4285            return;
4286        }
4287    } else {
4288        /* generic MMX or SSE operation */
4289        switch(b) {
4290        case 0x70: /* pshufx insn */
4291        case 0xc6: /* pshufx insn */
4292        case 0xc2: /* compare insns */
4293            s->rip_offset = 1;
4294            break;
4295        default:
4296            break;
4297        }
4298        if (is_xmm) {
4299            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4300            if (mod != 3) {
4301                int sz = 4;
4302
4303                gen_lea_modrm(env, s, modrm);
4304                op2_offset = offsetof(CPUX86State,xmm_t0);
4305
4306                switch (b) {
4307                case 0x50 ... 0x5a:
4308                case 0x5c ... 0x5f:
4309                case 0xc2:
4310                    /* Most sse scalar operations.  */
4311                    if (b1 == 2) {
4312                        sz = 2;
4313                    } else if (b1 == 3) {
4314                        sz = 3;
4315                    }
4316                    break;
4317
4318                case 0x2e:  /* ucomis[sd] */
4319                case 0x2f:  /* comis[sd] */
4320                    if (b1 == 0) {
4321                        sz = 2;
4322                    } else {
4323                        sz = 3;
4324                    }
4325                    break;
4326                }
4327
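                    /* Scalar forms only touch 4 or 8 bytes of memory, so
                       load just that much instead of a full 16-byte
                       vector.  */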
4328                switch (sz) {
4329                case 2:
4330                    /* 32 bit access */
4331                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
4332                    tcg_gen_st32_tl(cpu_T0, cpu_env,
4333                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4334                    break;
4335                case 3:
4336                    /* 64 bit access */
4337                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4338                    break;
4339                default:
4340                    /* 128 bit access */
4341                    gen_ldo_env_A0(s, op2_offset);
4342                    break;
4343                }
4344            } else {
4345                rm = (modrm & 7) | REX_B(s);
4346                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4347            }
4348        } else {
4349            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4350            if (mod != 3) {
4351                gen_lea_modrm(env, s, modrm);
4352                op2_offset = offsetof(CPUX86State,mmx_t0);
4353                gen_ldq_env_A0(s, op2_offset);
4354            } else {
4355                rm = (modrm & 7);
4356                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4357            }
4358        }
4359        switch(b) {
4360        case 0x0f: /* 3DNow! data insns */
4361            val = cpu_ldub_code(env, s->pc++);
4362            sse_fn_epp = sse_op_table5[val];
4363            if (!sse_fn_epp) {
4364                goto unknown_op;
4365            }
4366            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4367                goto illegal_op;
4368            }
4369            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4370            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4371            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4372            break;
4373        case 0x70: /* pshufx insn */
4374        case 0xc6: /* pshufx insn */
4375            val = cpu_ldub_code(env, s->pc++);
4376            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4377            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4378            /* XXX: introduce a new table? */
4379            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4380            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4381            break;
4382        case 0xc2:
4383            /* compare insns */
4384            val = cpu_ldub_code(env, s->pc++);
4385            if (val >= 8)
4386                goto unknown_op;
4387            sse_fn_epp = sse_op_table4[val][b1];
4388
4389            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4390            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4391            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4392            break;
4393        case 0xf7:
4394            /* maskmov : we must prepare A0 */
4395            if (mod != 3)
4396                goto illegal_op;
4397            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4398            gen_extu(s->aflag, cpu_A0);
4399            gen_add_A0_ds_seg(s);
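                /* The store address is implicitly rDI with the DS segment
                   (subject to override); modrm only names the source.  */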
4400
4401            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4402            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4403            /* XXX: introduce a new table? */
4404            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4405            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4406            break;
4407        default:
4408            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4409            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4410            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4411            break;
4412        }
4413        if (b == 0x2e || b == 0x2f) {
4414            set_cc_op(s, CC_OP_EFLAGS);
4415        }
4416    }
4417}
4418
4419/* convert one instruction. s->is_jmp is set if the translation must
4420   be stopped. Return the next pc value */
4421static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4422                               target_ulong pc_start)
4423{
4424    int b, prefixes;
4425    int shift;
4426    TCGMemOp ot, aflag, dflag;
4427    int modrm, reg, rm, mod, op, opreg, val;
4428    target_ulong next_eip, tval;
4429    int rex_w, rex_r;
4430
4431    s->pc_start = s->pc = pc_start;
4432    prefixes = 0;
4433    s->override = -1;
4434    rex_w = -1;
4435    rex_r = 0;
4436#ifdef TARGET_X86_64
4437    s->rex_x = 0;
4438    s->rex_b = 0;
4439    x86_64_hregs = 0;
4440#endif
4441    s->rip_offset = 0; /* for relative ip address */
4442    s->vex_l = 0;
4443    s->vex_v = 0;
4444 next_byte:
4445    /* x86 has an upper limit of 15 bytes for an instruction. Since we
4446     * do not want to decode and generate IR for an illegal
4447     * instruction, the following check limits the instruction size to
4448     * 25 bytes: 14 prefix + 1 opc + 6 (modrm+sib+ofs) + 4 imm */
4449    if (s->pc - pc_start > 14) {
4450        goto illegal_op;
4451    }
4452    b = cpu_ldub_code(env, s->pc);
4453    s->pc++;
4454    /* Collect prefixes.  */
4455    switch (b) {
4456    case 0xf3:
4457        prefixes |= PREFIX_REPZ;
4458        goto next_byte;
4459    case 0xf2:
4460        prefixes |= PREFIX_REPNZ;
4461        goto next_byte;
4462    case 0xf0:
4463        prefixes |= PREFIX_LOCK;
4464        goto next_byte;
4465    case 0x2e:
4466        s->override = R_CS;
4467        goto next_byte;
4468    case 0x36:
4469        s->override = R_SS;
4470        goto next_byte;
4471    case 0x3e:
4472        s->override = R_DS;
4473        goto next_byte;
4474    case 0x26:
4475        s->override = R_ES;
4476        goto next_byte;
4477    case 0x64:
4478        s->override = R_FS;
4479        goto next_byte;
4480    case 0x65:
4481        s->override = R_GS;
4482        goto next_byte;
4483    case 0x66:
4484        prefixes |= PREFIX_DATA;
4485        goto next_byte;
4486    case 0x67:
4487        prefixes |= PREFIX_ADR;
4488        goto next_byte;
4489#ifdef TARGET_X86_64
4490    case 0x40 ... 0x4f:
4491        if (CODE64(s)) {
4492            /* REX prefix */
4493            rex_w = (b >> 3) & 1;
4494            rex_r = (b & 0x4) << 1;
4495            s->rex_x = (b & 0x2) << 2;
4496            REX_B(s) = (b & 0x1) << 3;
4497            x86_64_hregs = 1; /* select uniform byte register addressing */
4498            goto next_byte;
4499        }
4500        break;
4501#endif
4502    case 0xc5: /* 2-byte VEX */
4503    case 0xc4: /* 3-byte VEX */
4504        /* VEX prefixes cannot be used except in 32-bit mode.
4505           Otherwise the instruction is LES or LDS.  */
4506        if (s->code32 && !s->vm86) {
4507            static const int pp_prefix[4] = {
4508                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4509            };
4510            int vex3, vex2 = cpu_ldub_code(env, s->pc);
4511
4512            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4513                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4514                   otherwise the instruction is LES or LDS.  */
4515                break;
4516            }
4517            s->pc++;
4518
4519            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4520            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4521                            | PREFIX_LOCK | PREFIX_DATA)) {
4522                goto illegal_op;
4523            }
4524#ifdef TARGET_X86_64
4525            if (x86_64_hregs) {
4526                goto illegal_op;
4527            }
4528#endif
4529            rex_r = (~vex2 >> 4) & 8;
4530            if (b == 0xc5) {
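                    /* 2-byte VEX: RvvvvLpp, with an implied 0f leading
                       opcode byte.  */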
4531                vex3 = vex2;
4532                b = cpu_ldub_code(env, s->pc++) | 0x100;
4533            } else {
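                    /* 3-byte VEX: byte 1 is RXBmmmmm, byte 2 WvvvvLpp.  */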
4534#ifdef TARGET_X86_64
4535                s->rex_x = (~vex2 >> 3) & 8;
4536                s->rex_b = (~vex2 >> 2) & 8;
4537#endif
4538                vex3 = cpu_ldub_code(env, s->pc++);
4539                rex_w = (vex3 >> 7) & 1;
4540                switch (vex2 & 0x1f) {
4541                case 0x01: /* Implied 0f leading opcode bytes.  */
4542                    b = cpu_ldub_code(env, s->pc++) | 0x100;
4543                    break;
4544                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4545                    b = 0x138;
4546                    break;
4547                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4548                    b = 0x13a;
4549                    break;
4550                default:   /* Reserved for future use.  */
4551                    goto unknown_op;
4552                }
4553            }
4554            s->vex_v = (~vex3 >> 3) & 0xf;
4555            s->vex_l = (vex3 >> 2) & 1;
4556            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4557        }
4558        break;
4559    }
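    /* Added worked example (not in the original source): for the byte
     * sequence "c4 e2 6b f7 c3", vex2 = 0xe2 has R/X/B all set (so
     * rex_r = rex_x = rex_b = 0) and mmmmm = 2, the implied 0f 38 map
     * (b = 0x138); vex3 = 0x6b gives rex_w = 0, vex_v = 2 (inverted
     * vvvv, i.e. EDX), vex_l = 0 and pp = 3 (an implied f2 prefix).
     * Together with opcode f7 that selects shrx %edx, %ebx, %eax.  */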
4560
4561    /* Post-process prefixes.  */
4562    if (CODE64(s)) {
4563        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4564           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4565           over 0x66 if both are present.  */
4566        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4567        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4568        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4569    } else {
4570        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4571        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4572            dflag = MO_32;
4573        } else {
4574            dflag = MO_16;
4575        }
4576        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4577        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4578            aflag = MO_32;
4579        } else {
4580            aflag = MO_16;
4581        }
4582    }
4583
4584    s->prefix = prefixes;
4585    s->aflag = aflag;
4586    s->dflag = dflag;
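    /* Added examples (not in the original source): in 64-bit code,
     * "66 48 89 d8" carries both 0x66 and REX.W, so rex_w wins and
     * dflag = MO_64 (mov %rbx, %rax), while "66 89 d8" alone gives
     * dflag = MO_16 (mov %bx, %ax); an 0x67 prefix in 64-bit code
     * selects aflag = MO_32, i.e. 32-bit effective addresses.  */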
4587
4588    /* now check op code */
4589 reswitch:
4590    switch(b) {
4591    case 0x0f:
4592        /**************************/
4593        /* extended op code */
4594        b = cpu_ldub_code(env, s->pc++) | 0x100;
4595        goto reswitch;
4596
4597        /**************************/
4598        /* arith & logic */
4599    case 0x00 ... 0x05:
4600    case 0x08 ... 0x0d:
4601    case 0x10 ... 0x15:
4602    case 0x18 ... 0x1d:
4603    case 0x20 ... 0x25:
4604    case 0x28 ... 0x2d:
4605    case 0x30 ... 0x35:
4606    case 0x38 ... 0x3d:
4607        {
4608            int op, f, val;
4609            op = (b >> 3) & 7;
4610            f = (b >> 1) & 3;
4611
4612            ot = mo_b_d(b, dflag);
4613
4614            switch(f) {
4615            case 0: /* OP Ev, Gv */
4616                modrm = cpu_ldub_code(env, s->pc++);
4617                reg = ((modrm >> 3) & 7) | rex_r;
4618                mod = (modrm >> 6) & 3;
4619                rm = (modrm & 7) | REX_B(s);
4620                if (mod != 3) {
4621                    gen_lea_modrm(env, s, modrm);
4622                    opreg = OR_TMP0;
4623                } else if (op == OP_XORL && rm == reg) {
4624                xor_zero:
4625                    /* xor reg, reg optimisation */
4626                    set_cc_op(s, CC_OP_CLR);
4627                    tcg_gen_movi_tl(cpu_T0, 0);
4628                    gen_op_mov_reg_v(ot, reg, cpu_T0);
4629                    break;
4630                } else {
4631                    opreg = rm;
4632                }
4633                gen_op_mov_v_reg(ot, cpu_T1, reg);
4634                gen_op(s, op, ot, opreg);
4635                break;
4636            case 1: /* OP Gv, Ev */
4637                modrm = cpu_ldub_code(env, s->pc++);
4638                mod = (modrm >> 6) & 3;
4639                reg = ((modrm >> 3) & 7) | rex_r;
4640                rm = (modrm & 7) | REX_B(s);
4641                if (mod != 3) {
4642                    gen_lea_modrm(env, s, modrm);
4643                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4644                } else if (op == OP_XORL && rm == reg) {
4645                    goto xor_zero;
4646                } else {
4647                    gen_op_mov_v_reg(ot, cpu_T1, rm);
4648                }
4649                gen_op(s, op, ot, reg);
4650                break;
4651            case 2: /* OP A, Iv */
4652                val = insn_get(env, s, ot);
4653                tcg_gen_movi_tl(cpu_T1, val);
4654                gen_op(s, op, ot, OR_EAX);
4655                break;
4656            }
4657        }
4658        break;
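    /* Added decode sketch (not in the original source): the opcode byte
     * itself encodes operation and form, e.g. b = 0x31 yields
     * op = (0x31 >> 3) & 7 = 6 = OP_XORL and f = (0x31 >> 1) & 3 = 0
     * (the "OP Ev, Gv" form), so "31 c0" (xor %eax, %eax) takes the
     * xor_zero fast path above and merely sets CC_OP_CLR.  */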
4659
4660    case 0x82:
4661        if (CODE64(s))
4662            goto illegal_op;
4663    case 0x80: /* GRP1 */
4664    case 0x81:
4665    case 0x83:
4666        {
4667            int val;
4668
4669            ot = mo_b_d(b, dflag);
4670
4671            modrm = cpu_ldub_code(env, s->pc++);
4672            mod = (modrm >> 6) & 3;
4673            rm = (modrm & 7) | REX_B(s);
4674            op = (modrm >> 3) & 7;
4675
4676            if (mod != 3) {
4677                if (b == 0x83)
4678                    s->rip_offset = 1;
4679                else
4680                    s->rip_offset = insn_const_size(ot);
4681                gen_lea_modrm(env, s, modrm);
4682                opreg = OR_TMP0;
4683            } else {
4684                opreg = rm;
4685            }
4686
4687            switch(b) {
4688            default:
4689            case 0x80:
4690            case 0x81:
4691            case 0x82:
4692                val = insn_get(env, s, ot);
4693                break;
4694            case 0x83:
4695                val = (int8_t)insn_get(env, s, MO_8);
4696                break;
4697            }
4698            tcg_gen_movi_tl(cpu_T1, val);
4699            gen_op(s, op, ot, opreg);
4700        }
4701        break;
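    /* Added example (not in the original source): "83 c0 05" is
     * add $5, %eax -- with b = 0x83 only one immediate byte is fetched
     * and sign-extended (val = 5), and modrm 0xc0 gives op = 0 (add)
     * with register operand rm = 0 (EAX).  */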
4702
4703        /**************************/
4704        /* inc, dec, and other misc arith */
4705    case 0x40 ... 0x47: /* inc Gv */
4706        ot = dflag;
4707        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4708        break;
4709    case 0x48 ... 0x4f: /* dec Gv */
4710        ot = dflag;
4711        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4712        break;
4713    case 0xf6: /* GRP3 */
4714    case 0xf7:
4715        ot = mo_b_d(b, dflag);
4716
4717        modrm = cpu_ldub_code(env, s->pc++);
4718        mod = (modrm >> 6) & 3;
4719        rm = (modrm & 7) | REX_B(s);
4720        op = (modrm >> 3) & 7;
4721        if (mod != 3) {
4722            if (op == 0) {
4723                s->rip_offset = insn_const_size(ot);
4724            }
4725            gen_lea_modrm(env, s, modrm);
4726            /* For those below that handle locked memory, don't load here.  */
4727            if (!(s->prefix & PREFIX_LOCK)
4728                || op != 2) {
4729                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4730            }
4731        } else {
4732            gen_op_mov_v_reg(ot, cpu_T0, rm);
4733        }
4734
4735        switch(op) {
4736        case 0: /* test */
4737            val = insn_get(env, s, ot);
4738            tcg_gen_movi_tl(cpu_T1, val);
4739            gen_op_testl_T0_T1_cc();
4740            set_cc_op(s, CC_OP_LOGICB + ot);
4741            break;
4742        case 2: /* not */
4743            if (s->prefix & PREFIX_LOCK) {
4744                if (mod == 3) {
4745                    goto illegal_op;
4746                }
4747                tcg_gen_movi_tl(cpu_T0, ~0);
4748                tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
4749                                            s->mem_index, ot | MO_LE);
4750            } else {
4751                tcg_gen_not_tl(cpu_T0, cpu_T0);
4752                if (mod != 3) {
4753                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4754                } else {
4755                    gen_op_mov_reg_v(ot, rm, cpu_T0);
4756                }
4757            }
4758            break;
4759        case 3: /* neg */
4760            if (s->prefix & PREFIX_LOCK) {
4761                TCGLabel *label1;
4762                TCGv a0, t0, t1, t2;
4763
4764                if (mod == 3) {
4765                    goto illegal_op;
4766                }
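                /* Added note: there is no single atomic negate, so a
                 * compare-and-swap retry loop is emitted: assume memory
                 * still holds the value loaded above, try to replace it
                 * with its negation, and retry with the freshly observed
                 * value if another CPU modified it in the meantime.  */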
4767                a0 = tcg_temp_local_new();
4768                t0 = tcg_temp_local_new();
4769                label1 = gen_new_label();
4770
4771                tcg_gen_mov_tl(a0, cpu_A0);
4772                tcg_gen_mov_tl(t0, cpu_T0);
4773
4774                gen_set_label(label1);
4775                t1 = tcg_temp_new();
4776                t2 = tcg_temp_new();
4777                tcg_gen_mov_tl(t2, t0);
4778                tcg_gen_neg_tl(t1, t0);
4779                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4780                                          s->mem_index, ot | MO_LE);
4781                tcg_temp_free(t1);
4782                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4783
4784                tcg_temp_free(t2);
4785                tcg_temp_free(a0);
4786                tcg_gen_mov_tl(cpu_T0, t0);
4787                tcg_temp_free(t0);
4788            } else {
4789                tcg_gen_neg_tl(cpu_T0, cpu_T0);
4790                if (mod != 3) {
4791                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4792                } else {
4793                    gen_op_mov_reg_v(ot, rm, cpu_T0);
4794                }
4795            }
4796            gen_op_update_neg_cc();
4797            set_cc_op(s, CC_OP_SUBB + ot);
4798            break;
4799        case 4: /* mul */
4800            switch(ot) {
4801            case MO_8:
4802                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4803                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
4804                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
4805                /* XXX: use 32 bit mul which could be faster */
4806                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4807                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4808                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4809                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
4810                set_cc_op(s, CC_OP_MULB);
4811                break;
4812            case MO_16:
4813                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4814                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4815                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
4816                /* XXX: use 32 bit mul which could be faster */
4817                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4818                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4819                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4820                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4821                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4822                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4823                set_cc_op(s, CC_OP_MULW);
4824                break;
4825            default:
4826            case MO_32:
4827                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4828                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4829                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4830                                  cpu_tmp2_i32, cpu_tmp3_i32);
4831                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4832                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4833                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4834                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4835                set_cc_op(s, CC_OP_MULL);
4836                break;
4837#ifdef TARGET_X86_64
4838            case MO_64:
4839                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4840                                  cpu_T0, cpu_regs[R_EAX]);
4841                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4842                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4843                set_cc_op(s, CC_OP_MULQ);
4844                break;
4845#endif
4846            }
4847            break;
4848        case 5: /* imul */
4849            switch(ot) {
4850            case MO_8:
4851                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4852                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4853                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
4854                /* XXX: use 32 bit mul which could be faster */
4855                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4856                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4857                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4858                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
4859                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4860                set_cc_op(s, CC_OP_MULB);
4861                break;
4862            case MO_16:
4863                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4864                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4865                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
4866                /* XXX: use 32 bit mul which could be faster */
4867                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4868                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4869                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4870                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
4871                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4872                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4873                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4874                set_cc_op(s, CC_OP_MULW);
4875                break;
4876            default:
4877            case MO_32:
4878                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4879                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4880                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4881                                  cpu_tmp2_i32, cpu_tmp3_i32);
4882                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4883                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4884                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4885                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4886                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4887                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4888                set_cc_op(s, CC_OP_MULL);
4889                break;
4890#ifdef TARGET_X86_64
4891            case MO_64:
4892                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4893                                  cpu_T0, cpu_regs[R_EAX]);
4894                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4895                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4896                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4897                set_cc_op(s, CC_OP_MULQ);
4898                break;
4899#endif
4900            }
4901            break;
4902        case 6: /* div */
4903            switch(ot) {
4904            case MO_8:
4905                gen_helper_divb_AL(cpu_env, cpu_T0);
4906                break;
4907            case MO_16:
4908                gen_helper_divw_AX(cpu_env, cpu_T0);
4909                break;
4910            default:
4911            case MO_32:
4912                gen_helper_divl_EAX(cpu_env, cpu_T0);
4913                break;
4914#ifdef TARGET_X86_64
4915            case MO_64:
4916                gen_helper_divq_EAX(cpu_env, cpu_T0);
4917                break;
4918#endif
4919            }
4920            break;
4921        case 7: /* idiv */
4922            switch(ot) {
4923            case MO_8:
4924                gen_helper_idivb_AL(cpu_env, cpu_T0);
4925                break;
4926            case MO_16:
4927                gen_helper_idivw_AX(cpu_env, cpu_T0);
4928                break;
4929            default:
4930            case MO_32:
4931                gen_helper_idivl_EAX(cpu_env, cpu_T0);
4932                break;
4933#ifdef TARGET_X86_64
4934            case MO_64:
4935                gen_helper_idivq_EAX(cpu_env, cpu_T0);
4936                break;
4937#endif
4938            }
4939            break;
4940        default:
4941            goto unknown_op;
4942        }
4943        break;
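    /* Added example (not in the original source): "f7 e3" is mul %ebx;
     * modrm 0xe3 gives op = 4 and rm = 3, so the MO_32 path above
     * computes EDX:EAX = EAX * EBX via mulu2 and derives CF/OF from the
     * high half kept in cc_src.  */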
4944
4945    case 0xfe: /* GRP4 */
4946    case 0xff: /* GRP5 */
4947        ot = mo_b_d(b, dflag);
4948
4949        modrm = cpu_ldub_code(env, s->pc++);
4950        mod = (modrm >> 6) & 3;
4951        rm = (modrm & 7) | REX_B(s);
4952        op = (modrm >> 3) & 7;
4953        if (op >= 2 && b == 0xfe) {
4954            goto unknown_op;
4955        }
4956        if (CODE64(s)) {
4957            if (op == 2 || op == 4) {
4958                /* operand size for jumps is 64 bit */
4959                ot = MO_64;
4960            } else if (op == 3 || op == 5) {
4961                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4962            } else if (op == 6) {
4963                /* default push size is 64 bit */
4964                ot = mo_pushpop(s, dflag);
4965            }
4966        }
4967        if (mod != 3) {
4968            gen_lea_modrm(env, s, modrm);
4969            if (op >= 2 && op != 3 && op != 5)
4970                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4971        } else {
4972            gen_op_mov_v_reg(ot, cpu_T0, rm);
4973        }
4974
4975        switch(op) {
4976        case 0: /* inc Ev */
4977            if (mod != 3)
4978                opreg = OR_TMP0;
4979            else
4980                opreg = rm;
4981            gen_inc(s, ot, opreg, 1);
4982            break;
4983        case 1: /* dec Ev */
4984            if (mod != 3)
4985                opreg = OR_TMP0;
4986            else
4987                opreg = rm;
4988            gen_inc(s, ot, opreg, -1);
4989            break;
4990        case 2: /* call Ev */
4991            /* XXX: optimize if memory (no 'and' is necessary) */
4992            if (dflag == MO_16) {
4993                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4994            }
4995            next_eip = s->pc - s->cs_base;
4996            tcg_gen_movi_tl(cpu_T1, next_eip);
4997            gen_push_v(s, cpu_T1);
4998            gen_op_jmp_v(cpu_T0);
4999            gen_bnd_jmp(s);
5000            gen_jr(s, cpu_T0);
5001            break;
5002        case 3: /* lcall Ev */
5003            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5004            gen_add_A0_im(s, 1 << ot);
5005            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5006        do_lcall:
5007            if (s->pe && !s->vm86) {
5008                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5009                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
5010                                           tcg_const_i32(dflag - 1),
5011                                           tcg_const_tl(s->pc - s->cs_base));
5012            } else {
5013                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5014                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
5015                                      tcg_const_i32(dflag - 1),
5016                                      tcg_const_i32(s->pc - s->cs_base));
5017            }
5018            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
5019            gen_jr(s, cpu_tmp4);
5020            break;
5021        case 4: /* jmp Ev */
5022            if (dflag == MO_16) {
5023                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5024            }
5025            gen_op_jmp_v(cpu_T0);
5026            gen_bnd_jmp(s);
5027            gen_jr(s, cpu_T0);
5028            break;
5029        case 5: /* ljmp Ev */
5030            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5031            gen_add_A0_im(s, 1 << ot);
5032            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5033        do_ljmp:
5034            if (s->pe && !s->vm86) {
5035                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5036                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
5037                                          tcg_const_tl(s->pc - s->cs_base));
5038            } else {
5039                gen_op_movl_seg_T0_vm(R_CS);
5040                gen_op_jmp_v(cpu_T1);
5041            }
5042            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
5043            gen_jr(s, cpu_tmp4);
5044            break;
5045        case 6: /* push Ev */
5046            gen_push_v(s, cpu_T0);
5047            break;
5048        default:
5049            goto unknown_op;
5050        }
5051        break;
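    /* Added example (not in the original source): "ff d0" is
     * call *%eax; modrm 0xd0 gives op = 2 and rm = 0, so the code above
     * pushes the return address and jumps to the value in EAX, using
     * gen_jr so TCG can look up and chain to the target TB directly
     * when possible.  */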
5052
5053    case 0x84: /* test Ev, Gv */
5054    case 0x85:
5055        ot = mo_b_d(b, dflag);
5056
5057        modrm = cpu_ldub_code(env, s->pc++);
5058        reg = ((modrm >> 3) & 7) | rex_r;
5059
5060        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5061        gen_op_mov_v_reg(ot, cpu_T1, reg);
5062        gen_op_testl_T0_T1_cc();
5063        set_cc_op(s, CC_OP_LOGICB + ot);
5064        break;
5065
5066    case 0xa8: /* test eAX, Iv */
5067    case 0xa9:
5068        ot = mo_b_d(b, dflag);
5069        val = insn_get(env, s, ot);
5070
5071        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
5072        tcg_gen_movi_tl(cpu_T1, val);
5073        gen_op_testl_T0_T1_cc();
5074        set_cc_op(s, CC_OP_LOGICB + ot);
5075        break;
5076
5077    case 0x98: /* CWDE/CBW */
5078        switch (dflag) {
5079#ifdef TARGET_X86_64
5080        case MO_64:
5081            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5082            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5083            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
5084            break;
5085#endif
5086        case MO_32:
5087            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5088            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5089            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
5090            break;
5091        case MO_16:
5092            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
5093            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5094            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
5095            break;
5096        default:
5097            tcg_abort();
5098        }
5099        break;
5100    case 0x99: /* CDQ/CWD */
5101        switch (dflag) {
5102#ifdef TARGET_X86_64
5103        case MO_64:
5104            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
5105            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
5106            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
5107            break;
5108#endif
5109        case MO_32:
5110            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5111            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5112            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
5113            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
5114            break;
5115        case MO_16:
5116            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5117            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5118            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
5119            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
5120            break;
5121        default:
5122            tcg_abort();
5123        }
5124        break;
5125    case 0x1af: /* imul Gv, Ev */
5126    case 0x69: /* imul Gv, Ev, I */
5127    case 0x6b:
5128        ot = dflag;
5129        modrm = cpu_ldub_code(env, s->pc++);
5130        reg = ((modrm >> 3) & 7) | rex_r;
5131        if (b == 0x69)
5132            s->rip_offset = insn_const_size(ot);
5133        else if (b == 0x6b)
5134            s->rip_offset = 1;
5135        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5136        if (b == 0x69) {
5137            val = insn_get(env, s, ot);
5138            tcg_gen_movi_tl(cpu_T1, val);
5139        } else if (b == 0x6b) {
5140            val = (int8_t)insn_get(env, s, MO_8);
5141            tcg_gen_movi_tl(cpu_T1, val);
5142        } else {
5143            gen_op_mov_v_reg(ot, cpu_T1, reg);
5144        }
5145        switch (ot) {
5146#ifdef TARGET_X86_64
5147        case MO_64:
5148            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
5149            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5150            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5151            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
5152            break;
5153#endif
5154        case MO_32:
5155            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5156            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
5157            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5158                              cpu_tmp2_i32, cpu_tmp3_i32);
5159            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5160            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5161            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5162            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5163            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5164            break;
5165        default:
5166            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5167            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
5168            /* XXX: use 32 bit mul which could be faster */
5169            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
5170            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
5171            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
5172            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
5173            gen_op_mov_reg_v(ot, reg, cpu_T0);
5174            break;
5175        }
5176        set_cc_op(s, CC_OP_MULB + ot);
5177        break;
5178    case 0x1c0:
5179    case 0x1c1: /* xadd Ev, Gv */
5180        ot = mo_b_d(b, dflag);
5181        modrm = cpu_ldub_code(env, s->pc++);
5182        reg = ((modrm >> 3) & 7) | rex_r;
5183        mod = (modrm >> 6) & 3;
5184        gen_op_mov_v_reg(ot, cpu_T0, reg);
5185        if (mod == 3) {
5186            rm = (modrm & 7) | REX_B(s);
5187            gen_op_mov_v_reg(ot, cpu_T1, rm);
5188            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5189            gen_op_mov_reg_v(ot, reg, cpu_T1);
5190            gen_op_mov_reg_v(ot, rm, cpu_T0);
5191        } else {
5192            gen_lea_modrm(env, s, modrm);
5193            if (s->prefix & PREFIX_LOCK) {
5194                tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
5195                                            s->mem_index, ot | MO_LE);
5196                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5197            } else {
5198                gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5199                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5200                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5201            }
5202            gen_op_mov_reg_v(ot, reg, cpu_T1);
5203        }
5204        gen_op_update2_cc();
5205        set_cc_op(s, CC_OP_ADDB + ot);
5206        break;
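    /* Added note: with a LOCK prefix the code above maps xadd onto one
     * atomic fetch-and-add, e.g. "f0 0f c1 0a" (lock xadd %ecx, (%edx))
     * leaves the sum in memory and the old memory value in ECX; the
     * flags are then computed from the private copies in T0/T1.  */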
5207    case 0x1b0:
5208    case 0x1b1: /* cmpxchg Ev, Gv */
5209        {
5210            TCGv oldv, newv, cmpv;
5211
5212            ot = mo_b_d(b, dflag);
5213            modrm = cpu_ldub_code(env, s->pc++);
5214            reg = ((modrm >> 3) & 7) | rex_r;
5215            mod = (modrm >> 6) & 3;
5216            oldv = tcg_temp_new();
5217            newv = tcg_temp_new();
5218            cmpv = tcg_temp_new();
5219            gen_op_mov_v_reg(ot, newv, reg);
5220            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5221
5222            if (s->prefix & PREFIX_LOCK) {
5223                if (mod == 3) {
5224                    goto illegal_op;
5225                }
5226                gen_lea_modrm(env, s, modrm);
5227                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
5228                                          s->mem_index, ot | MO_LE);
5229                gen_op_mov_reg_v(ot, R_EAX, oldv);
5230            } else {
5231                if (mod == 3) {
5232                    rm = (modrm & 7) | REX_B(s);
5233                    gen_op_mov_v_reg(ot, oldv, rm);
5234                } else {
5235                    gen_lea_modrm(env, s, modrm);
5236                    gen_op_ld_v(s, ot, oldv, cpu_A0);
5237                    rm = 0; /* avoid warning */
5238                }
5239                gen_extu(ot, oldv);
5240                gen_extu(ot, cmpv);
5241                /* store value = (old == cmp ? new : old);  */
5242                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5243                if (mod == 3) {
5244                    gen_op_mov_reg_v(ot, R_EAX, oldv);
5245                    gen_op_mov_reg_v(ot, rm, newv);
5246                } else {
5247                    /* Perform an unconditional store cycle, as a physical
5248                       CPU does; the store must precede the accumulator
5249                       update to keep the instruction idempotent if the
5250                       store faults and the instruction is restarted */
5251                    gen_op_st_v(s, ot, newv, cpu_A0);
5252                    gen_op_mov_reg_v(ot, R_EAX, oldv);
5253                }
5254            }
5255            tcg_gen_mov_tl(cpu_cc_src, oldv);
5256            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
5257            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5258            set_cc_op(s, CC_OP_SUBB + ot);
5259            tcg_temp_free(oldv);
5260            tcg_temp_free(newv);
5261            tcg_temp_free(cmpv);
5262        }
5263        break;
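    /* Added note: e.g. "f0 0f b1 0a" (lock cmpxchg %ecx, (%edx))
     * compares EAX against the value at (%edx); the atomic helper above
     * stores ECX on a match and EAX always receives the old value
     * (equivalent to hardware, which only reloads EAX on a mismatch).
     * Without LOCK, the same compare-and-swap is emulated
     * non-atomically, keeping x86's unconditional store on the memory
     * path.  */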
5264    case 0x1c7: /* cmpxchg8b */
5265        modrm = cpu_ldub_code(env, s->pc++);
5266        mod = (modrm >> 6) & 3;
5267        if ((mod == 3) || ((modrm & 0x38) != 0x8))
5268            goto illegal_op;
5269#ifdef TARGET_X86_64
5270        if (dflag == MO_64) {
5271            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5272                goto illegal_op;
5273            gen_lea_modrm(env, s, modrm);
5274            if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
5275                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
5276            } else {
5277                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
5278            }
5279        } else
5280#endif
5281        {
5282            if (!(s->cpuid_features & CPUID_CX8))
5283                goto illegal_op;
5284            gen_lea_modrm(env, s, modrm);
5285            if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
5286                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
5287            } else {
5288                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
5289            }
5290        }
5291        set_cc_op(s, CC_OP_EFLAGS);
5292        break;
5293
5294        /**************************/
5295        /* push/pop */
5296    case 0x50 ... 0x57: /* push */
5297        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
5298        gen_push_v(s, cpu_T0);
5299        break;
5300    case 0x58 ... 0x5f: /* pop */
5301        ot = gen_pop_T0(s);
5302        /* NOTE: order is important for pop %sp */
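        /* Added explanation: gen_pop_update advances ESP before the
         * register write below, so for "pop %sp" the popped value
         * overwrites the incremented stack pointer, as on hardware.  */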
5303        gen_pop_update(s, ot);
5304        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
5305        break;
5306    case 0x60: /* pusha */
5307        if (CODE64(s))
5308            goto illegal_op;
5309        gen_pusha(s);
5310        break;
5311    case 0x61: /* popa */
5312        if (CODE64(s))
5313            goto illegal_op;
5314        gen_popa(s);
5315        break;
5316    case 0x68: /* push Iv */
5317    case 0x6a:
5318        ot = mo_pushpop(s, dflag);
5319        if (b == 0x68)
5320            val = insn_get(env, s, ot);
5321        else
5322            val = (int8_t)insn_get(env, s, MO_8);
5323        tcg_gen_movi_tl(cpu_T0, val);
5324        gen_push_v(s, cpu_T0);
5325        break;
5326    case 0x8f: /* pop Ev */
5327        modrm = cpu_ldub_code(env, s->pc++);
5328        mod = (modrm >> 6) & 3;
5329        ot = gen_pop_T0(s);
5330        if (mod == 3) {
5331            /* NOTE: order is important for pop %sp */
5332            gen_pop_update(s, ot);
5333            rm = (modrm & 7) | REX_B(s);
5334            gen_op_mov_reg_v(ot, rm, cpu_T0);
5335        } else {
5336            /* NOTE: order is important too for MMU exceptions */
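            /* Added explanation: when the memory destination is
             * addressed via ESP, x86 uses the ESP value *after* the pop;
             * popl_esp_hack biases ESP-based address computation by the
             * operand size so the store sees the post-pop stack
             * pointer.  */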
5337            s->popl_esp_hack = 1 << ot;
5338            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5339            s->popl_esp_hack = 0;
5340            gen_pop_update(s, ot);
5341        }
5342        break;
5343    case 0xc8: /* enter */
5344        {
5345            int level;
5346            val = cpu_lduw_code(env, s->pc);
5347            s->pc += 2;
5348            level = cpu_ldub_code(env, s->pc++);
5349            gen_enter(s, val, level);
5350        }
5351        break;
5352    case 0xc9: /* leave */
5353        gen_leave(s);
5354        break;
5355    case 0x06: /* push es */
5356    case 0x0e: /* push cs */
5357    case 0x16: /* push ss */
5358    case 0x1e: /* push ds */
5359        if (CODE64(s))
5360            goto illegal_op;
5361        gen_op_movl_T0_seg(b >> 3);
5362        gen_push_v(s, cpu_T0);
5363        break;
5364    case 0x1a0: /* push fs */
5365    case 0x1a8: /* push gs */
5366        gen_op_movl_T0_seg((b >> 3) & 7);
5367        gen_push_v(s, cpu_T0);
5368        break;
5369    case 0x07: /* pop es */
5370    case 0x17: /* pop ss */
5371    case 0x1f: /* pop ds */
5372        if (CODE64(s))
5373            goto illegal_op;
5374        reg = b >> 3;
5375        ot = gen_pop_T0(s);
5376        gen_movl_seg_T0(s, reg);
5377        gen_pop_update(s, ot);
5378        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5379        if (s->is_jmp) {
5380            gen_jmp_im(s->pc - s->cs_base);
5381            if (reg == R_SS) {
5382                s->tf = 0;
5383                gen_eob_inhibit_irq(s, true);
5384            } else {
5385                gen_eob(s);
5386            }
5387        }
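        /* Added note: a pop into SS must inhibit interrupts and traps
         * for one instruction, hence the TB ends via
         * gen_eob_inhibit_irq so the next instruction executes before
         * any interrupt can be delivered.  */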
5388        break;
5389    case 0x1a1: /* pop fs */
5390    case 0x1a9: /* pop gs */
5391        ot = gen_pop_T0(s);
5392        gen_movl_seg_T0(s, (b >> 3) & 7);
5393        gen_pop_update(s, ot);
5394        if (s->is_jmp) {
5395            gen_jmp_im(s->pc - s->cs_base);
5396            gen_eob(s);
5397        }
5398        break;
5399
5400        /**************************/
5401        /* mov */
5402    case 0x88:
5403    case 0x89: /* mov Ev, Gv */
5404        ot = mo_b_d(b, dflag);
5405        modrm = cpu_ldub_code(env, s->pc++);
5406        reg = ((modrm >> 3) & 7) | rex_r;
5407
5408        /* generate a generic store */
5409        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5410        break;
5411    case 0xc6:
5412    case 0xc7: /* mov Ev, Iv */
5413        ot = mo_b_d(b, dflag);
5414        modrm = cpu_ldub_code(env, s->pc++);
5415        mod = (modrm >> 6) & 3;
5416        if (mod != 3) {
5417            s->rip_offset = insn_const_size(ot);
5418            gen_lea_modrm(env, s, modrm);
5419        }
5420        val = insn_get(env, s, ot);
5421        tcg_gen_movi_tl(cpu_T0, val);
5422        if (mod != 3) {
5423            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5424        } else {
5425            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
5426        }
5427        break;
5428    case 0x8a:
5429    case 0x8b: /* mov Gv, Ev */
5430        ot = mo_b_d(b, dflag);
5431        modrm = cpu_ldub_code(env, s->pc++);
5432        reg = ((modrm >> 3) & 7) | rex_r;
5433
5434        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5435        gen_op_mov_reg_v(ot, reg, cpu_T0);
5436        break;
5437    case 0x8e: /* mov seg, Ev */
5438        modrm = cpu_ldub_code(env, s->pc++);
5439        reg = (modrm >> 3) & 7;
5440        if (reg >= 6 || reg == R_CS)
5441            goto illegal_op;
5442        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5443        gen_movl_seg_T0(s, reg);
5444        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5445        if (s->is_jmp) {
5446            gen_jmp_im(s->pc - s->cs_base);
5447            if (reg == R_SS) {
5448                s->tf = 0;
5449                gen_eob_inhibit_irq(s, true);
5450            } else {
5451                gen_eob(s);
5452            }
5453        }
5454        break;
5455    case 0x8c: /* mov Ev, seg */
5456        modrm = cpu_ldub_code(env, s->pc++);
5457        reg = (modrm >> 3) & 7;
5458        mod = (modrm >> 6) & 3;
5459        if (reg >= 6)
5460            goto illegal_op;
5461        gen_op_movl_T0_seg(reg);
5462        ot = mod == 3 ? dflag : MO_16;
5463        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5464        break;
5465
5466    case 0x1b6: /* movzbS Gv, Eb */
5467    case 0x1b7: /* movzwS Gv, Ew */
5468    case 0x1be: /* movsbS Gv, Eb */
5469    case 0x1bf: /* movswS Gv, Ew */
5470        {
5471            TCGMemOp d_ot;
5472            TCGMemOp s_ot;
5473
5474            /* d_ot is the size of destination */
5475            d_ot = dflag;
5476            /* ot is the size of source */
5477            ot = (b & 1) + MO_8;
5478            /* s_ot is the sign+size of source */
5479            s_ot = b & 8 ? MO_SIGN | ot : ot;
5480
5481            modrm = cpu_ldub_code(env, s->pc++);
5482            reg = ((modrm >> 3) & 7) | rex_r;
5483            mod = (modrm >> 6) & 3;
5484            rm = (modrm & 7) | REX_B(s);
5485
5486            if (mod == 3) {
5487                if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
5488                    tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
5489                } else {
5490                    gen_op_mov_v_reg(ot, cpu_T0, rm);
5491                    switch (s_ot) {
5492                    case MO_UB:
5493                        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
5494                        break;
5495                    case MO_SB:
5496                        tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5497                        break;
5498                    case MO_UW:
5499                        tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5500                        break;
5501                    default:
5502                    case MO_SW:
5503                        tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5504                        break;
5505                    }
5506                }
5507                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5508            } else {
5509                gen_lea_modrm(env, s, modrm);
5510                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
5511                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5512            }
5513        }
5514        break;
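    /* Added example (not in the original source): "0f be c4" is
     * movsbl %ah, %eax -- mod = 3, reg = 0, rm = 4, and without a REX
     * prefix rm = 4 names AH, so the sextract path above extracts bits
     * 8..15 of EAX and sign-extends them into EAX.  */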
5515
5516    case 0x8d: /* lea */
5517        modrm = cpu_ldub_code(env, s->pc++);
5518        mod = (modrm >> 6) & 3;
5519        if (mod == 3)
5520            goto illegal_op;
5521        reg = ((modrm >> 3) & 7) | rex_r;
5522        {
5523            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5524            TCGv ea = gen_lea_modrm_1(a);
5525            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5526            gen_op_mov_reg_v(dflag, reg, cpu_A0);
5527        }
5528        break;
5529
5530    case 0xa0: /* mov EAX, Ov */
5531    case 0xa1:
5532    case 0xa2: /* mov Ov, EAX */
5533    case 0xa3:
5534        {
5535            target_ulong offset_addr;
5536
5537            ot = mo_b_d(b, dflag);
5538            switch (s->aflag) {
5539#ifdef TARGET_X86_64
5540            case MO_64:
5541                offset_addr = cpu_ldq_code(env, s->pc);
5542                s->pc += 8;
5543                break;
5544#endif
5545            default:
5546                offset_addr = insn_get(env, s, s->aflag);
5547                break;
5548            }
5549            tcg_gen_movi_tl(cpu_A0, offset_addr);
5550            gen_add_A0_ds_seg(s);
5551            if ((b & 2) == 0) {
5552                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
5553                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
5554            } else {
5555                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
5556                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5557            }
5558        }
5559        break;
5560    case 0xd7: /* xlat */
5561        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
5562        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
5563        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
5564        gen_extu(s->aflag, cpu_A0);
5565        gen_add_A0_ds_seg(s);
5566        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
5567        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
5568        break;
5569    case 0xb0 ... 0xb7: /* mov R, Ib */
5570        val = insn_get(env, s, MO_8);
5571        tcg_gen_movi_tl(cpu_T0, val);
5572        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
5573        break;
5574    case 0xb8 ... 0xbf: /* mov R, Iv */
5575#ifdef TARGET_X86_64
5576        if (dflag == MO_64) {
5577            uint64_t tmp;
5578            /* 64 bit case */
5579            tmp = cpu_ldq_code(env, s->pc);
5580            s->pc += 8;
5581            reg = (b & 7) | REX_B(s);
5582            tcg_gen_movi_tl(cpu_T0, tmp);
5583            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
5584        } else
5585#endif
5586        {
5587            ot = dflag;
5588            val = insn_get(env, s, ot);
5589            reg = (b & 7) | REX_B(s);
5590            tcg_gen_movi_tl(cpu_T0, val);
5591            gen_op_mov_reg_v(ot, reg, cpu_T0);
5592        }
5593        break;
5594
5595    case 0x91 ... 0x97: /* xchg R, EAX */
5596    do_xchg_reg_eax:
5597        ot = dflag;
5598        reg = (b & 7) | REX_B(s);
5599        rm = R_EAX;
5600        goto do_xchg_reg;
5601    case 0x86:
5602    case 0x87: /* xchg Ev, Gv */
5603        ot = mo_b_d(b, dflag);
5604        modrm = cpu_ldub_code(env, s->pc++);
5605        reg = ((modrm >> 3) & 7) | rex_r;
5606        mod = (modrm >> 6) & 3;
5607        if (mod == 3) {
5608            rm = (modrm & 7) | REX_B(s);
5609        do_xchg_reg:
5610            gen_op_mov_v_reg(ot, cpu_T0, reg);
5611            gen_op_mov_v_reg(ot, cpu_T1, rm);
5612            gen_op_mov_reg_v(ot, rm, cpu_T0);
5613            gen_op_mov_reg_v(ot, reg, cpu_T1);
5614        } else {
5615            gen_lea_modrm(env, s, modrm);
5616            gen_op_mov_v_reg(ot, cpu_T0, reg);
5617            /* for xchg, lock is implicit */
5618            tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
5619                                   s->mem_index, ot | MO_LE);
5620            gen_op_mov_reg_v(ot, reg, cpu_T1);
5621        }
5622        break;
5623    case 0xc4: /* les Gv */
5624        /* In CODE64 this is VEX3; see above.  */
5625        op = R_ES;
5626        goto do_lxx;
5627    case 0xc5: /* lds Gv */
5628        /* In CODE64 this is VEX2; see above.  */
5629        op = R_DS;
5630        goto do_lxx;
5631    case 0x1b2: /* lss Gv */
5632        op = R_SS;
5633        goto do_lxx;
5634    case 0x1b4: /* lfs Gv */
5635        op = R_FS;
5636        goto do_lxx;
5637    case 0x1b5: /* lgs Gv */
5638        op = R_GS;
5639    do_lxx:
5640        ot = dflag != MO_16 ? MO_32 : MO_16;
5641        modrm = cpu_ldub_code(env, s->pc++);
5642        reg = ((modrm >> 3) & 7) | rex_r;
5643        mod = (modrm >> 6) & 3;
5644        if (mod == 3)
5645            goto illegal_op;
5646        gen_lea_modrm(env, s, modrm);
5647        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5648        gen_add_A0_im(s, 1 << ot);
5649        /* load the segment first to handle exceptions properly */
5650        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5651        gen_movl_seg_T0(s, op);
5652        /* then put the data */
5653        gen_op_mov_reg_v(ot, reg, cpu_T1);
5654        if (s->is_jmp) {
5655            gen_jmp_im(s->pc - s->cs_base);
5656            gen_eob(s);
5657        }
5658        break;
5659
5660        /************************/
5661        /* shifts */
5662    case 0xc0:
5663    case 0xc1:
5664        /* shift Ev,Ib */
5665        shift = 2;
5666    grp2:
5667        {
5668            ot = mo_b_d(b, dflag);
5669            modrm = cpu_ldub_code(env, s->pc++);
5670            mod = (modrm >> 6) & 3;
5671            op = (modrm >> 3) & 7;
5672
5673            if (mod != 3) {
5674                if (shift == 2) {
5675                    s->rip_offset = 1;
5676                }
5677                gen_lea_modrm(env, s, modrm);
5678                opreg = OR_TMP0;
5679            } else {
5680                opreg = (modrm & 7) | REX_B(s);
5681            }
5682
5683            /* shift count comes from CL (shift == 0) or an imm byte */
5684            if (shift == 0) {
5685                gen_shift(s, op, ot, opreg, OR_ECX);
5686            } else {
5687                if (shift == 2) {
5688                    shift = cpu_ldub_code(env, s->pc++);
5689                }
5690                gen_shifti(s, op, ot, opreg, shift);
5691            }
5692        }
5693        break;
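    /* Added example (not in the original source): "c1 e0 04" is
     * shl $4, %eax -- b = 0xc1 sets shift = 2, modrm 0xe0 selects
     * op = 4 (SHL) on register EAX, and the immediate count 4 is then
     * fetched and passed to gen_shifti.  */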
5694    case 0xd0:
5695    case 0xd1:
5696        /* shift Ev,1 */
5697        shift = 1;
5698        goto grp2;
5699    case 0xd2:
5700    case 0xd3:
5701        /* shift Ev,cl */
5702        shift = 0;
5703        goto grp2;
5704
5705    case 0x1a4: /* shld imm */
5706        op = 0;
5707        shift = 1;
5708        goto do_shiftd;
5709    case 0x1a5: /* shld cl */
5710        op = 0;
5711        shift = 0;
5712        goto do_shiftd;
5713    case 0x1ac: /* shrd imm */
5714        op = 1;
5715        shift = 1;
5716        goto do_shiftd;
5717    case 0x1ad: /* shrd cl */
5718        op = 1;
5719        shift = 0;
5720    do_shiftd:
5721        ot = dflag;
5722        modrm = cpu_ldub_code(env, s->pc++);
5723        mod = (modrm >> 6) & 3;
5724        rm = (modrm & 7) | REX_B(s);
5725        reg = ((modrm >> 3) & 7) | rex_r;
5726        if (mod != 3) {
5727            gen_lea_modrm(env, s, modrm);
5728            opreg = OR_TMP0;
5729        } else {
5730            opreg = rm;
5731        }
5732        gen_op_mov_v_reg(ot, cpu_T1, reg);
5733
5734        if (shift) {
5735            TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
5736            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5737            tcg_temp_free(imm);
5738        } else {
5739            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5740        }
5741        break;
5742
5743        /************************/
5744        /* floats */
5745    case 0xd8 ... 0xdf:
5746        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5747            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5748            /* XXX: what to do if illegal op? */
5749            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5750            break;
5751        }
5752        modrm = cpu_ldub_code(env, s->pc++);
5753        mod = (modrm >> 6) & 3;
5754        rm = modrm & 7;
5755        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5756        if (mod != 3) {
5757            /* memory op */
5758            gen_lea_modrm(env, s, modrm);
5759            switch(op) {
5760            case 0x00 ... 0x07: /* fxxxs */
5761            case 0x10 ... 0x17: /* fixxxl */
5762            case 0x20 ... 0x27: /* fxxxl */
5763            case 0x30 ... 0x37: /* fixxx */
5764                {
5765                    int op1;
5766                    op1 = op & 7;
5767
5768                    switch(op >> 4) {
5769                    case 0:
5770                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5771                                            s->mem_index, MO_LEUL);
5772                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5773                        break;
5774                    case 1:
5775                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5776                                            s->mem_index, MO_LEUL);
5777                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5778                        break;
5779                    case 2:
5780                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5781                                            s->mem_index, MO_LEQ);
5782                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5783                        break;
5784                    case 3:
5785                    default:
5786                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5787                                            s->mem_index, MO_LESW);
5788                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5789                        break;
5790                    }
5791
5792                    gen_helper_fp_arith_ST0_FT0(op1);
5793                    if (op1 == 3) {
5794                        /* fcomp needs pop */
5795                        gen_helper_fpop(cpu_env);
5796                    }
5797                }
5798                break;
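            /* Added example (not in the original source): "dc 4d f8" is
             * fmull -8(%ebp) with 32-bit addressing -- b = 0xdc and
             * reg = 1 give op = 0x21, i.e. the double-precision "fxxxl"
             * group with op1 = 1 (fmul), so the MO_LEQ load above fills
             * FT0 and gen_helper_fp_arith_ST0_FT0(1) performs
             * ST0 *= FT0.  */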
5799            case 0x08: /* flds */
5800            case 0x0a: /* fsts */
5801            case 0x0b: /* fstps */
5802            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5803            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5804            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5805                switch(op & 7) {
5806                case 0:
5807                    switch(op >> 4) {
5808                    case 0:
5809                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5810                                            s->mem_index, MO_LEUL);
5811                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5812                        break;
5813                    case 1:
5814                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5815                                            s->mem_index, MO_LEUL);
5816                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5817                        break;
5818                    case 2:
5819                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5820                                            s->mem_index, MO_LEQ);
5821                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5822                        break;
5823                    case 3:
5824                    default:
5825                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5826                                            s->mem_index, MO_LESW);
5827                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5828                        break;
5829                    }
5830                    break;
5831                case 1:
5832                    /* XXX: the corresponding CPUID bit (SSE3) must be tested! */
5833                    switch(op >> 4) {
5834                    case 1:
5835                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5836                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5837                                            s->mem_index, MO_LEUL);
5838                        break;
5839                    case 2:
5840                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5841                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5842                                            s->mem_index, MO_LEQ);
5843                        break;
5844                    case 3:
5845                    default:
5846                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5847                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5848                                            s->mem_index, MO_LEUW);
5849                        break;
5850                    }
5851                    gen_helper_fpop(cpu_env);
5852                    break;
5853                default:
5854                    switch(op >> 4) {
5855                    case 0:
5856                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5857                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5858                                            s->mem_index, MO_LEUL);
5859                        break;
5860                    case 1:
5861                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5862                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5863                                            s->mem_index, MO_LEUL);
5864                        break;
5865                    case 2:
5866                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5867                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5868                                            s->mem_index, MO_LEQ);
5869                        break;
5870                    case 3:
5871                    default:
5872                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5873                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5874                                            s->mem_index, MO_LEUW);
5875                        break;
5876                    }
5877                    if ((op & 7) == 3)
5878                        gen_helper_fpop(cpu_env);
5879                    break;
5880                }
5881                break;
5882            case 0x0c: /* fldenv mem */
5883                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5884                break;
5885            case 0x0d: /* fldcw mem */
5886                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5887                                    s->mem_index, MO_LEUW);
5888                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5889                break;
5890            case 0x0e: /* fnstenv mem */
5891                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5892                break;
5893            case 0x0f: /* fnstcw mem */
5894                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5895                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5896                                    s->mem_index, MO_LEUW);
5897                break;
5898            case 0x1d: /* fldt mem */
5899                gen_helper_fldt_ST0(cpu_env, cpu_A0);
5900                break;
5901            case 0x1f: /* fstpt mem */
5902                gen_helper_fstt_ST0(cpu_env, cpu_A0);
5903                gen_helper_fpop(cpu_env);
5904                break;
5905            case 0x2c: /* frstor mem */
5906                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5907                break;
5908            case 0x2e: /* fnsave mem */
5909                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5910                break;
5911            case 0x2f: /* fnstsw mem */
5912                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5913                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5914                                    s->mem_index, MO_LEUW);
5915                break;
5916            case 0x3c: /* fbld */
5917                gen_helper_fbld_ST0(cpu_env, cpu_A0);
5918                break;
5919            case 0x3e: /* fbstp */
5920                gen_helper_fbst_ST0(cpu_env, cpu_A0);
5921                gen_helper_fpop(cpu_env);
5922                break;
5923            case 0x3d: /* fildll */
5924                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5925                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5926                break;
5927            case 0x3f: /* fistpll */
5928                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5929                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5930                gen_helper_fpop(cpu_env);
5931                break;
5932            default:
5933                goto unknown_op;
5934            }
5935        } else {
5936            /* register float ops */
5937            opreg = rm;
5938
5939            switch(op) {
5940            case 0x08: /* fld sti */
5941                gen_helper_fpush(cpu_env);
5942                gen_helper_fmov_ST0_STN(cpu_env,
5943                                        tcg_const_i32((opreg + 1) & 7));
5944                break;
5945            case 0x09: /* fxchg sti */
5946            case 0x29: /* fxchg4 sti, undocumented op */
5947            case 0x39: /* fxchg7 sti, undocumented op */
5948                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5949                break;
5950            case 0x0a: /* grp d9/2 */
5951                switch(rm) {
5952                case 0: /* fnop */
5953                    /* check exceptions (FreeBSD FPU probe) */
5954                    gen_helper_fwait(cpu_env);
5955                    break;
5956                default:
5957                    goto unknown_op;
5958                }
5959                break;
5960            case 0x0c: /* grp d9/4 */
5961                switch(rm) {
5962                case 0: /* fchs */
5963                    gen_helper_fchs_ST0(cpu_env);
5964                    break;
5965                case 1: /* fabs */
5966                    gen_helper_fabs_ST0(cpu_env);
5967                    break;
5968                case 4: /* ftst */
5969                    gen_helper_fldz_FT0(cpu_env);
5970                    gen_helper_fcom_ST0_FT0(cpu_env);
5971                    break;
5972                case 5: /* fxam */
5973                    gen_helper_fxam_ST0(cpu_env);
5974                    break;
5975                default:
5976                    goto unknown_op;
5977                }
5978                break;
5979            case 0x0d: /* grp d9/5 */
5980                {
5981                    switch(rm) {
5982                    case 0:
5983                        gen_helper_fpush(cpu_env);
5984                        gen_helper_fld1_ST0(cpu_env);
5985                        break;
5986                    case 1:
5987                        gen_helper_fpush(cpu_env);
5988                        gen_helper_fldl2t_ST0(cpu_env);
5989                        break;
5990                    case 2:
5991                        gen_helper_fpush(cpu_env);
5992                        gen_helper_fldl2e_ST0(cpu_env);
5993                        break;
5994                    case 3:
5995                        gen_helper_fpush(cpu_env);
5996                        gen_helper_fldpi_ST0(cpu_env);
5997                        break;
5998                    case 4:
5999                        gen_helper_fpush(cpu_env);
6000                        gen_helper_fldlg2_ST0(cpu_env);
6001                        break;
6002                    case 5:
6003                        gen_helper_fpush(cpu_env);
6004                        gen_helper_fldln2_ST0(cpu_env);
6005                        break;
6006                    case 6:
6007                        gen_helper_fpush(cpu_env);
6008                        gen_helper_fldz_ST0(cpu_env);
6009                        break;
6010                    default:
6011                        goto unknown_op;
6012                    }
6013                }
6014                break;
6015            case 0x0e: /* grp d9/6 */
6016                switch(rm) {
6017                case 0: /* f2xm1 */
6018                    gen_helper_f2xm1(cpu_env);
6019                    break;
6020                case 1: /* fyl2x */
6021                    gen_helper_fyl2x(cpu_env);
6022                    break;
6023                case 2: /* fptan */
6024                    gen_helper_fptan(cpu_env);
6025                    break;
6026                case 3: /* fpatan */
6027                    gen_helper_fpatan(cpu_env);
6028                    break;
6029                case 4: /* fxtract */
6030                    gen_helper_fxtract(cpu_env);
6031                    break;
6032                case 5: /* fprem1 */
6033                    gen_helper_fprem1(cpu_env);
6034                    break;
6035                case 6: /* fdecstp */
6036                    gen_helper_fdecstp(cpu_env);
6037                    break;
6038                default:
6039                case 7: /* fincstp */
6040                    gen_helper_fincstp(cpu_env);
6041                    break;
6042                }
6043                break;
6044            case 0x0f: /* grp d9/7 */
6045                switch(rm) {
6046                case 0: /* fprem */
6047                    gen_helper_fprem(cpu_env);
6048                    break;
6049                case 1: /* fyl2xp1 */
6050                    gen_helper_fyl2xp1(cpu_env);
6051                    break;
6052                case 2: /* fsqrt */
6053                    gen_helper_fsqrt(cpu_env);
6054                    break;
6055                case 3: /* fsincos */
6056                    gen_helper_fsincos(cpu_env);
6057                    break;
6058                case 5: /* fscale */
6059                    gen_helper_fscale(cpu_env);
6060                    break;
6061                case 4: /* frndint */
6062                    gen_helper_frndint(cpu_env);
6063                    break;
6064                case 6: /* fsin */
6065                    gen_helper_fsin(cpu_env);
6066                    break;
6067                default:
6068                case 7: /* fcos */
6069                    gen_helper_fcos(cpu_env);
6070                    break;
6071                }
6072                break;
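                /* Annotation: two-operand arithmetic (D8/DC/DE groups).
                   The low three bits of OP select the operation (fadd,
                   fmul, fsub, fsubr, fdiv, fdivr; fcom/fcomp have their
                   own cases below).  OP >= 0x20 makes ST(i) the
                   destination, and OP >= 0x30 also pops the stack
                   afterwards (the "p" forms).  */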
6073            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6074            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6075            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6076                {
6077                    int op1;
6078
6079                    op1 = op & 7;
6080                    if (op >= 0x20) {
6081                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6082                        if (op >= 0x30)
6083                            gen_helper_fpop(cpu_env);
6084                    } else {
6085                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6086                        gen_helper_fp_arith_ST0_FT0(op1);
6087                    }
6088                }
6089                break;
6090            case 0x02: /* fcom */
6091            case 0x22: /* fcom2, undocumented op */
6092                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6093                gen_helper_fcom_ST0_FT0(cpu_env);
6094                break;
6095            case 0x03: /* fcomp */
6096            case 0x23: /* fcomp3, undocumented op */
6097            case 0x32: /* fcomp5, undocumented op */
6098                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6099                gen_helper_fcom_ST0_FT0(cpu_env);
6100                gen_helper_fpop(cpu_env);
6101                break;
6102            case 0x15: /* da/5 */
6103                switch(rm) {
6104                case 1: /* fucompp */
6105                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6106                    gen_helper_fucom_ST0_FT0(cpu_env);
6107                    gen_helper_fpop(cpu_env);
6108                    gen_helper_fpop(cpu_env);
6109                    break;
6110                default:
6111                    goto unknown_op;
6112                }
6113                break;
6114            case 0x1c:
6115                switch(rm) {
6116                case 0: /* feni (287 only, just do nop here) */
6117                    break;
6118                case 1: /* fdisi (287 only, just do nop here) */
6119                    break;
6120                case 2: /* fclex */
6121                    gen_helper_fclex(cpu_env);
6122                    break;
6123                case 3: /* fninit */
6124                    gen_helper_fninit(cpu_env);
6125                    break;
6126                case 4: /* fsetpm (287 only, just do nop here) */
6127                    break;
6128                default:
6129                    goto unknown_op;
6130                }
6131                break;
6132            case 0x1d: /* fucomi */
6133                if (!(s->cpuid_features & CPUID_CMOV)) {
6134                    goto illegal_op;
6135                }
6136                gen_update_cc_op(s);
6137                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6138                gen_helper_fucomi_ST0_FT0(cpu_env);
6139                set_cc_op(s, CC_OP_EFLAGS);
6140                break;
6141            case 0x1e: /* fcomi */
6142                if (!(s->cpuid_features & CPUID_CMOV)) {
6143                    goto illegal_op;
6144                }
6145                gen_update_cc_op(s);
6146                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6147                gen_helper_fcomi_ST0_FT0(cpu_env);
6148                set_cc_op(s, CC_OP_EFLAGS);
6149                break;
6150            case 0x28: /* ffree sti */
6151                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6152                break;
6153            case 0x2a: /* fst sti */
6154                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6155                break;
6156            case 0x2b: /* fstp sti */
6157            case 0x0b: /* fstp1 sti, undocumented op */
6158            case 0x3a: /* fstp8 sti, undocumented op */
6159            case 0x3b: /* fstp9 sti, undocumented op */
6160                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6161                gen_helper_fpop(cpu_env);
6162                break;
6163            case 0x2c: /* fucom st(i) */
6164                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6165                gen_helper_fucom_ST0_FT0(cpu_env);
6166                break;
6167            case 0x2d: /* fucomp st(i) */
6168                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6169                gen_helper_fucom_ST0_FT0(cpu_env);
6170                gen_helper_fpop(cpu_env);
6171                break;
6172            case 0x33: /* de/3 */
6173                switch(rm) {
6174                case 1: /* fcompp */
6175                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6176                    gen_helper_fcom_ST0_FT0(cpu_env);
6177                    gen_helper_fpop(cpu_env);
6178                    gen_helper_fpop(cpu_env);
6179                    break;
6180                default:
6181                    goto unknown_op;
6182                }
6183                break;
6184            case 0x38: /* ffreep sti, undocumented op */
6185                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6186                gen_helper_fpop(cpu_env);
6187                break;
6188            case 0x3c: /* df/4 */
6189                switch(rm) {
6190                case 0:
6191                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
6192                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
6193                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
6194                    break;
6195                default:
6196                    goto unknown_op;
6197                }
6198                break;
6199            case 0x3d: /* fucomip */
6200                if (!(s->cpuid_features & CPUID_CMOV)) {
6201                    goto illegal_op;
6202                }
6203                gen_update_cc_op(s);
6204                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6205                gen_helper_fucomi_ST0_FT0(cpu_env);
6206                gen_helper_fpop(cpu_env);
6207                set_cc_op(s, CC_OP_EFLAGS);
6208                break;
6209            case 0x3e: /* fcomip */
6210                if (!(s->cpuid_features & CPUID_CMOV)) {
6211                    goto illegal_op;
6212                }
6213                gen_update_cc_op(s);
6214                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6215                gen_helper_fcomi_ST0_FT0(cpu_env);
6216                gen_helper_fpop(cpu_env);
6217                set_cc_op(s, CC_OP_EFLAGS);
6218                break;
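                /* Annotation: FCMOVcc.  The low two bits of OP pick the
                   base condition (B, Z, BE, P); bit 3 of OP distinguishes
                   the DA group (fcmovb/e/be/u) from the negated DB group.
                   The branch generated below skips the register move when
                   the condition does not hold.  */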
6219            case 0x10 ... 0x13: /* fcmovxx */
6220            case 0x18 ... 0x1b:
6221                {
6222                    int op1;
6223                    TCGLabel *l1;
6224                    static const uint8_t fcmov_cc[8] = {
6225                        (JCC_B << 1),
6226                        (JCC_Z << 1),
6227                        (JCC_BE << 1),
6228                        (JCC_P << 1),
6229                    };
6230
6231                    if (!(s->cpuid_features & CPUID_CMOV)) {
6232                        goto illegal_op;
6233                    }
6234                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6235                    l1 = gen_new_label();
6236                    gen_jcc1_noeob(s, op1, l1);
6237                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6238                    gen_set_label(l1);
6239                }
6240                break;
6241            default:
6242                goto unknown_op;
6243            }
6244        }
6245        break;
6246        /************************/
6247        /* string ops */
6248
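        /* Annotation: for the string instructions, a REP prefix makes the
           gen_repz_* expanders emit a per-element loop that tests (E)CX
           and ends the TB; without a prefix a single element is expanded
           inline.  For scas/cmps the final argument distinguishes
           REPNZ (1) from REPZ (0).  */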
6249    case 0xa4: /* movsS */
6250    case 0xa5:
6251        ot = mo_b_d(b, dflag);
6252        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6253            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6254        } else {
6255            gen_movs(s, ot);
6256        }
6257        break;
6258
6259    case 0xaa: /* stosS */
6260    case 0xab:
6261        ot = mo_b_d(b, dflag);
6262        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6263            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6264        } else {
6265            gen_stos(s, ot);
6266        }
6267        break;
6268    case 0xac: /* lodsS */
6269    case 0xad:
6270        ot = mo_b_d(b, dflag);
6271        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6272            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6273        } else {
6274            gen_lods(s, ot);
6275        }
6276        break;
6277    case 0xae: /* scasS */
6278    case 0xaf:
6279        ot = mo_b_d(b, dflag);
6280        if (prefixes & PREFIX_REPNZ) {
6281            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6282        } else if (prefixes & PREFIX_REPZ) {
6283            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6284        } else {
6285            gen_scas(s, ot);
6286        }
6287        break;
6288
6289    case 0xa6: /* cmpsS */
6290    case 0xa7:
6291        ot = mo_b_d(b, dflag);
6292        if (prefixes & PREFIX_REPNZ) {
6293            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6294        } else if (prefixes & PREFIX_REPZ) {
6295            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6296        } else {
6297            gen_cmps(s, ot);
6298        }
6299        break;
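        /* Annotation: ins/outs additionally pass through gen_check_io,
           which checks the I/O permission bitmap and the SVM IOIO
           intercept; the constant 4 OR-ed into the parameter should be
           the string-operation bit of the IOIO exit information.  */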
6300    case 0x6c: /* insS */
6301    case 0x6d:
6302        ot = mo_b_d32(b, dflag);
6303        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6304        gen_check_io(s, ot, pc_start - s->cs_base, 
6305                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6306        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6307            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6308        } else {
6309            gen_ins(s, ot);
6310            if (s->tb->cflags & CF_USE_ICOUNT) {
6311                gen_jmp(s, s->pc - s->cs_base);
6312            }
6313        }
6314        break;
6315    case 0x6e: /* outsS */
6316    case 0x6f:
6317        ot = mo_b_d32(b, dflag);
6318        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6319        gen_check_io(s, ot, pc_start - s->cs_base,
6320                     svm_is_rep(prefixes) | 4);
6321        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6322            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6323        } else {
6324            gen_outs(s, ot);
6325            if (s->tb->cflags & CF_USE_ICOUNT) {
6326                gen_jmp(s, s->pc - s->cs_base);
6327            }
6328        }
6329        break;
6330
6331        /************************/
6332        /* port I/O */
6333
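        /* Annotation: in the IN/OUT cases below the port access itself
           happens in a helper.  With icount enabled the access is
           bracketed by gen_io_start/gen_io_end and the TB is ended
           afterwards, so the I/O lands at a deterministic instruction
           count.  */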
6334    case 0xe4:
6335    case 0xe5:
6336        ot = mo_b_d32(b, dflag);
6337        val = cpu_ldub_code(env, s->pc++);
6338        tcg_gen_movi_tl(cpu_T0, val);
6339        gen_check_io(s, ot, pc_start - s->cs_base,
6340                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6341        if (s->tb->cflags & CF_USE_ICOUNT) {
6342            gen_io_start();
6343        }
6344        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6345        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6346        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6347        gen_bpt_io(s, cpu_tmp2_i32, ot);
6348        if (s->tb->cflags & CF_USE_ICOUNT) {
6349            gen_io_end();
6350            gen_jmp(s, s->pc - s->cs_base);
6351        }
6352        break;
6353    case 0xe6:
6354    case 0xe7:
6355        ot = mo_b_d32(b, dflag);
6356        val = cpu_ldub_code(env, s->pc++);
6357        tcg_gen_movi_tl(cpu_T0, val);
6358        gen_check_io(s, ot, pc_start - s->cs_base,
6359                     svm_is_rep(prefixes));
6360        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6361
6362        if (s->tb->cflags & CF_USE_ICOUNT) {
6363            gen_io_start();
6364        }
6365        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6366        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6367        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6368        gen_bpt_io(s, cpu_tmp2_i32, ot);
6369        if (s->tb->cflags & CF_USE_ICOUNT) {
6370            gen_io_end();
6371            gen_jmp(s, s->pc - s->cs_base);
6372        }
6373        break;
6374    case 0xec:
6375    case 0xed:
6376        ot = mo_b_d32(b, dflag);
6377        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6378        gen_check_io(s, ot, pc_start - s->cs_base,
6379                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6380        if (s->tb->cflags & CF_USE_ICOUNT) {
6381            gen_io_start();
6382        }
6383        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6384        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6385        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6386        gen_bpt_io(s, cpu_tmp2_i32, ot);
6387        if (s->tb->cflags & CF_USE_ICOUNT) {
6388            gen_io_end();
6389            gen_jmp(s, s->pc - s->cs_base);
6390        }
6391        break;
6392    case 0xee:
6393    case 0xef:
6394        ot = mo_b_d32(b, dflag);
6395        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6396        gen_check_io(s, ot, pc_start - s->cs_base,
6397                     svm_is_rep(prefixes));
6398        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6399
6400        if (s->tb->cflags & CF_USE_ICOUNT) {
6401            gen_io_start();
6402        }
6403        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6404        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6405        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6406        gen_bpt_io(s, cpu_tmp2_i32, ot);
6407        if (s->tb->cflags & CF_USE_ICOUNT) {
6408            gen_io_end();
6409            gen_jmp(s, s->pc - s->cs_base);
6410        }
6411        break;
6412
6413        /************************/
6414        /* control */
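        /* Annotation: near returns pop the new EIP (with a zero-extending
           load, as noted below) and finish with gen_jr, an indirect jump
           that ends the TB.  */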
6415    case 0xc2: /* ret im */
6416        val = cpu_ldsw_code(env, s->pc);
6417        s->pc += 2;
6418        ot = gen_pop_T0(s);
6419        gen_stack_update(s, val + (1 << ot));
6420        /* Note that gen_pop_T0 uses a zero-extending load.  */
6421        gen_op_jmp_v(cpu_T0);
6422        gen_bnd_jmp(s);
6423        gen_jr(s, cpu_T0);
6424        break;
6425    case 0xc3: /* ret */
6426        ot = gen_pop_T0(s);
6427        gen_pop_update(s, ot);
6428        /* Note that gen_pop_T0 uses a zero-extending load.  */
6429        gen_op_jmp_v(cpu_T0);
6430        gen_bnd_jmp(s);
6431        gen_jr(s, cpu_T0);
6432        break;
6433    case 0xca: /* lret im */
6434        val = cpu_ldsw_code(env, s->pc);
6435        s->pc += 2;
6436    do_lret:
6437        if (s->pe && !s->vm86) {
6438            gen_update_cc_op(s);
6439            gen_jmp_im(pc_start - s->cs_base);
6440            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6441                                      tcg_const_i32(val));
6442        } else {
6443            gen_stack_A0(s);
6444            /* pop offset */
6445            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6446            /* NOTE: keeping EIP updated is not a problem in case of
6447               exception */
6448            gen_op_jmp_v(cpu_T0);
6449            /* pop selector */
6450            gen_add_A0_im(s, 1 << dflag);
6451            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6452            gen_op_movl_seg_T0_vm(R_CS);
6453            /* add stack offset */
6454            gen_stack_update(s, val + (2 << dflag));
6455        }
6456        gen_eob(s);
6457        break;
6458    case 0xcb: /* lret */
6459        val = 0;
6460        goto do_lret;
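        /* Annotation: iret takes three paths.  Real mode and vm86 with
           IOPL 3 use the iret_real helper, vm86 with IOPL < 3 raises #GP,
           and protected mode uses iret_protected.  Every non-faulting
           path reloads EFLAGS, hence CC_OP_EFLAGS.  */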
6461    case 0xcf: /* iret */
6462        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6463        if (!s->pe) {
6464            /* real mode */
6465            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6466            set_cc_op(s, CC_OP_EFLAGS);
6467        } else if (s->vm86) {
6468            if (s->iopl != 3) {
6469                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6470            } else {
6471                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6472                set_cc_op(s, CC_OP_EFLAGS);
6473            }
6474        } else {
6475            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6476                                      tcg_const_i32(s->pc - s->cs_base));
6477            set_cc_op(s, CC_OP_EFLAGS);
6478        }
6479        gen_eob(s);
6480        break;
6481    case 0xe8: /* call im */
6482        {
6483            if (dflag != MO_16) {
6484                tval = (int32_t)insn_get(env, s, MO_32);
6485            } else {
6486                tval = (int16_t)insn_get(env, s, MO_16);
6487            }
6488            next_eip = s->pc - s->cs_base;
6489            tval += next_eip;
6490            if (dflag == MO_16) {
6491                tval &= 0xffff;
6492            } else if (!CODE64(s)) {
6493                tval &= 0xffffffff;
6494            }
6495            tcg_gen_movi_tl(cpu_T0, next_eip);
6496            gen_push_v(s, cpu_T0);
6497            gen_bnd_jmp(s);
6498            gen_jmp(s, tval);
6499        }
6500        break;
6501    case 0x9a: /* lcall im */
6502        {
6503            unsigned int selector, offset;
6504
6505            if (CODE64(s))
6506                goto illegal_op;
6507            ot = dflag;
6508            offset = insn_get(env, s, ot);
6509            selector = insn_get(env, s, MO_16);
6510
6511            tcg_gen_movi_tl(cpu_T0, selector);
6512            tcg_gen_movi_tl(cpu_T1, offset);
6513        }
6514        goto do_lcall;
6515    case 0xe9: /* jmp im */
6516        if (dflag != MO_16) {
6517            tval = (int32_t)insn_get(env, s, MO_32);
6518        } else {
6519            tval = (int16_t)insn_get(env, s, MO_16);
6520        }
6521        tval += s->pc - s->cs_base;
6522        if (dflag == MO_16) {
6523            tval &= 0xffff;
6524        } else if (!CODE64(s)) {
6525            tval &= 0xffffffff;
6526        }
6527        gen_bnd_jmp(s);
6528        gen_jmp(s, tval);
6529        break;
6530    case 0xea: /* ljmp im */
6531        {
6532            unsigned int selector, offset;
6533
6534            if (CODE64(s))
6535                goto illegal_op;
6536            ot = dflag;
6537            offset = insn_get(env, s, ot);
6538            selector = insn_get(env, s, MO_16);
6539
6540            tcg_gen_movi_tl(cpu_T0, selector);
6541            tcg_gen_movi_tl(cpu_T1, offset);
6542        }
6543        goto do_ljmp;
6544    case 0xeb: /* jmp Jb */
6545        tval = (int8_t)insn_get(env, s, MO_8);
6546        tval += s->pc - s->cs_base;
6547        if (dflag == MO_16) {
6548            tval &= 0xffff;
6549        }
6550        gen_jmp(s, tval);
6551        break;
6552    case 0x70 ... 0x7f: /* jcc Jb */
6553        tval = (int8_t)insn_get(env, s, MO_8);
6554        goto do_jcc;
6555    case 0x180 ... 0x18f: /* jcc Jv */
6556        if (dflag != MO_16) {
6557            tval = (int32_t)insn_get(env, s, MO_32);
6558        } else {
6559            tval = (int16_t)insn_get(env, s, MO_16);
6560        }
6561    do_jcc:
6562        next_eip = s->pc - s->cs_base;
6563        tval += next_eip;
6564        if (dflag == MO_16) {
6565            tval &= 0xffff;
6566        }
6567        gen_bnd_jmp(s);
6568        gen_jcc(s, b, tval, next_eip);
6569        break;
6570
6571    case 0x190 ... 0x19f: /* setcc Gv */
6572        modrm = cpu_ldub_code(env, s->pc++);
6573        gen_setcc1(s, b, cpu_T0);
6574        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6575        break;
6576    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6577        if (!(s->cpuid_features & CPUID_CMOV)) {
6578            goto illegal_op;
6579        }
6580        ot = dflag;
6581        modrm = cpu_ldub_code(env, s->pc++);
6582        reg = ((modrm >> 3) & 7) | rex_r;
6583        gen_cmovcc1(env, s, ot, b, modrm, reg);
6584        break;
6585
6586        /************************/
6587        /* flags */
6588    case 0x9c: /* pushf */
6589        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6590        if (s->vm86 && s->iopl != 3) {
6591            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6592        } else {
6593            gen_update_cc_op(s);
6594            gen_helper_read_eflags(cpu_T0, cpu_env);
6595            gen_push_v(s, cpu_T0);
6596        }
6597        break;
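        /* Annotation: for popf, the writable EFLAGS bits depend on
           privilege.  CPL 0 may update IOPL and IF, CPL <= IOPL may
           update IF but not IOPL, and anything else may change neither.
           A 16-bit operand size further restricts the update to the low
           16 bits.  */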
6598    case 0x9d: /* popf */
6599        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6600        if (s->vm86 && s->iopl != 3) {
6601            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6602        } else {
6603            ot = gen_pop_T0(s);
6604            if (s->cpl == 0) {
6605                if (dflag != MO_16) {
6606                    gen_helper_write_eflags(cpu_env, cpu_T0,
6607                                            tcg_const_i32((TF_MASK | AC_MASK |
6608                                                           ID_MASK | NT_MASK |
6609                                                           IF_MASK |
6610                                                           IOPL_MASK)));
6611                } else {
6612                    gen_helper_write_eflags(cpu_env, cpu_T0,
6613                                            tcg_const_i32((TF_MASK | AC_MASK |
6614                                                           ID_MASK | NT_MASK |
6615                                                           IF_MASK | IOPL_MASK)
6616                                                          & 0xffff));
6617                }
6618            } else {
6619                if (s->cpl <= s->iopl) {
6620                    if (dflag != MO_16) {
6621                        gen_helper_write_eflags(cpu_env, cpu_T0,
6622                                                tcg_const_i32((TF_MASK |
6623                                                               AC_MASK |
6624                                                               ID_MASK |
6625                                                               NT_MASK |
6626                                                               IF_MASK)));
6627                    } else {
6628                        gen_helper_write_eflags(cpu_env, cpu_T0,
6629                                                tcg_const_i32((TF_MASK |
6630                                                               AC_MASK |
6631                                                               ID_MASK |
6632                                                               NT_MASK |
6633                                                               IF_MASK)
6634                                                              & 0xffff));
6635                    }
6636                } else {
6637                    if (dflag != MO_16) {
6638                        gen_helper_write_eflags(cpu_env, cpu_T0,
6639                                           tcg_const_i32((TF_MASK | AC_MASK |
6640                                                          ID_MASK | NT_MASK)));
6641                    } else {
6642                        gen_helper_write_eflags(cpu_env, cpu_T0,
6643                                           tcg_const_i32((TF_MASK | AC_MASK |
6644                                                          ID_MASK | NT_MASK)
6645                                                         & 0xffff));
6646                    }
6647                }
6648            }
6649            gen_pop_update(s, ot);
6650            set_cc_op(s, CC_OP_EFLAGS);
6651            /* abort translation because TF/AC flag may change */
6652            gen_jmp_im(s->pc - s->cs_base);
6653            gen_eob(s);
6654        }
6655        break;
6656    case 0x9e: /* sahf */
6657        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6658            goto illegal_op;
6659        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
6660        gen_compute_eflags(s);
6661        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6662        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6663        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
6664        break;
6665    case 0x9f: /* lahf */
6666        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6667            goto illegal_op;
6668        gen_compute_eflags(s);
6669        /* Note: gen_compute_eflags() only gives the condition codes */
6670        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
6671        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
6672        break;
6673    case 0xf5: /* cmc */
6674        gen_compute_eflags(s);
6675        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6676        break;
6677    case 0xf8: /* clc */
6678        gen_compute_eflags(s);
6679        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6680        break;
6681    case 0xf9: /* stc */
6682        gen_compute_eflags(s);
6683        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6684        break;
6685    case 0xfc: /* cld */
6686        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
6687        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6688        break;
6689    case 0xfd: /* std */
6690        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
6691        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6692        break;
6693
6694        /************************/
6695        /* bit operations */
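        /* Annotation: in the bt/bts/btr/btc group a register bit offset
           may index outside the addressed word, so for memory operands
           the word displacement (bit offset / operand width) is folded
           into the effective address first.  With a LOCK prefix the RMW
           forms below use atomic TCG fetch-and-op operations.  */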
6696    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6697        ot = dflag;
6698        modrm = cpu_ldub_code(env, s->pc++);
6699        op = (modrm >> 3) & 7;
6700        mod = (modrm >> 6) & 3;
6701        rm = (modrm & 7) | REX_B(s);
6702        if (mod != 3) {
6703            s->rip_offset = 1;
6704            gen_lea_modrm(env, s, modrm);
6705            if (!(s->prefix & PREFIX_LOCK)) {
6706                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6707            }
6708        } else {
6709            gen_op_mov_v_reg(ot, cpu_T0, rm);
6710        }
6711        /* load shift */
6712        val = cpu_ldub_code(env, s->pc++);
6713        tcg_gen_movi_tl(cpu_T1, val);
6714        if (op < 4)
6715            goto unknown_op;
6716        op -= 4;
6717        goto bt_op;
6718    case 0x1a3: /* bt Gv, Ev */
6719        op = 0;
6720        goto do_btx;
6721    case 0x1ab: /* bts */
6722        op = 1;
6723        goto do_btx;
6724    case 0x1b3: /* btr */
6725        op = 2;
6726        goto do_btx;
6727    case 0x1bb: /* btc */
6728        op = 3;
6729    do_btx:
6730        ot = dflag;
6731        modrm = cpu_ldub_code(env, s->pc++);
6732        reg = ((modrm >> 3) & 7) | rex_r;
6733        mod = (modrm >> 6) & 3;
6734        rm = (modrm & 7) | REX_B(s);
6735        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
6736        if (mod != 3) {
6737            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6738            /* special case: fold the bit offset into the address as a displacement */
6739            gen_exts(ot, cpu_T1);
6740            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
6741            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
6742            tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
6743            gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
6744            if (!(s->prefix & PREFIX_LOCK)) {
6745                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6746            }
6747        } else {
6748            gen_op_mov_v_reg(ot, cpu_T0, rm);
6749        }
6750    bt_op:
6751        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
6752        tcg_gen_movi_tl(cpu_tmp0, 1);
6753        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
6754        if (s->prefix & PREFIX_LOCK) {
6755            switch (op) {
6756            case 0: /* bt */
6757                /* Needs no atomic ops; we suppressed the normal
6758                   memory load for LOCK above, so do it now.  */
6759                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6760                break;
6761            case 1: /* bts */
6762                tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
6763                                           s->mem_index, ot | MO_LE);
6764                break;
6765            case 2: /* btr */
6766                tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
6767                tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
6768                                            s->mem_index, ot | MO_LE);
6769                break;
6770            default:
6771            case 3: /* btc */
6772                tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
6773                                            s->mem_index, ot | MO_LE);
6774                break;
6775            }
6776            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6777        } else {
6778            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6779            switch (op) {
6780            case 0: /* bt */
6781                /* Data already loaded; nothing to do.  */
6782                break;
6783            case 1: /* bts */
6784                tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
6785                break;
6786            case 2: /* btr */
6787                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
6788                break;
6789            default:
6790            case 3: /* btc */
6791                tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
6792                break;
6793            }
6794            if (op != 0) {
6795                if (mod != 3) {
6796                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
6797                } else {
6798                    gen_op_mov_reg_v(ot, rm, cpu_T0);
6799                }
6800            }
6801        }
6802
6803        /* Delay all CC updates until after the store above.  Note that
6804           C is the result of the test, Z is unchanged, and the others
6805           are all undefined.  */
6806        switch (s->cc_op) {
6807        case CC_OP_MULB ... CC_OP_MULQ:
6808        case CC_OP_ADDB ... CC_OP_ADDQ:
6809        case CC_OP_ADCB ... CC_OP_ADCQ:
6810        case CC_OP_SUBB ... CC_OP_SUBQ:
6811        case CC_OP_SBBB ... CC_OP_SBBQ:
6812        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6813        case CC_OP_INCB ... CC_OP_INCQ:
6814        case CC_OP_DECB ... CC_OP_DECQ:
6815        case CC_OP_SHLB ... CC_OP_SHLQ:
6816        case CC_OP_SARB ... CC_OP_SARQ:
6817        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6818            /* Z was going to be computed from the non-zero status of CC_DST.
6819               We can get that same Z value (and the new C value) by leaving
6820               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6821               same width.  */
6822            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
6823            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6824            break;
6825        default:
6826            /* Otherwise, generate EFLAGS and replace the C bit.  */
6827            gen_compute_eflags(s);
6828            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
6829                               ctz32(CC_C), 1);
6830            break;
6831        }
6832        break;
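        /* Annotation: a REPZ (F3) prefix turns bsf/bsr into tzcnt/lzcnt
           when the matching CPUID feature (BMI1 or ABM respectively) is
           present; the two pairs differ both in their zero-input
           behaviour and in which flags they define.  */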
6833    case 0x1bc: /* bsf / tzcnt */
6834    case 0x1bd: /* bsr / lzcnt */
6835        ot = dflag;
6836        modrm = cpu_ldub_code(env, s->pc++);
6837        reg = ((modrm >> 3) & 7) | rex_r;
6838        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6839        gen_extu(ot, cpu_T0);
6840
6841        /* Note that lzcnt and tzcnt are in different extensions.  */
6842        if ((prefixes & PREFIX_REPZ)
6843            && (b & 1
6844                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6845                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6846            int size = 8 << ot;
6847            /* For lzcnt/tzcnt, C bit is defined related to the input. */
6848            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
6849            if (b & 1) {
6850                /* For lzcnt, reduce the target_ulong result by the
6851                   number of zeros that we expect to find at the top.  */
6852                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
6853                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
6854            } else {
6855                /* For tzcnt, a zero input must return the operand size.  */
6856                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
6857            }
6858            /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
6859            gen_op_update1_cc();
6860            set_cc_op(s, CC_OP_BMILGB + ot);
6861        } else {
6862            /* For bsr/bsf, only the Z bit is defined and it is related
6863               to the input and not the result.  */
6864            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
6865            set_cc_op(s, CC_OP_LOGICB + ot);
6866
6867            /* ??? The manual says that the output is undefined when the
6868               input is zero, but real hardware leaves it unchanged, and
6869               real programs appear to depend on that.  Accomplish this
6870               by passing the output as the value to return upon zero.  */
6871            if (b & 1) {
6872                /* For bsr, return the bit index of the first 1 bit,
6873                   not the count of leading zeros.  */
6874                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6875                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
6876                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
6877            } else {
6878                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
6879            }
6880        }
6881        gen_op_mov_reg_v(ot, reg, cpu_T0);
6882        break;
6883        /************************/
6884        /* bcd */
6885    case 0x27: /* daa */
6886        if (CODE64(s))
6887            goto illegal_op;
6888        gen_update_cc_op(s);
6889        gen_helper_daa(cpu_env);
6890        set_cc_op(s, CC_OP_EFLAGS);
6891        break;
6892    case 0x2f: /* das */
6893        if (CODE64(s))
6894            goto illegal_op;
6895        gen_update_cc_op(s);
6896        gen_helper_das(cpu_env);
6897        set_cc_op(s, CC_OP_EFLAGS);
6898        break;
6899    case 0x37: /* aaa */
6900        if (CODE64(s))
6901            goto illegal_op;
6902        gen_update_cc_op(s);
6903        gen_helper_aaa(cpu_env);
6904        set_cc_op(s, CC_OP_EFLAGS);
6905        break;
6906    case 0x3f: /* aas */
6907        if (CODE64(s))
6908            goto illegal_op;
6909        gen_update_cc_op(s);
6910        gen_helper_aas(cpu_env);
6911        set_cc_op(s, CC_OP_EFLAGS);
6912        break;
6913    case 0xd4: /* aam */
6914        if (CODE64(s))
6915            goto illegal_op;
6916        val = cpu_ldub_code(env, s->pc++);
6917        if (val == 0) {
6918            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6919        } else {
6920            gen_helper_aam(cpu_env, tcg_const_i32(val));
6921            set_cc_op(s, CC_OP_LOGICB);
6922        }
6923        break;
6924    case 0xd5: /* aad */
6925        if (CODE64(s))
6926            goto illegal_op;
6927        val = cpu_ldub_code(env, s->pc++);
6928        gen_helper_aad(cpu_env, tcg_const_i32(val));
6929        set_cc_op(s, CC_OP_LOGICB);
6930        break;
6931        /************************/
6932        /* misc */
6933    case 0x90: /* nop */
6934        /* XXX: correct lock test for all insn */
6935        if (prefixes & PREFIX_LOCK) {
6936            goto illegal_op;
6937        }
6938        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
6939        if (REX_B(s)) {
6940            goto do_xchg_reg_eax;
6941        }
6942        if (prefixes & PREFIX_REPZ) {
6943            gen_update_cc_op(s);
6944            gen_jmp_im(pc_start - s->cs_base);
6945            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
6946            s->is_jmp = DISAS_TB_JUMP;
6947        }
6948        break;
6949    case 0x9b: /* fwait */
6950        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
6951            (HF_MP_MASK | HF_TS_MASK)) {
6952            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6953        } else {
6954            gen_helper_fwait(cpu_env);
6955        }
6956        break;
6957    case 0xcc: /* int3 */
6958        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
6959        break;
6960    case 0xcd: /* int N */
6961        val = cpu_ldub_code(env, s->pc++);
6962        if (s->vm86 && s->iopl != 3) {
6963            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6964        } else {
6965            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
6966        }
6967        break;
6968    case 0xce: /* into */
6969        if (CODE64(s))
6970            goto illegal_op;
6971        gen_update_cc_op(s);
6972        gen_jmp_im(pc_start - s->cs_base);
6973        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
6974        break;
6975#ifdef WANT_ICEBP
6976    case 0xf1: /* icebp (undocumented, exits to external debugger) */
6977        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
6978#if 1
6979        gen_debug(s, pc_start - s->cs_base);
6980#else
6981        /* start debug */
6982        tb_flush(CPU(x86_env_get_cpu(env)));
6983        qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
6984#endif
6985        break;
6986#endif
6987    case 0xfa: /* cli */
6988        if (!s->vm86) {
6989            if (s->cpl <= s->iopl) {
6990                gen_helper_cli(cpu_env);
6991            } else {
6992                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6993            }
6994        } else {
6995            if (s->iopl == 3) {
6996                gen_helper_cli(cpu_env);
6997            } else {
6998                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6999            }
7000        }
7001        break;
7002    case 0xfb: /* sti */
7003        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7004            gen_helper_sti(cpu_env);
7005            /* interrupts are recognized only after the first insn following sti */
7006            gen_jmp_im(s->pc - s->cs_base);
7007            gen_eob_inhibit_irq(s, true);
7008        } else {
7009            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7010        }
7011        break;
7012    case 0x62: /* bound */
7013        if (CODE64(s))
7014            goto illegal_op;
7015        ot = dflag;
7016        modrm = cpu_ldub_code(env, s->pc++);
7017        reg = (modrm >> 3) & 7;
7018        mod = (modrm >> 6) & 3;
7019        if (mod == 3)
7020            goto illegal_op;
7021        gen_op_mov_v_reg(ot, cpu_T0, reg);
7022        gen_lea_modrm(env, s, modrm);
7023        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7024        if (ot == MO_16) {
7025            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
7026        } else {
7027            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
7028        }
7029        break;
7030    case 0x1c8 ... 0x1cf: /* bswap reg */
7031        reg = (b & 7) | REX_B(s);
7032#ifdef TARGET_X86_64
7033        if (dflag == MO_64) {
7034            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
7035            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
7036            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
7037        } else
7038#endif
7039        {
7040            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
7041            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
7042            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
7043            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
7044        }
7045        break;
7046    case 0xd6: /* salc */
7047        if (CODE64(s))
7048            goto illegal_op;
7049        gen_compute_eflags_c(s, cpu_T0);
7050        tcg_gen_neg_tl(cpu_T0, cpu_T0);
7051        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
7052        break;
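        /* Annotation: loop/jecxz expand to explicit tests on (E)CX.
           Label l1 is reached when the branch is taken, l3 when it falls
           through to the next insn, and l2 is the common exit where the
           TB ends.  */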
7053    case 0xe0: /* loopnz */
7054    case 0xe1: /* loopz */
7055    case 0xe2: /* loop */
7056    case 0xe3: /* jecxz */
7057        {
7058            TCGLabel *l1, *l2, *l3;
7059
7060            tval = (int8_t)insn_get(env, s, MO_8);
7061            next_eip = s->pc - s->cs_base;
7062            tval += next_eip;
7063            if (dflag == MO_16) {
7064                tval &= 0xffff;
7065            }
7066
7067            l1 = gen_new_label();
7068            l2 = gen_new_label();
7069            l3 = gen_new_label();
7070            b &= 3;
7071            switch(b) {
7072            case 0: /* loopnz */
7073            case 1: /* loopz */
7074                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7075                gen_op_jz_ecx(s->aflag, l3);
7076                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7077                break;
7078            case 2: /* loop */
7079                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7080                gen_op_jnz_ecx(s->aflag, l1);
7081                break;
7082            default:
7083            case 3: /* jcxz */
7084                gen_op_jz_ecx(s->aflag, l1);
7085                break;
7086            }
7087
7088            gen_set_label(l3);
7089            gen_jmp_im(next_eip);
7090            tcg_gen_br(l2);
7091
7092            gen_set_label(l1);
7093            gen_jmp_im(tval);
7094            gen_set_label(l2);
7095            gen_eob(s);
7096        }
7097        break;
7098    case 0x130: /* wrmsr */
7099    case 0x132: /* rdmsr */
7100        if (s->cpl != 0) {
7101            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7102        } else {
7103            gen_update_cc_op(s);
7104            gen_jmp_im(pc_start - s->cs_base);
7105            if (b & 2) {
7106                gen_helper_rdmsr(cpu_env);
7107            } else {
7108                gen_helper_wrmsr(cpu_env);
7109            }
7110        }
7111        break;
7112    case 0x131: /* rdtsc */
7113        gen_update_cc_op(s);
7114        gen_jmp_im(pc_start - s->cs_base);
7115        if (s->tb->cflags & CF_USE_ICOUNT) {
7116            gen_io_start();
7117        }
7118        gen_helper_rdtsc(cpu_env);
7119        if (s->tb->cflags & CF_USE_ICOUNT) {
7120            gen_io_end();
7121            gen_jmp(s, s->pc - s->cs_base);
7122        }
7123        break;
7124    case 0x133: /* rdpmc */
7125        gen_update_cc_op(s);
7126        gen_jmp_im(pc_start - s->cs_base);
7127        gen_helper_rdpmc(cpu_env);
7128        break;
7129    case 0x134: /* sysenter */
7130        /* On Intel CPUs, SYSENTER is also valid in 64-bit mode */
7131        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7132            goto illegal_op;
7133        if (!s->pe) {
7134            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7135        } else {
7136            gen_helper_sysenter(cpu_env);
7137            gen_eob(s);
7138        }
7139        break;
7140    case 0x135: /* sysexit */
7141        /* On Intel CPUs, SYSEXIT is also valid in 64-bit mode */
7142        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7143            goto illegal_op;
7144        if (!s->pe) {
7145            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7146        } else {
7147            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7148            gen_eob(s);
7149        }
7150        break;
7151#ifdef TARGET_X86_64
7152    case 0x105: /* syscall */
7153        /* XXX: is it usable in real mode? */
7154        gen_update_cc_op(s);
7155        gen_jmp_im(pc_start - s->cs_base);
7156        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7157        /* TF handling for the syscall insn is different: the TF bit is checked
7158           after the syscall insn completes.  This prevents #DB from being
7159           generated after one has entered CPL 0 if TF is set in FMASK.  */
7160        gen_eob_worker(s, false, true);
7161        break;
7162    case 0x107: /* sysret */
7163        if (!s->pe) {
7164            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7165        } else {
7166            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7167            /* condition codes are modified only in long mode */
7168            if (s->lma) {
7169                set_cc_op(s, CC_OP_EFLAGS);
7170            }
7171            /* TF handling for the sysret insn is different: the TF bit is
7172               checked after the sysret insn completes.  This allows #DB to be
7173               generated "as if" the syscall insn in userspace had just
7174               completed.  */
7175            gen_eob_worker(s, false, true);
7176        }
7177        break;
7178#endif
7179    case 0x1a2: /* cpuid */
7180        gen_update_cc_op(s);
7181        gen_jmp_im(pc_start - s->cs_base);
7182        gen_helper_cpuid(cpu_env);
7183        break;
7184    case 0xf4: /* hlt */
7185        if (s->cpl != 0) {
7186            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7187        } else {
7188            gen_update_cc_op(s);
7189            gen_jmp_im(pc_start - s->cs_base);
7190            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7191            s->is_jmp = DISAS_TB_JUMP;
7192        }
7193        break;
7194    case 0x100:
7195        modrm = cpu_ldub_code(env, s->pc++);
7196        mod = (modrm >> 6) & 3;
7197        op = (modrm >> 3) & 7;
7198        switch(op) {
7199        case 0: /* sldt */
7200            if (!s->pe || s->vm86)
7201                goto illegal_op;
7202            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7203            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7204                             offsetof(CPUX86State, ldt.selector));
7205            ot = mod == 3 ? dflag : MO_16;
7206            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7207            break;
7208        case 2: /* lldt */
7209            if (!s->pe || s->vm86)
7210                goto illegal_op;
7211            if (s->cpl != 0) {
7212                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7213            } else {
7214                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7215                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7216                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7217                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
7218            }
7219            break;
7220        case 1: /* str */
7221            if (!s->pe || s->vm86)
7222                goto illegal_op;
7223            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7224            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7225                             offsetof(CPUX86State, tr.selector));
7226            ot = mod == 3 ? dflag : MO_16;
7227            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7228            break;
7229        case 3: /* ltr */
7230            if (!s->pe || s->vm86)
7231                goto illegal_op;
7232            if (s->cpl != 0) {
7233                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7234            } else {
7235                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7236                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7237                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7238                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
7239            }
7240            break;
7241        case 4: /* verr */
7242        case 5: /* verw */
7243            if (!s->pe || s->vm86)
7244                goto illegal_op;
7245            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7246            gen_update_cc_op(s);
7247            if (op == 4) {
7248                gen_helper_verr(cpu_env, cpu_T0);
7249            } else {
7250                gen_helper_verw(cpu_env, cpu_T0);
7251            }
7252            set_cc_op(s, CC_OP_EFLAGS);
7253            break;
7254        default:
7255            goto unknown_op;
7256        }
7257        break;
7258
7259    case 0x101:
7260        modrm = cpu_ldub_code(env, s->pc++);
7261        switch (modrm) {
7262        CASE_MODRM_MEM_OP(0): /* sgdt */
7263            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7264            gen_lea_modrm(env, s, modrm);
7265            tcg_gen_ld32u_tl(cpu_T0,
7266                             cpu_env, offsetof(CPUX86State, gdt.limit));
7267            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7268            gen_add_A0_im(s, 2);
7269            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7270            if (dflag == MO_16) {
7271                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7272            }
7273            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7274            break;
7275
7276        case 0xc8: /* monitor */
7277            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7278                goto illegal_op;
7279            }
7280            gen_update_cc_op(s);
7281            gen_jmp_im(pc_start - s->cs_base);
7282            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
7283            gen_extu(s->aflag, cpu_A0);
7284            gen_add_A0_ds_seg(s);
7285            gen_helper_monitor(cpu_env, cpu_A0);
7286            break;
7287
7288        case 0xc9: /* mwait */
7289            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7290                goto illegal_op;
7291            }
7292            gen_update_cc_op(s);
7293            gen_jmp_im(pc_start - s->cs_base);
7294            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7295            gen_eob(s);
7296            break;
7297
7298        case 0xca: /* clac */
7299            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7300                || s->cpl != 0) {
7301                goto illegal_op;
7302            }
7303            gen_helper_clac(cpu_env);
7304            gen_jmp_im(s->pc - s->cs_base);
7305            gen_eob(s);
7306            break;
7307
7308        case 0xcb: /* stac */
7309            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7310                || s->cpl != 0) {
7311                goto illegal_op;
7312            }
7313            gen_helper_stac(cpu_env);
7314            gen_jmp_im(s->pc - s->cs_base);
7315            gen_eob(s);
7316            break;
7317
7318        CASE_MODRM_MEM_OP(1): /* sidt */
7319            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7320            gen_lea_modrm(env, s, modrm);
7321            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
7322            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7323            gen_add_A0_im(s, 2);
7324            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7325            if (dflag == MO_16) {
7326                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7327            }
7328            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7329            break;
7330
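            /* Annotation: xgetbv/xsetbv take the XCR index in ECX and
               split or assemble the 64-bit value across EDX:EAX.
               xsetbv is CPL 0 only and ends the TB, since it may change
               translation-relevant state.  */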
7331        case 0xd0: /* xgetbv */
7332            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7333                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7334                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7335                goto illegal_op;
7336            }
7337            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7338            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7339            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7340            break;
7341
7342        case 0xd1: /* xsetbv */
7343            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7344                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7345                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7346                goto illegal_op;
7347            }
7348            if (s->cpl != 0) {
7349                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7350                break;
7351            }
7352            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7353                                  cpu_regs[R_EDX]);
7354            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7355            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7356            /* End TB because translation flags may change.  */
7357            gen_jmp_im(s->pc - s->cs_base);
7358            gen_eob(s);
7359            break;
7360
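            /* Annotation: the SVM group below (vmrun ... invlpga)
               requires the SVME flag and, for most of the insns,
               protected mode and CPL 0.  vmrun additionally exits the TB
               outright, since the world switch invalidates the current
               translation context.  */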
7361        case 0xd8: /* VMRUN */
7362            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7363                goto illegal_op;
7364            }
7365            if (s->cpl != 0) {
7366                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7367                break;
7368            }
7369            gen_update_cc_op(s);
7370            gen_jmp_im(pc_start - s->cs_base);
7371            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7372                             tcg_const_i32(s->pc - pc_start));
7373            tcg_gen_exit_tb(0);
7374            s->is_jmp = DISAS_TB_JUMP;
7375            break;
7376
7377        case 0xd9: /* VMMCALL */
7378            if (!(s->flags & HF_SVME_MASK)) {
7379                goto illegal_op;
7380            }
7381            gen_update_cc_op(s);
7382            gen_jmp_im(pc_start - s->cs_base);
7383            gen_helper_vmmcall(cpu_env);
7384            break;
7385
7386        case 0xda: /* VMLOAD */
7387            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7388                goto illegal_op;
7389            }
7390            if (s->cpl != 0) {
7391                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7392                break;
7393            }
7394            gen_update_cc_op(s);
7395            gen_jmp_im(pc_start - s->cs_base);
7396            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7397            break;
7398
7399        case 0xdb: /* VMSAVE */
7400            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7401                goto illegal_op;
7402            }
7403            if (s->cpl != 0) {
7404                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7405                break;
7406            }
7407            gen_update_cc_op(s);
7408            gen_jmp_im(pc_start - s->cs_base);
7409            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7410            break;
7411
7412        case 0xdc: /* STGI */
7413            if ((!(s->flags & HF_SVME_MASK)
7414                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7415                || !s->pe) {
7416                goto illegal_op;
7417            }
7418            if (s->cpl != 0) {
7419                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7420                break;
7421            }
7422            gen_update_cc_op(s);
7423            gen_jmp_im(pc_start - s->cs_base);
7424            gen_helper_stgi(cpu_env);
7425            break;
7426
7427        case 0xdd: /* CLGI */
7428            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7429                goto illegal_op;
7430            }
7431            if (s->cpl != 0) {
7432                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7433                break;
7434            }
7435            gen_update_cc_op(s);
7436            gen_jmp_im(pc_start - s->cs_base);
7437            gen_helper_clgi(cpu_env);
7438            break;
7439
7440        case 0xde: /* SKINIT */
7441            if ((!(s->flags & HF_SVME_MASK)
7442                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7443                || !s->pe) {
7444                goto illegal_op;
7445            }
7446            gen_update_cc_op(s);
7447            gen_jmp_im(pc_start - s->cs_base);
7448            gen_helper_skinit(cpu_env);
7449            break;
7450
7451        case 0xdf: /* INVLPGA */
7452            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7453                goto illegal_op;
7454            }
7455            if (s->cpl != 0) {
7456                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7457                break;
7458            }
7459            gen_update_cc_op(s);
7460            gen_jmp_im(pc_start - s->cs_base);
7461            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7462            break;
7463
7464        CASE_MODRM_MEM_OP(2): /* lgdt */
7465            if (s->cpl != 0) {
7466                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7467                break;
7468            }
7469            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7470            gen_lea_modrm(env, s, modrm);
7471            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7472            gen_add_A0_im(s, 2);
7473            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
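                /* With a 16-bit operand size, only 24 bits of the base
                   are used.  */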
7474            if (dflag == MO_16) {
7475                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7476            }
7477            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7478            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7479            break;
7480
7481        CASE_MODRM_MEM_OP(3): /* lidt */
7482            if (s->cpl != 0) {
7483                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7484                break;
7485            }
7486            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7487            gen_lea_modrm(env, s, modrm);
7488            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7489            gen_add_A0_im(s, 2);
7490            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7491            if (dflag == MO_16) {
7492                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7493            }
7494            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7495            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
7496            break;
7497
7498        CASE_MODRM_OP(4): /* smsw */
7499            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7500            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
7501            if (CODE64(s)) {
7502                mod = (modrm >> 6) & 3;
7503                ot = (mod != 3 ? MO_16 : s->dflag);
7504            } else {
7505                ot = MO_16;
7506            }
7507            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7508            break;
7509        case 0xee: /* rdpkru */
7510            if (prefixes & PREFIX_LOCK) {
7511                goto illegal_op;
7512            }
7513            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7514            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7515            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7516            break;
7517        case 0xef: /* wrpkru */
7518            if (prefixes & PREFIX_LOCK) {
7519                goto illegal_op;
7520            }
7521            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7522                                  cpu_regs[R_EDX]);
7523            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7524            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7525            break;
7526        CASE_MODRM_OP(6): /* lmsw */
7527            if (s->cpl != 0) {
7528                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7529                break;
7530            }
7531            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7532            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7533            gen_helper_lmsw(cpu_env, cpu_T0);
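                /* CR0 (in particular the PE bit) may have changed, so
                   end the TB.  */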
7534            gen_jmp_im(s->pc - s->cs_base);
7535            gen_eob(s);
7536            break;
7537
7538        CASE_MODRM_MEM_OP(7): /* invlpg */
7539            if (s->cpl != 0) {
7540                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7541                break;
7542            }
7543            gen_update_cc_op(s);
7544            gen_jmp_im(pc_start - s->cs_base);
7545            gen_lea_modrm(env, s, modrm);
7546            gen_helper_invlpg(cpu_env, cpu_A0);
7547            gen_jmp_im(s->pc - s->cs_base);
7548            gen_eob(s);
7549            break;
7550
7551        case 0xf8: /* swapgs */
7552#ifdef TARGET_X86_64
7553            if (CODE64(s)) {
7554                if (s->cpl != 0) {
7555                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7556                } else {
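                        /* Exchange the GS base with the value in the
                           KERNEL_GS_BASE MSR.  */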
7557                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
7558                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7559                                  offsetof(CPUX86State, kernelgsbase));
7560                    tcg_gen_st_tl(cpu_T0, cpu_env,
7561                                  offsetof(CPUX86State, kernelgsbase));
7562                }
7563                break;
7564            }
7565#endif
7566            goto illegal_op;
7567
7568        case 0xf9: /* rdtscp */
7569            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7570                goto illegal_op;
7571            }
7572            gen_update_cc_op(s);
7573            gen_jmp_im(pc_start - s->cs_base);
7574            if (s->tb->cflags & CF_USE_ICOUNT) {
7575                gen_io_start();
7576            }
7577            gen_helper_rdtscp(cpu_env);
7578            if (s->tb->cflags & CF_USE_ICOUNT) {
7579                gen_io_end();
7580                gen_jmp(s, s->pc - s->cs_base);
7581            }
7582            break;
7583
7584        default:
7585            goto unknown_op;
7586        }
7587        break;
7588
7589    case 0x108: /* invd */
7590    case 0x109: /* wbinvd */
7591        if (s->cpl != 0) {
7592            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7593        } else {
7594            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7595            /* nothing to do */
7596        }
7597        break;
7598    case 0x63: /* arpl or movslS (x86_64) */
7599#ifdef TARGET_X86_64
7600        if (CODE64(s)) {
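                /* In 64-bit mode, opcode 0x63 is MOVSXD: move a 32-bit
                   source, sign-extending when the destination is
                   64-bit.  */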
7601            int d_ot;
7602            /* d_ot is the operand size of the destination */
7603            d_ot = dflag;
7604
7605            modrm = cpu_ldub_code(env, s->pc++);
7606            reg = ((modrm >> 3) & 7) | rex_r;
7607            mod = (modrm >> 6) & 3;
7608            rm = (modrm & 7) | REX_B(s);
7609
7610            if (mod == 3) {
7611                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
7612                /* sign extend */
7613                if (d_ot == MO_64) {
7614                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
7615                }
7616                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7617            } else {
7618                gen_lea_modrm(env, s, modrm);
7619                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
7620                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7621            }
7622        } else
7623#endif
7624        {
7625            TCGLabel *label1;
7626            TCGv t0, t1, t2, a0;
7627
7628            if (!s->pe || s->vm86)
7629                goto illegal_op;
7630            t0 = tcg_temp_local_new();
7631            t1 = tcg_temp_local_new();
7632            t2 = tcg_temp_local_new();
7633            ot = MO_16;
7634            modrm = cpu_ldub_code(env, s->pc++);
7635            reg = (modrm >> 3) & 7;
7636            mod = (modrm >> 6) & 3;
7637            rm = modrm & 7;
7638            if (mod != 3) {
7639                gen_lea_modrm(env, s, modrm);
7640                gen_op_ld_v(s, ot, t0, cpu_A0);
7641                a0 = tcg_temp_local_new();
7642                tcg_gen_mov_tl(a0, cpu_A0);
7643            } else {
7644                gen_op_mov_v_reg(ot, t0, rm);
7645                TCGV_UNUSED(a0);
7646            }
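                /* ARPL: if the destination selector's RPL (bits 1:0)
                   is below the source's, raise it to match and set ZF;
                   otherwise clear ZF.  t2 accumulates the new CC_Z
                   bit.  */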
7647            gen_op_mov_v_reg(ot, t1, reg);
7648            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
7649            tcg_gen_andi_tl(t1, t1, 3);
7650            tcg_gen_movi_tl(t2, 0);
7651            label1 = gen_new_label();
7652            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
7653            tcg_gen_andi_tl(t0, t0, ~3);
7654            tcg_gen_or_tl(t0, t0, t1);
7655            tcg_gen_movi_tl(t2, CC_Z);
7656            gen_set_label(label1);
7657            if (mod != 3) {
7658                gen_op_st_v(s, ot, t0, a0);
7659                tcg_temp_free(a0);
7660            } else {
7661                gen_op_mov_reg_v(ot, rm, t0);
7662            }
7663            gen_compute_eflags(s);
7664            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7665            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7666            tcg_temp_free(t0);
7667            tcg_temp_free(t1);
7668            tcg_temp_free(t2);
7669        }
7670        break;
7671    case 0x102: /* lar */
7672    case 0x103: /* lsl */
7673        {
7674            TCGLabel *label1;
7675            TCGv t0;
7676            if (!s->pe || s->vm86)
7677                goto illegal_op;
7678            ot = dflag != MO_16 ? MO_32 : MO_16;
7679            modrm = cpu_ldub_code(env, s->pc++);
7680            reg = ((modrm >> 3) & 7) | rex_r;
7681            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7682            t0 = tcg_temp_local_new();
7683            gen_update_cc_op(s);
7684            if (b == 0x102) {
7685                gen_helper_lar(t0, cpu_env, cpu_T0);
7686            } else {
7687                gen_helper_lsl(t0, cpu_env, cpu_T0);
7688            }
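                /* The helpers report validity via CC_Z in cc_src; the
                   destination is written only when ZF is set, i.e.
                   when the selector was valid.  */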
7689            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
7690            label1 = gen_new_label();
7691            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
7692            gen_op_mov_reg_v(ot, reg, t0);
7693            gen_set_label(label1);
7694            set_cc_op(s, CC_OP_EFLAGS);
7695            tcg_temp_free(t0);
7696        }
7697        break;
7698    case 0x118:
7699        modrm = cpu_ldub_code(env, s->pc++);
7700        mod = (modrm >> 6) & 3;
7701        op = (modrm >> 3) & 7;
7702        switch(op) {
7703        case 0: /* prefetchnta */
7704        case 1: /* prefetcht0 */
7705        case 2: /* prefetcht1 */
7706        case 3: /* prefetcht2 */
7707            if (mod == 3)
7708                goto illegal_op;
7709            gen_nop_modrm(env, s, modrm);
7710            /* nothing more to do */
7711            break;
7712        default: /* nop (multi byte) */
7713            gen_nop_modrm(env, s, modrm);
7714            break;
7715        }
7716        break;
7717    case 0x11a:
7718        modrm = cpu_ldub_code(env, s->pc++);
7719        if (s->flags & HF_MPX_EN_MASK) {
7720            mod = (modrm >> 6) & 3;
7721            reg = ((modrm >> 3) & 7) | rex_r;
7722            if (prefixes & PREFIX_REPZ) {
7723                /* bndcl */
7724                if (reg >= 4
7725                    || (prefixes & PREFIX_LOCK)
7726                    || s->aflag == MO_16) {
7727                    goto illegal_op;
7728                }
7729                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7730            } else if (prefixes & PREFIX_REPNZ) {
7731                /* bndcu */
7732                if (reg >= 4
7733                    || (prefixes & PREFIX_LOCK)
7734                    || s->aflag == MO_16) {
7735                    goto illegal_op;
7736                }
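                    /* bndu holds the inverted upper bound (as written
                       by BNDMK), so un-invert it before the unsigned
                       compare.  */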
7737                TCGv_i64 notu = tcg_temp_new_i64();
7738                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7739                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7740                tcg_temp_free_i64(notu);
7741            } else if (prefixes & PREFIX_DATA) {
7742                /* bndmov -- from reg/mem */
7743                if (reg >= 4 || s->aflag == MO_16) {
7744                    goto illegal_op;
7745                }
7746                if (mod == 3) {
7747                    int reg2 = (modrm & 7) | REX_B(s);
7748                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7749                        goto illegal_op;
7750                    }
7751                    if (s->flags & HF_MPX_IU_MASK) {
7752                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7753                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7754                    }
7755                } else {
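                        /* In memory, a bound pair is the lower bound
                           followed by the upper bound: 2 x 8 bytes in
                           64-bit mode, 2 x 4 bytes otherwise.  */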
7756                    gen_lea_modrm(env, s, modrm);
7757                    if (CODE64(s)) {
7758                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7759                                            s->mem_index, MO_LEQ);
7760                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7761                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7762                                            s->mem_index, MO_LEQ);
7763                    } else {
7764                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7765                                            s->mem_index, MO_LEUL);
7766                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7767                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7768                                            s->mem_index, MO_LEUL);
7769                    }
7770                    /* bnd registers are now in-use */
7771                    gen_set_hflag(s, HF_MPX_IU_MASK);
7772                }
7773            } else if (mod != 3) {
7774                /* bndldx */
7775                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7776                if (reg >= 4
7777                    || (prefixes & PREFIX_LOCK)
7778                    || s->aflag == MO_16
7779                    || a.base < -1) {
7780                    goto illegal_op;
7781                }
7782                if (a.base >= 0) {
7783                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7784                } else {
7785                    tcg_gen_movi_tl(cpu_A0, 0);
7786                }
7787                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7788                if (a.index >= 0) {
7789                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7790                } else {
7791                    tcg_gen_movi_tl(cpu_T0, 0);
7792                }
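                    /* The 64-bit helper returns the lower bound and
                       leaves the upper bound in env->mmx_t0; the
                       32-bit helper packs both bounds into one i64,
                       lower bound in the low half.  */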
7793                if (CODE64(s)) {
7794                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
7795                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7796                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7797                } else {
7798                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
7799                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7800                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7801                }
7802                gen_set_hflag(s, HF_MPX_IU_MASK);
7803            }
7804        }
7805        gen_nop_modrm(env, s, modrm);
7806        break;
7807    case 0x11b:
7808        modrm = cpu_ldub_code(env, s->pc++);
7809        if (s->flags & HF_MPX_EN_MASK) {
7810            mod = (modrm >> 6) & 3;
7811            reg = ((modrm >> 3) & 7) | rex_r;
7812            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7813                /* bndmk */
7814                if (reg >= 4
7815                    || (prefixes & PREFIX_LOCK)
7816                    || s->aflag == MO_16) {
7817                    goto illegal_op;
7818                }
7819                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7820                if (a.base >= 0) {
7821                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7822                    if (!CODE64(s)) {
7823                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7824                    }
7825                } else if (a.base == -1) {
7826                    /* no base register: the lower bound is 0 */
7827                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7828                } else {
7829                    /* rip-relative generates #ud */
7830                    goto illegal_op;
7831                }
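                    /* Per BNDMK, the upper bound is kept in inverted
                       (one's-complement) form: bndu = ~lea.  */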
7832                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
7833                if (!CODE64(s)) {
7834                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
7835                }
7836                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
7837                /* bnd registers are now in-use */
7838                gen_set_hflag(s, HF_MPX_IU_MASK);
7839                break;
7840            } else if (prefixes & PREFIX_REPNZ) {
7841                /* bndcn */
7842                if (reg >= 4
7843                    || (prefixes & PREFIX_LOCK)
7844                    || s->aflag == MO_16) {
7845                    goto illegal_op;
7846                }
7847                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7848            } else if (prefixes & PREFIX_DATA) {
7849                /* bndmov -- to reg/mem */
7850                if (reg >= 4 || s->aflag == MO_16) {
7851                    goto illegal_op;
7852                }
7853                if (mod == 3) {
7854                    int reg2 = (modrm & 7) | REX_B(s);
7855                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7856                        goto illegal_op;
7857                    }
7858                    if (s->flags & HF_MPX_IU_MASK) {
7859                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7860                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7861                    }
7862                } else {
7863                    gen_lea_modrm(env, s, modrm);
7864                    if (CODE64(s)) {
7865                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7866                                            s->mem_index, MO_LEQ);
7867                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7868                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7869                                            s->mem_index, MO_LEQ);
7870                    } else {
7871                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7872                                            s->mem_index, MO_LEUL);
7873                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7874                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7875                                            s->mem_index, MO_LEUL);
7876                    }
7877                }
7878            } else if (mod != 3) {
7879                /* bndstx */
7880                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7881                if (reg >= 4
7882                    || (prefixes & PREFIX_LOCK)
7883                    || s->aflag == MO_16
7884                    || a.base < -1) {
7885                    goto illegal_op;
7886                }
7887                if (a.base >= 0) {
7888                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7889                } else {
7890                    tcg_gen_movi_tl(cpu_A0, 0);
7891                }
7892                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7893                if (a.index >= 0) {
7894                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7895                } else {
7896                    tcg_gen_movi_tl(cpu_T0, 0);
7897                }
7898                if (CODE64(s)) {
7899                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
7900                                        cpu_bndl[reg], cpu_bndu[reg]);
7901                } else {
7902                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
7903                                        cpu_bndl[reg], cpu_bndu[reg]);
7904                }
7905            }
7906        }
7907        gen_nop_modrm(env, s, modrm);
7908        break;
7909    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7910        modrm = cpu_ldub_code(env, s->pc++);
7911        gen_nop_modrm(env, s, modrm);
7912        break;
7913    case 0x120: /* mov reg, crN */
7914    case 0x122: /* mov crN, reg */
7915        if (s->cpl != 0) {
7916            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7917        } else {
7918            modrm = cpu_ldub_code(env, s->pc++);
7919            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7920             * AMD documentation (24594.pdf) and testing of
7921             * Intel 386 and 486 processors all show that the mod bits
7922             * are assumed to be 1's, regardless of actual values.
7923             */
7924            rm = (modrm & 7) | REX_B(s);
7925            reg = ((modrm >> 3) & 7) | rex_r;
7926            if (CODE64(s))
7927                ot = MO_64;
7928            else
7929                ot = MO_32;
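                /* With AMD's CR8 legacy alias (CPUID_EXT3_CR8LEG), a
                   LOCK prefix on MOV CR0 accesses CR8 instead.  */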
7930            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7931                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7932                reg = 8;
7933            }
7934            switch(reg) {
7935            case 0:
7936            case 2:
7937            case 3:
7938            case 4:
7939            case 8:
7940                gen_update_cc_op(s);
7941                gen_jmp_im(pc_start - s->cs_base);
7942                if (b & 2) {
7943                    if (s->tb->cflags & CF_USE_ICOUNT) {
7944                        gen_io_start();
7945                    }
7946                    gen_op_mov_v_reg(ot, cpu_T0, rm);
7947                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
7948                                         cpu_T0);
7949                    if (s->tb->cflags & CF_USE_ICOUNT) {
7950                        gen_io_end();
7951                    }
7952                    gen_jmp_im(s->pc - s->cs_base);
7953                    gen_eob(s);
7954                } else {
7955                    if (s->tb->cflags & CF_USE_ICOUNT) {
7956                        gen_io_start();
7957                    }
7958                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
7959                    gen_op_mov_reg_v(ot, rm, cpu_T0);
7960                    if (s->tb->cflags & CF_USE_ICOUNT) {
7961                        gen_io_end();
7962                    }
7963                }
7964                break;
7965            default:
7966                goto unknown_op;
7967            }
7968        }
7969        break;
7970    case 0x121: /* mov reg, drN */
7971    case 0x123: /* mov drN, reg */
7972        if (s->cpl != 0) {
7973            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7974        } else {
7975            modrm = cpu_ldub_code(env, s->pc++);
7976            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7977             * AMD documentation (24594.pdf) and testing of
7978             * Intel 386 and 486 processors all show that the mod bits
7979             * are assumed to be 1's, regardless of actual values.
7980             */
7981            rm = (modrm & 7) | REX_B(s);
7982            reg = ((modrm >> 3) & 7) | rex_r;
7983            if (CODE64(s))
7984                ot = MO_64;
7985            else
7986                ot = MO_32;
7987            if (reg >= 8) {
7988                goto illegal_op;
7989            }
7990            if (b & 2) {
7991                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
7992                gen_op_mov_v_reg(ot, cpu_T0, rm);
7993                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7994                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
7995                gen_jmp_im(s->pc - s->cs_base);
7996                gen_eob(s);
7997            } else {
7998                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
7999                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
8000                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
8001                gen_op_mov_reg_v(ot, rm, cpu_T0);
8002            }
8003        }
8004        break;
8005    case 0x106: /* clts */
8006        if (s->cpl != 0) {
8007            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8008        } else {
8009            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8010            gen_helper_clts(cpu_env);
8011            /* abort block because static cpu state changed */
8012            gen_jmp_im(s->pc - s->cs_base);
8013            gen_eob(s);
8014        }
8015        break;
8016    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8017    case 0x1c3: /* MOVNTI reg, mem */
8018        if (!(s->cpuid_features & CPUID_SSE2))
8019            goto illegal_op;
8020        ot = mo_64_32(dflag);
8021        modrm = cpu_ldub_code(env, s->pc++);
8022        mod = (modrm >> 6) & 3;
8023        if (mod == 3)
8024            goto illegal_op;
8025        reg = ((modrm >> 3) & 7) | rex_r;
8026        /* generate a generic store */
8027        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8028        break;
8029    case 0x1ae:
8030        modrm = cpu_ldub_code(env, s->pc++);
8031        switch (modrm) {
8032        CASE_MODRM_MEM_OP(0): /* fxsave */
8033            if (!(s->cpuid_features & CPUID_FXSR)
8034                || (prefixes & PREFIX_LOCK)) {
8035                goto illegal_op;
8036            }
8037            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8038                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8039                break;
8040            }
8041            gen_lea_modrm(env, s, modrm);
8042            gen_helper_fxsave(cpu_env, cpu_A0);
8043            break;
8044
8045        CASE_MODRM_MEM_OP(1): /* fxrstor */
8046            if (!(s->cpuid_features & CPUID_FXSR)
8047                || (prefixes & PREFIX_LOCK)) {
8048                goto illegal_op;
8049            }
8050            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8051                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8052                break;
8053            }
8054            gen_lea_modrm(env, s, modrm);
8055            gen_helper_fxrstor(cpu_env, cpu_A0);
8056            break;
8057
8058        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8059            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8060                goto illegal_op;
8061            }
8062            if (s->flags & HF_TS_MASK) {
8063                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8064                break;
8065            }
8066            gen_lea_modrm(env, s, modrm);
8067            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
8068            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
8069            break;
8070
8071        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8072            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8073                goto illegal_op;
8074            }
8075            if (s->flags & HF_TS_MASK) {
8076                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8077                break;
8078            }
8079            gen_lea_modrm(env, s, modrm);
8080            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
8081            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
8082            break;
8083
8084        CASE_MODRM_MEM_OP(4): /* xsave */
8085            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8086                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8087                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8088                goto illegal_op;
8089            }
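                /* EDX:EAX form the instruction mask of state
                   components to save.  */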
8090            gen_lea_modrm(env, s, modrm);
8091            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8092                                  cpu_regs[R_EDX]);
8093            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
8094            break;
8095
8096        CASE_MODRM_MEM_OP(5): /* xrstor */
8097            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8098                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8099                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8100                goto illegal_op;
8101            }
8102            gen_lea_modrm(env, s, modrm);
8103            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8104                                  cpu_regs[R_EDX]);
8105            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
8106            /* XRSTOR is how MPX is enabled, which changes how
8107               we translate.  Thus we need to end the TB.  */
8108            gen_update_cc_op(s);
8109            gen_jmp_im(s->pc - s->cs_base);
8110            gen_eob(s);
8111            break;
8112
8113        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8114            if (prefixes & PREFIX_LOCK) {
8115                goto illegal_op;
8116            }
8117            if (prefixes & PREFIX_DATA) {
8118                /* clwb */
8119                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8120                    goto illegal_op;
8121                }
8122                gen_nop_modrm(env, s, modrm);
8123            } else {
8124                /* xsaveopt */
8125                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8126                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8127                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8128                    goto illegal_op;
8129                }
8130                gen_lea_modrm(env, s, modrm);
8131                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8132                                      cpu_regs[R_EDX]);
8133                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
8134            }
8135            break;
8136
8137        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8138            if (prefixes & PREFIX_LOCK) {
8139                goto illegal_op;
8140            }
8141            if (prefixes & PREFIX_DATA) {
8142                /* clflushopt */
8143                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8144                    goto illegal_op;
8145                }
8146            } else {
8147                /* clflush */
8148                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8149                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8150                    goto illegal_op;
8151                }
8152            }
8153            gen_nop_modrm(env, s, modrm);
8154            break;
8155
8156        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8157        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8158        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8159        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8160            if (CODE64(s)
8161                && (prefixes & PREFIX_REPZ)
8162                && !(prefixes & PREFIX_LOCK)
8163                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8164                TCGv base, treg, src, dst;
8165
8166                /* Preserve hflags bits by testing CR4 at runtime.  */
8167                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
8168                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
8169
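                    /* modrm bit 3 selects FS vs GS, bit 4 read vs
                       write, and bits 2:0 (with REX.B) the general
                       register.  */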
8170                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8171                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8172
8173                if (modrm & 0x10) {
8174                    /* wr*base */
8175                    dst = base, src = treg;
8176                } else {
8177                    /* rd*base */
8178                    dst = treg, src = base;
8179                }
8180
8181                if (s->dflag == MO_32) {
8182                    tcg_gen_ext32u_tl(dst, src);
8183                } else {
8184                    tcg_gen_mov_tl(dst, src);
8185                }
8186                break;
8187            }
8188            goto unknown_op;
8189
8190        case 0xf8: /* sfence / pcommit */
8191            if (prefixes & PREFIX_DATA) {
8192                /* pcommit */
8193                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8194                    || (prefixes & PREFIX_LOCK)) {
8195                    goto illegal_op;
8196                }
8197                break;
8198            }
8199            /* fallthru */
8200        case 0xf9 ... 0xff: /* sfence */
8201            if (!(s->cpuid_features & CPUID_SSE)
8202                || (prefixes & PREFIX_LOCK)) {
8203                goto illegal_op;
8204            }
8205            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8206            break;
8207        case 0xe8 ... 0xef: /* lfence */
8208            if (!(s->cpuid_features & CPUID_SSE)
8209                || (prefixes & PREFIX_LOCK)) {
8210                goto illegal_op;
8211            }
8212            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8213            break;
8214        case 0xf0 ... 0xf7: /* mfence */
8215            if (!(s->cpuid_features & CPUID_SSE2)
8216                || (prefixes & PREFIX_LOCK)) {
8217                goto illegal_op;
8218            }
8219            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8220            break;
8221
8222        default:
8223            goto unknown_op;
8224        }
8225        break;
8226
8227    case 0x10d: /* 3DNow! prefetch(w) */
8228        modrm = cpu_ldub_code(env, s->pc++);
8229        mod = (modrm >> 6) & 3;
8230        if (mod == 3)
8231            goto illegal_op;
8232        gen_nop_modrm(env, s, modrm);
8233        break;
8234    case 0x1aa: /* rsm */
8235        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8236        if (!(s->flags & HF_SMM_MASK))
8237            goto illegal_op;
8238        gen_update_cc_op(s);
8239        gen_jmp_im(s->pc - s->cs_base);
8240        gen_helper_rsm(cpu_env);
8241        gen_eob(s);
8242        break;
8243    case 0x1b8: /* SSE4.2 popcnt */
8244        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8245             PREFIX_REPZ)
8246            goto illegal_op;
8247        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8248            goto illegal_op;
8249
8250        modrm = cpu_ldub_code(env, s->pc++);
8251        reg = ((modrm >> 3) & 7) | rex_r;
8252
8253        if (s->prefix & PREFIX_DATA) {
8254            ot = MO_16;
8255        } else {
8256            ot = mo_64_32(dflag);
8257        }
8258
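            /* POPCNT sets ZF iff the source is zero and clears the
               other flags; keep the source in cc_src so CC_OP_POPCNT
               can recompute ZF lazily.  */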
8259        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8260        gen_extu(ot, cpu_T0);
8261        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
8262        tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
8263        gen_op_mov_reg_v(ot, reg, cpu_T0);
8264
8265        set_cc_op(s, CC_OP_POPCNT);
8266        break;
8267    case 0x10e ... 0x10f:
8268        /* 3DNow! instructions, ignore prefixes */
8269        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8270    case 0x110 ... 0x117:
8271    case 0x128 ... 0x12f:
8272    case 0x138 ... 0x13a:
8273    case 0x150 ... 0x179:
8274    case 0x17c ... 0x17f:
8275    case 0x1c2:
8276    case 0x1c4 ... 0x1c6:
8277    case 0x1d0 ... 0x1fe:
8278        gen_sse(env, s, b, pc_start, rex_r);
8279        break;
8280    default:
8281        goto unknown_op;
8282    }
8283    return s->pc;
8284 illegal_op:
8285    gen_illegal_opcode(s);
8286    return s->pc;
8287 unknown_op:
8288    gen_unknown_opcode(env, s);
8289    return s->pc;
8290}
8291
8292void tcg_x86_init(void)
8293{
8294    static const char reg_names[CPU_NB_REGS][4] = {
8295#ifdef TARGET_X86_64
8296        [R_EAX] = "rax",
8297        [R_EBX] = "rbx",
8298        [R_ECX] = "rcx",
8299        [R_EDX] = "rdx",
8300        [R_ESI] = "rsi",
8301        [R_EDI] = "rdi",
8302        [R_EBP] = "rbp",
8303        [R_ESP] = "rsp",
8304        [8]  = "r8",
8305        [9]  = "r9",
8306        [10] = "r10",
8307        [11] = "r11",
8308        [12] = "r12",
8309        [13] = "r13",
8310        [14] = "r14",
8311        [15] = "r15",
8312#else
8313        [R_EAX] = "eax",
8314        [R_EBX] = "ebx",
8315        [R_ECX] = "ecx",
8316        [R_EDX] = "edx",
8317        [R_ESI] = "esi",
8318        [R_EDI] = "edi",
8319        [R_EBP] = "ebp",
8320        [R_ESP] = "esp",
8321#endif
8322    };
8323    static const char seg_base_names[6][8] = {
8324        [R_CS] = "cs_base",
8325        [R_DS] = "ds_base",
8326        [R_ES] = "es_base",
8327        [R_FS] = "fs_base",
8328        [R_GS] = "gs_base",
8329        [R_SS] = "ss_base",
8330    };
8331    static const char bnd_regl_names[4][8] = {
8332        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8333    };
8334    static const char bnd_regu_names[4][8] = {
8335        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8336    };
8337    int i;
8338    static bool initialized;
8339
8340    if (initialized) {
8341        return;
8342    }
8343    initialized = true;
8344
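        /* Register the guest CPU state fields as TCG globals; these
           are shared by all translations.  */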
8345    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
8346    tcg_ctx.tcg_env = cpu_env;
8347    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8348                                       offsetof(CPUX86State, cc_op), "cc_op");
8349    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8350                                    "cc_dst");
8351    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8352                                    "cc_src");
8353    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8354                                     "cc_src2");
8355
8356    for (i = 0; i < CPU_NB_REGS; ++i) {
8357        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8358                                         offsetof(CPUX86State, regs[i]),
8359                                         reg_names[i]);
8360    }
8361
8362    for (i = 0; i < 6; ++i) {
8363        cpu_seg_base[i]
8364            = tcg_global_mem_new(cpu_env,
8365                                 offsetof(CPUX86State, segs[i].base),
8366                                 seg_base_names[i]);
8367    }
8368
8369    for (i = 0; i < 4; ++i) {
8370        cpu_bndl[i]
8371            = tcg_global_mem_new_i64(cpu_env,
8372                                     offsetof(CPUX86State, bnd_regs[i].lb),
8373                                     bnd_regl_names[i]);
8374        cpu_bndu[i]
8375            = tcg_global_mem_new_i64(cpu_env,
8376                                     offsetof(CPUX86State, bnd_regs[i].ub),
8377                                     bnd_regu_names[i]);
8378    }
8379}
8380
8381/* generate intermediate code for basic block 'tb'.  */
8382void gen_intermediate_code(CPUState *cs, TranslationBlock *tb)
8383{
8384    CPUX86State *env = cs->env_ptr;
8385    DisasContext dc1, *dc = &dc1;
8386    target_ulong pc_ptr;
8387    uint32_t flags;
8388    target_ulong pc_start;
8389    target_ulong cs_base;
8390    int num_insns;
8391    int max_insns;
8392
8393    /* generate intermediate code */
8394    pc_start = tb->pc;
8395    cs_base = tb->cs_base;
8396    flags = tb->flags;
8397
8398    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8399    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8400    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8401    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8402    dc->f_st = 0;
8403    dc->vm86 = (flags >> VM_SHIFT) & 1;
8404    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8405    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8406    dc->tf = (flags >> TF_SHIFT) & 1;
8407    dc->singlestep_enabled = cs->singlestep_enabled;
8408    dc->cc_op = CC_OP_DYNAMIC;
8409    dc->cc_op_dirty = false;
8410    dc->cs_base = cs_base;
8411    dc->tb = tb;
8412    dc->popl_esp_hack = 0;
8413    /* select memory access functions */
8414    dc->mem_index = 0;
8415#ifdef CONFIG_SOFTMMU
8416    dc->mem_index = cpu_mmu_index(env, false);
8417#endif
8418    dc->cpuid_features = env->features[FEAT_1_EDX];
8419    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8420    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8421    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8422    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8423    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8424#ifdef TARGET_X86_64
8425    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8426    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8427#endif
8428    dc->flags = flags;
8429    dc->jmp_opt = !(dc->tf || cs->singlestep_enabled ||
8430                    (flags & HF_INHIBIT_IRQ_MASK));
8431    /* Do not optimize repz jumps at all in icount mode, because
8432       rep movsS instructions are executed along different code
8433       paths in the repz_opt and !repz_opt modes; the former was
8434       always used except in single-step mode.  Disabling the
8435       optimization here makes the control paths identical in
8436       normal-run and single-step modes.  As a result there is no
8437       repz jump optimization in record/replay mode, and there is
8438       always an additional step for ecx=0 when icount is
8439       enabled.
8440     */
8441    dc->repz_opt = !dc->jmp_opt && !(tb->cflags & CF_USE_ICOUNT);
8442#if 0
8443    /* check addseg logic */
8444    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8445        printf("ERROR addseg\n");
8446#endif
8447
8448    cpu_T0 = tcg_temp_new();
8449    cpu_T1 = tcg_temp_new();
8450    cpu_A0 = tcg_temp_new();
8451
8452    cpu_tmp0 = tcg_temp_new();
8453    cpu_tmp1_i64 = tcg_temp_new_i64();
8454    cpu_tmp2_i32 = tcg_temp_new_i32();
8455    cpu_tmp3_i32 = tcg_temp_new_i32();
8456    cpu_tmp4 = tcg_temp_new();
8457    cpu_ptr0 = tcg_temp_new_ptr();
8458    cpu_ptr1 = tcg_temp_new_ptr();
8459    cpu_cc_srcT = tcg_temp_local_new();
8460
8461    dc->is_jmp = DISAS_NEXT;
8462    pc_ptr = pc_start;
8463    num_insns = 0;
8464    max_insns = tb->cflags & CF_COUNT_MASK;
8465    if (max_insns == 0) {
8466        max_insns = CF_COUNT_MASK;
8467    }
8468    if (max_insns > TCG_MAX_INSNS) {
8469        max_insns = TCG_MAX_INSNS;
8470    }
8471
8472    gen_tb_start(tb);
8473    for(;;) {
8474        tcg_gen_insn_start(pc_ptr, dc->cc_op);
8475        num_insns++;
8476
8477        /* If RF is set, suppress an internally generated breakpoint.  */
8478        if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
8479                                         tb->flags & HF_RF_MASK
8480                                         ? BP_GDB : BP_ANY))) {
8481            gen_debug(dc, pc_ptr - dc->cs_base);
8482            /* The address covered by the breakpoint must be included in
8483               [tb->pc, tb->pc + tb->size) in order for it to be
8484               properly cleared -- thus we increment the PC here so that
8485               the logic setting tb->size below does the right thing.  */
8486            pc_ptr += 1;
8487            goto done_generating;
8488        }
8489        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
8490            gen_io_start();
8491        }
8492
8493        pc_ptr = disas_insn(env, dc, pc_ptr);
8494        /* stop translation if indicated */
8495        if (dc->is_jmp)
8496            break;
8497        /* In single-step mode, generate only one instruction and
8498           then raise an exception.  */
8499        /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, clear
8500           the flag and abort the translation to give the irqs a
8501           chance to happen.  */
8502        if (dc->tf || dc->singlestep_enabled ||
8503            (flags & HF_INHIBIT_IRQ_MASK)) {
8504            gen_jmp_im(pc_ptr - dc->cs_base);
8505            gen_eob(dc);
8506            break;
8507        }
8508        /* Do not cross a page boundary in icount mode, since that
8509           can cause an exception.  Stop the block so that any
8510           instruction crossing the boundary is the first one of
8511           the next block; if the current instruction has already
8512           crossed it, that is fine, since no exception stopped us.
8513         */
8514        if ((tb->cflags & CF_USE_ICOUNT)
8515            && ((pc_ptr & TARGET_PAGE_MASK)
8516                != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK)
8517                || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) {
8518            gen_jmp_im(pc_ptr - dc->cs_base);
8519            gen_eob(dc);
8520            break;
8521        }
8522        /* if too long translation, stop generation too */
8523        if (tcg_op_buf_full() ||
8524            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
8525            num_insns >= max_insns) {
8526            gen_jmp_im(pc_ptr - dc->cs_base);
8527            gen_eob(dc);
8528            break;
8529        }
8530        if (singlestep) {
8531            gen_jmp_im(pc_ptr - dc->cs_base);
8532            gen_eob(dc);
8533            break;
8534        }
8535    }
8536    if (tb->cflags & CF_LAST_IO)
8537        gen_io_end();
8538done_generating:
8539    gen_tb_end(tb, num_insns);
8540
8541#ifdef DEBUG_DISAS
8542    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
8543        && qemu_log_in_addr_range(pc_start)) {
8544        int disas_flags;
8545        qemu_log_lock();
8546        qemu_log("----------------\n");
8547        qemu_log("IN: %s\n", lookup_symbol(pc_start));
8548#ifdef TARGET_X86_64
8549        if (dc->code64)
8550            disas_flags = 2;
8551        else
8552#endif
8553            disas_flags = !dc->code32;
8554        log_target_disas(cs, pc_start, pc_ptr - pc_start, disas_flags);
8555        qemu_log("\n");
8556        qemu_log_unlock();
8557    }
8558#endif
8559
8560    tb->size = pc_ptr - pc_start;
8561    tb->icount = num_insns;
8562}
8563
8564void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8565                          target_ulong *data)
8566{
8567    int cc_op = data[1];
8568    env->eip = data[0] - tb->cs_base;
8569    if (cc_op != CC_OP_DYNAMIC) {
8570        env->cc_op = cc_op;
8571    }
8572}
8573