qemu/target/i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
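
/* For example, CASE_MODRM_MEM_OP(7) matches every modrm byte whose
   reg/opcode field is 7 and whose mod field is 0, 1 or 2, i.e. all
   encodings of a /7 opcode group that take a memory operand; the
   mod == 3 register forms are matched only by CASE_MODRM_OP.  */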

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    TCGMemOp aflag;
    TCGMemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
#ifdef TARGET_X86_64
    bool x86_64_hregs;
#endif
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

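/* Switch the translation-time flags state to OP, discarding any CC_*
   globals that the new mode no longer reads so that TCG can dead-code
   eliminate the work that produced them.  */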
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

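/* Flush the translation-time cc_op value to the cpu_cc_op global if it
   has not been stored yet.  */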
static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
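 * For example, reg 4 names AH in a legacy encoding, but SPL when any
 * REX prefix is present (x86_64_hregs).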
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || s->x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline TCGMemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline TCGMemOp mo_64_32(TCGMemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

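/* Sign- or zero-extend the low SIZE bits of SRC into DST and return the
   extended value; sizes at or above the target word width return SRC
   unchanged.  */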
static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

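/* Generate an I/O permission check for a port access of size OT: the
   check_io helpers consult the TSS I/O bitmap when CPL > IOPL or in
   vm86 mode, and the SVM IOIO intercept is checked when running a
   guest.  */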
static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, s->tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, s->tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, s->tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_GUEST_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

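/* Description of a deferred condition test: the condition holds when
   "REG cond IMM" (or "REG cond REG2" when use_reg2 is set), evaluated
   after masking REG with MASK when MASK != -1.  no_setcond means REG
   already holds the boolean result.  */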
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}

static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
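/* Each REP iteration is translated as: exit to the next instruction if
   ECX is zero, perform one string operation, decrement ECX, then jump
   back to the current instruction so the loop is re-entered.  */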
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
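/* Under a LOCK prefix the read-modify-write is performed with a single
   atomic TCG operation instead of a load/op/store sequence.  */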
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch(op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

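/* Store the flag results of a shift by a possibly-zero COUNT: the x86
   semantics require a zero shift to leave the flags unchanged, so the
   stores into CC_DST, CC_SRC and CC_OP are done with movcond on the
   count (or unconditionally when the old value is known to be dead).  */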
static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

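/* Shift (SHL/SHR/SAR) the operand selected by OP1 by the variable count
   in T1; TMP0 receives the value shifted by count - 1 so that the flag
   computation can recover CF from its top or bottom bit.  */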
static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

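/* Rotate (ROL/ROR) the operand selected by OP1 by the variable count in
   T1.  8- and 16-bit inputs are replicated to fill 32 bits so that a
   single 32-bit rotate gives the correct result for any masked count.  */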
1540static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
1541{
1542    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1543    TCGv_i32 t0, t1;
1544
1545    /* load */
1546    if (op1 == OR_TMP0) {
1547        gen_op_ld_v(s, ot, s->T0, s->A0);
1548    } else {
1549        gen_op_mov_v_reg(s, ot, s->T0, op1);
1550    }
1551
1552    tcg_gen_andi_tl(s->T1, s->T1, mask);
1553
1554    switch (ot) {
1555    case MO_8:
1556        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1557        tcg_gen_ext8u_tl(s->T0, s->T0);
1558        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1559        goto do_long;
1560    case MO_16:
1561        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1562        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1563        goto do_long;
1564    do_long:
1565#ifdef TARGET_X86_64
1566    case MO_32:
1567        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1568        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1569        if (is_right) {
1570            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1571        } else {
1572            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1573        }
1574        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1575        break;
1576#endif
1577    default:
1578        if (is_right) {
1579            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1580        } else {
1581            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1582        }
1583        break;
1584    }
1585
1586    /* store */
1587    gen_op_st_rm_T0_A0(s, ot, op1);
1588
1589    /* We'll need the flags computed into CC_SRC.  */
1590    gen_compute_eflags(s);
1591
1592    /* The value that was "rotated out" is now present at the other end
1593       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1594       since we've computed the flags into CC_SRC, these variables are
1595       currently dead.  */
1596    if (is_right) {
1597        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1598        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1599        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1600    } else {
1601        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1602        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1603    }
1604    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1605    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1606
1607    /* Now conditionally store the new CC_OP value.  If the shift count
1608       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1609       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
1610       exactly as we computed above.  */
1611    t0 = tcg_const_i32(0);
1612    t1 = tcg_temp_new_i32();
1613    tcg_gen_trunc_tl_i32(t1, s->T1);
1614    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1615    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1616    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1617                        s->tmp2_i32, s->tmp3_i32);
1618    tcg_temp_free_i32(t0);
1619    tcg_temp_free_i32(t1);
1620
1621    /* The CC_OP value is no longer predictable.  */ 
1622    set_cc_op(s, CC_OP_DYNAMIC);
1623}
1624
1625static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1626                          int is_right)
1627{
1628    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1629    int shift;
1630
1631    /* load */
1632    if (op1 == OR_TMP0) {
1633        gen_op_ld_v(s, ot, s->T0, s->A0);
1634    } else {
1635        gen_op_mov_v_reg(s, ot, s->T0, op1);
1636    }
1637
1638    op2 &= mask;
1639    if (op2 != 0) {
1640        switch (ot) {
1641#ifdef TARGET_X86_64
1642        case MO_32:
1643            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1644            if (is_right) {
1645                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1646            } else {
1647                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1648            }
1649            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1650            break;
1651#endif
1652        default:
1653            if (is_right) {
1654                tcg_gen_rotri_tl(s->T0, s->T0, op2);
1655            } else {
1656                tcg_gen_rotli_tl(s->T0, s->T0, op2);
1657            }
1658            break;
1659        case MO_8:
1660            mask = 7;
1661            goto do_shifts;
1662        case MO_16:
1663            mask = 15;
1664        do_shifts:
1665            shift = op2 & mask;
1666            if (is_right) {
1667                shift = mask + 1 - shift;
1668            }
1669            gen_extu(ot, s->T0);
1670            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1671            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1672            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1673            break;
1674        }
1675    }
1676
1677    /* store */
1678    gen_op_st_rm_T0_A0(s, ot, op1);
1679
1680    if (op2 != 0) {
1681        /* Compute the flags into CC_SRC.  */
1682        gen_compute_eflags(s);
1683
1684        /* The value that was "rotated out" is now present at the other end
1685           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1686           since we've computed the flags into CC_SRC, these variables are
1687           currently dead.  */
1688        if (is_right) {
1689            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1690            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1691            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1692        } else {
1693            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1694            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1695        }
1696        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1697        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1698        set_cc_op(s, CC_OP_ADCOX);
1699    }
1700}
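    /*
     * A sketch of the 8/16-bit rotate composition used above, applied
     * to the zero-extended value:
     *
     *     rol = (val << shift) | (val >> (width - shift));
     *
     * e.g. an 8-bit ROL by 3 of 0xc1 is (0x608 | 0x06) & 0xff = 0x0e.
     * ROR by N is handled as ROL by (width - N).
     */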
1701
1702/* XXX: add faster immediate = 1 case */
1703static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1704                           int is_right)
1705{
1706    gen_compute_eflags(s);
1707    assert(s->cc_op == CC_OP_EFLAGS);
1708
1709    /* load */
1710    if (op1 == OR_TMP0) {
1711        gen_op_ld_v(s, ot, s->T0, s->A0);
1712    } else {
1713        gen_op_mov_v_reg(s, ot, s->T0, op1);
1714    }

1715    if (is_right) {
1716        switch (ot) {
1717        case MO_8:
1718            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1719            break;
1720        case MO_16:
1721            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1722            break;
1723        case MO_32:
1724            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1725            break;
1726#ifdef TARGET_X86_64
1727        case MO_64:
1728            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1729            break;
1730#endif
1731        default:
1732            tcg_abort();
1733        }
1734    } else {
1735        switch (ot) {
1736        case MO_8:
1737            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1738            break;
1739        case MO_16:
1740            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1741            break;
1742        case MO_32:
1743            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1744            break;
1745#ifdef TARGET_X86_64
1746        case MO_64:
1747            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1748            break;
1749#endif
1750        default:
1751            tcg_abort();
1752        }
1753    }
1754    /* store */
1755    gen_op_st_rm_T0_A0(s, ot, op1);
1756}
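    /*
     * RCL/RCR rotate through the carry flag, i.e. they are rotates of a
     * (width + 1)-bit quantity, which is why they stay in helpers rather
     * than being open-coded: e.g. an 8-bit RCL by 1 of 0x80 with CF=0
     * yields 0x00 with CF=1, and a second RCL by 1 then yields 0x01
     * with CF=0.
     */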
1757
1758/* XXX: add faster immediate case */
1759static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1760                             bool is_right, TCGv count_in)
1761{
1762    target_ulong mask = (ot == MO_64 ? 63 : 31);
1763    TCGv count;
1764
1765    /* load */
1766    if (op1 == OR_TMP0) {
1767        gen_op_ld_v(s, ot, s->T0, s->A0);
1768    } else {
1769        gen_op_mov_v_reg(s, ot, s->T0, op1);
1770    }
1771
1772    count = tcg_temp_new();
1773    tcg_gen_andi_tl(count, count_in, mask);
1774
1775    switch (ot) {
1776    case MO_16:
1777        /* Note: we implement the Intel behaviour for shift count > 16.
1778           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1779           portion by constructing it as a 32-bit value.  */
1780        if (is_right) {
1781            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1782            tcg_gen_mov_tl(s->T1, s->T0);
1783            tcg_gen_mov_tl(s->T0, s->tmp0);
1784        } else {
1785            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1786        }
1787        /* FALLTHRU */
1788#ifdef TARGET_X86_64
1789    case MO_32:
1790        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1791        tcg_gen_subi_tl(s->tmp0, count, 1);
1792        if (is_right) {
1793            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1794            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1795            tcg_gen_shr_i64(s->T0, s->T0, count);
1796        } else {
1797            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1798            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1799            tcg_gen_shl_i64(s->T0, s->T0, count);
1800            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1801            tcg_gen_shri_i64(s->T0, s->T0, 32);
1802        }
1803        break;
1804#endif
1805    default:
1806        tcg_gen_subi_tl(s->tmp0, count, 1);
1807        if (is_right) {
1808            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1809
1810            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1811            tcg_gen_shr_tl(s->T0, s->T0, count);
1812            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1813        } else {
1814            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1815            if (ot == MO_16) {
1816                /* Only needed if count > 16, for Intel behaviour.  */
1817                tcg_gen_subfi_tl(s->tmp4, 33, count);
1818                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1819                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1820            }
1821
1822            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1823            tcg_gen_shl_tl(s->T0, s->T0, count);
1824            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1825        }
1826        tcg_gen_movi_tl(s->tmp4, 0);
1827        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1828                           s->tmp4, s->T1);
1829        tcg_gen_or_tl(s->T0, s->T0, s->T1);
1830        break;
1831    }
1832
1833    /* store */
1834    gen_op_st_rm_T0_A0(s, ot, op1);
1835
1836    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1837    tcg_temp_free(count);
1838}
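    /*
     * Illustration of the double-width shift above: "shrd eax, ebx, 8"
     * leaves in EAX the low 32 bits of (EBX:EAX) >> 8, so the bottom
     * byte of EBX is shifted into the top byte of EAX.  For the 16-bit
     * forms with a count above 16, the Intel behaviour implemented here
     * shifts A:B:A instead of producing an undefined result.
     */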
1839
1840static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1841{
1842    if (s != OR_TMP1)
1843        gen_op_mov_v_reg(s1, ot, s1->T1, s);
1844    switch(op) {
1845    case OP_ROL:
1846        gen_rot_rm_T1(s1, ot, d, 0);
1847        break;
1848    case OP_ROR:
1849        gen_rot_rm_T1(s1, ot, d, 1);
1850        break;
1851    case OP_SHL:
1852    case OP_SHL1:
1853        gen_shift_rm_T1(s1, ot, d, 0, 0);
1854        break;
1855    case OP_SHR:
1856        gen_shift_rm_T1(s1, ot, d, 1, 0);
1857        break;
1858    case OP_SAR:
1859        gen_shift_rm_T1(s1, ot, d, 1, 1);
1860        break;
1861    case OP_RCL:
1862        gen_rotc_rm_T1(s1, ot, d, 0);
1863        break;
1864    case OP_RCR:
1865        gen_rotc_rm_T1(s1, ot, d, 1);
1866        break;
1867    }
1868}
1869
1870static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1871{
1872    switch(op) {
1873    case OP_ROL:
1874        gen_rot_rm_im(s1, ot, d, c, 0);
1875        break;
1876    case OP_ROR:
1877        gen_rot_rm_im(s1, ot, d, c, 1);
1878        break;
1879    case OP_SHL:
1880    case OP_SHL1:
1881        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1882        break;
1883    case OP_SHR:
1884        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1885        break;
1886    case OP_SAR:
1887        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1888        break;
1889    default:
1890        /* currently not optimized */
1891        tcg_gen_movi_tl(s1->T1, c);
1892        gen_shift(s1, op, ot, d, OR_TMP1);
1893        break;
1894    }
1895}
1896
1897#define X86_MAX_INSN_LENGTH 15
1898
1899static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1900{
1901    uint64_t pc = s->pc;
1902
1903    s->pc += num_bytes;
1904    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1905        /* If the instruction's 16th byte is on a different page than the 1st, a
1906         * page fault on the second page wins over the general protection fault
1907         * caused by the instruction being too long.
1908         * This can happen even if the operand is only one byte long!
1909         */
1910        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1911            volatile uint8_t unused =
1912                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1913            (void) unused;
1914        }
1915        siglongjmp(s->jmpbuf, 1);
1916    }
1917
1918    return pc;
1919}
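    /*
     * Example of the limit check above: for an instruction starting five
     * bytes before a page boundary, the 16th byte lies on the following
     * page, and the explicit cpu_ldub_code() probe ensures that a #PF on
     * that page takes priority over the #GP for an over-long
     * instruction.
     */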
1920
1921static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1922{
1923    return cpu_ldub_code(env, advance_pc(env, s, 1));
1924}
1925
1926static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1927{
1928    return cpu_ldsw_code(env, advance_pc(env, s, 2));
1929}
1930
1931static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1932{
1933    return cpu_lduw_code(env, advance_pc(env, s, 2));
1934}
1935
1936static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1937{
1938    return cpu_ldl_code(env, advance_pc(env, s, 4));
1939}
1940
1941#ifdef TARGET_X86_64
1942static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1943{
1944    return cpu_ldq_code(env, advance_pc(env, s, 8));
1945}
1946#endif
1947
1948/* Decompose an address.  */
1949
1950typedef struct AddressParts {
1951    int def_seg;
1952    int base;
1953    int index;
1954    int scale;
1955    target_long disp;
1956} AddressParts;
1957
1958static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1959                                    int modrm)
1960{
1961    int def_seg, base, index, scale, mod, rm;
1962    target_long disp;
1963    bool havesib;
1964
1965    def_seg = R_DS;
1966    index = -1;
1967    scale = 0;
1968    disp = 0;
1969
1970    mod = (modrm >> 6) & 3;
1971    rm = modrm & 7;
1972    base = rm | REX_B(s);
1973
1974    if (mod == 3) {
1975        /* Normally filtered out earlier, but including this path
1976           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1977        goto done;
1978    }
1979
1980    switch (s->aflag) {
1981    case MO_64:
1982    case MO_32:
1983        havesib = 0;
1984        if (rm == 4) {
1985            int code = x86_ldub_code(env, s);
1986            scale = (code >> 6) & 3;
1987            index = ((code >> 3) & 7) | REX_X(s);
1988            if (index == 4) {
1989                index = -1;  /* no index */
1990            }
1991            base = (code & 7) | REX_B(s);
1992            havesib = 1;
1993        }
1994
1995        switch (mod) {
1996        case 0:
1997            if ((base & 7) == 5) {
1998                base = -1;
1999                disp = (int32_t)x86_ldl_code(env, s);
2000                if (CODE64(s) && !havesib) {
2001                    base = -2;
2002                    disp += s->pc + s->rip_offset;
2003                }
2004            }
2005            break;
2006        case 1:
2007            disp = (int8_t)x86_ldub_code(env, s);
2008            break;
2009        default:
2010        case 2:
2011            disp = (int32_t)x86_ldl_code(env, s);
2012            break;
2013        }
2014
2015        /* For correct popl handling with esp.  */
2016        if (base == R_ESP && s->popl_esp_hack) {
2017            disp += s->popl_esp_hack;
2018        }
2019        if (base == R_EBP || base == R_ESP) {
2020            def_seg = R_SS;
2021        }
2022        break;
2023
2024    case MO_16:
2025        if (mod == 0) {
2026            if (rm == 6) {
2027                base = -1;
2028                disp = x86_lduw_code(env, s);
2029                break;
2030            }
2031        } else if (mod == 1) {
2032            disp = (int8_t)x86_ldub_code(env, s);
2033        } else {
2034            disp = (int16_t)x86_lduw_code(env, s);
2035        }
2036
2037        switch (rm) {
2038        case 0:
2039            base = R_EBX;
2040            index = R_ESI;
2041            break;
2042        case 1:
2043            base = R_EBX;
2044            index = R_EDI;
2045            break;
2046        case 2:
2047            base = R_EBP;
2048            index = R_ESI;
2049            def_seg = R_SS;
2050            break;
2051        case 3:
2052            base = R_EBP;
2053            index = R_EDI;
2054            def_seg = R_SS;
2055            break;
2056        case 4:
2057            base = R_ESI;
2058            break;
2059        case 5:
2060            base = R_EDI;
2061            break;
2062        case 6:
2063            base = R_EBP;
2064            def_seg = R_SS;
2065            break;
2066        default:
2067        case 7:
2068            base = R_EBX;
2069            break;
2070        }
2071        break;
2072
2073    default:
2074        tcg_abort();
2075    }
2076
2077 done:
2078    return (AddressParts){ def_seg, base, index, scale, disp };
2079}
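    /*
     * Example decode, 32-bit address size and no REX: for the sequence
     * modrm=0x44 sib=0x88 disp8=0x10, mod=1 with rm=4 selects a SIB byte
     * plus an 8-bit displacement; the SIB byte yields scale=2, index=1
     * (ECX) and base=0 (EAX), so the result is { R_DS, R_EAX, R_ECX, 2,
     * 0x10 }, i.e. the address EAX + ECX * 4 + 0x10 in the DS segment.
     */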
2080
2081/* Compute the address, with a minimum number of TCG ops.  */
2082static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2083{
2084    TCGv ea = NULL;
2085
2086    if (a.index >= 0) {
2087        if (a.scale == 0) {
2088            ea = cpu_regs[a.index];
2089        } else {
2090            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2091            ea = s->A0;
2092        }
2093        if (a.base >= 0) {
2094            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2095            ea = s->A0;
2096        }
2097    } else if (a.base >= 0) {
2098        ea = cpu_regs[a.base];
2099    }
2100    if (!ea) {
2101        tcg_gen_movi_tl(s->A0, a.disp);
2102        ea = s->A0;
2103    } else if (a.disp != 0) {
2104        tcg_gen_addi_tl(s->A0, ea, a.disp);
2105        ea = s->A0;
2106    }
2107
2108    return ea;
2109}
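    /*
     * The function above emits no TCG ops at all when it can: a bare
     * (%ebx) operand returns cpu_regs[R_EBX] directly, while
     * (%ebx,%ecx,4) costs one shift and one add, and only the variants
     * that must combine values write through s->A0.
     */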
2110
2111static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2112{
2113    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2114    TCGv ea = gen_lea_modrm_1(s, a);
2115    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2116}
2117
2118static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2119{
2120    (void)gen_lea_modrm_0(env, s, modrm);
2121}
2122
2123/* Used for BNDCL, BNDCU, BNDCN.  */
2124static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2125                      TCGCond cond, TCGv_i64 bndv)
2126{
2127    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2128
2129    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2130    if (!CODE64(s)) {
2131        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2132    }
2133    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2134    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2135    gen_helper_bndck(cpu_env, s->tmp2_i32);
2136}
2137
2138/* used for LEA and MOV AX, mem */
2139static void gen_add_A0_ds_seg(DisasContext *s)
2140{
2141    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2142}
2143
2144/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2145   OR_TMP0 */
2146static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2147                           TCGMemOp ot, int reg, int is_store)
2148{
2149    int mod, rm;
2150
2151    mod = (modrm >> 6) & 3;
2152    rm = (modrm & 7) | REX_B(s);
2153    if (mod == 3) {
2154        if (is_store) {
2155            if (reg != OR_TMP0)
2156                gen_op_mov_v_reg(s, ot, s->T0, reg);
2157            gen_op_mov_reg_v(s, ot, rm, s->T0);
2158        } else {
2159            gen_op_mov_v_reg(s, ot, s->T0, rm);
2160            if (reg != OR_TMP0)
2161                gen_op_mov_reg_v(s, ot, reg, s->T0);
2162        }
2163    } else {
2164        gen_lea_modrm(env, s, modrm);
2165        if (is_store) {
2166            if (reg != OR_TMP0)
2167                gen_op_mov_v_reg(s, ot, s->T0, reg);
2168            gen_op_st_v(s, ot, s->T0, s->A0);
2169        } else {
2170            gen_op_ld_v(s, ot, s->T0, s->A0);
2171            if (reg != OR_TMP0)
2172                gen_op_mov_reg_v(s, ot, reg, s->T0);
2173        }
2174    }
2175}
2176
2177static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2178{
2179    uint32_t ret;
2180
2181    switch (ot) {
2182    case MO_8:
2183        ret = x86_ldub_code(env, s);
2184        break;
2185    case MO_16:
2186        ret = x86_lduw_code(env, s);
2187        break;
2188    case MO_32:
2189#ifdef TARGET_X86_64
2190    case MO_64:
2191#endif
2192        ret = x86_ldl_code(env, s);
2193        break;
2194    default:
2195        tcg_abort();
2196    }
2197    return ret;
2198}
2199
2200static inline int insn_const_size(TCGMemOp ot)
2201{
2202    if (ot <= MO_32) {
2203        return 1 << ot;
2204    } else {
2205        return 4;
2206    }
2207}
2208
2209static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2210{
2211#ifndef CONFIG_USER_ONLY
2212    return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2213           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2214#else
2215    return true;
2216#endif
2217}
2218
2219static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2220{
2221    target_ulong pc = s->cs_base + eip;
2222
2223    if (use_goto_tb(s, pc)) {
2224        /* jump to same page: we can use a direct jump */
2225        tcg_gen_goto_tb(tb_num);
2226        gen_jmp_im(s, eip);
2227        tcg_gen_exit_tb(s->base.tb, tb_num);
2228        s->base.is_jmp = DISAS_NORETURN;
2229    } else {
2230        /* jump to another page */
2231        gen_jmp_im(s, eip);
2232        gen_jr(s, s->tmp0);
2233    }
2234}
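    /*
     * The two cases above are the usual TCG chaining scheme: a same-page
     * target uses the goto_tb/exit_tb pair, whose exit can later be
     * patched to branch directly into the next TB, while a cross-page
     * target falls back to gen_jr() and lookup_and_goto_ptr, which looks
     * the target TB up at run time.
     */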
2235
2236static inline void gen_jcc(DisasContext *s, int b,
2237                           target_ulong val, target_ulong next_eip)
2238{
2239    TCGLabel *l1, *l2;
2240
2241    if (s->jmp_opt) {
2242        l1 = gen_new_label();
2243        gen_jcc1(s, b, l1);
2244
2245        gen_goto_tb(s, 0, next_eip);
2246
2247        gen_set_label(l1);
2248        gen_goto_tb(s, 1, val);
2249    } else {
2250        l1 = gen_new_label();
2251        l2 = gen_new_label();
2252        gen_jcc1(s, b, l1);
2253
2254        gen_jmp_im(s, next_eip);
2255        tcg_gen_br(l2);
2256
2257        gen_set_label(l1);
2258        gen_jmp_im(s, val);
2259        gen_set_label(l2);
2260        gen_eob(s);
2261    }
2262}
2263
2264static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2265                        int modrm, int reg)
2266{
2267    CCPrepare cc;
2268
2269    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2270
2271    cc = gen_prepare_cc(s, b, s->T1);
2272    if (cc.mask != -1) {
2273        TCGv t0 = tcg_temp_new();
2274        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2275        cc.reg = t0;
2276    }
2277    if (!cc.use_reg2) {
2278        cc.reg2 = tcg_const_tl(cc.imm);
2279    }
2280
2281    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2282                       s->T0, cpu_regs[reg]);
2283    gen_op_mov_reg_v(s, ot, reg, s->T0);
2284
2285    if (cc.mask != -1) {
2286        tcg_temp_free(cc.reg);
2287    }
2288    if (!cc.use_reg2) {
2289        tcg_temp_free(cc.reg2);
2290    }
2291}
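    /*
     * Note that, as on real hardware, the memory operand of CMOVcc is
     * read even when the condition is false; only the write-back of the
     * result is conditional, done with movcond rather than a branch, so
     * a false CMOV can still fault on its operand.
     */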
2292
2293static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
2294{
2295    tcg_gen_ld32u_tl(s->T0, cpu_env,
2296                     offsetof(CPUX86State,segs[seg_reg].selector));
2297}
2298
2299static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
2300{
2301    tcg_gen_ext16u_tl(s->T0, s->T0);
2302    tcg_gen_st32_tl(s->T0, cpu_env,
2303                    offsetof(CPUX86State,segs[seg_reg].selector));
2304    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2305}
2306
2307/* move T0 to seg_reg and compute if the CPU state may change. Never
2308   call this function with seg_reg == R_CS */
2309static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2310{
2311    if (s->pe && !s->vm86) {
2312        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2313        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2314        /* abort translation because the addseg value may change or
2315           because ss32 may change. For R_SS, translation must always
2316           stop as a special handling must be done to disable hardware
2317           interrupts for the next instruction */
2318        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2319            s->base.is_jmp = DISAS_TOO_MANY;
2320        }
2321    } else {
2322        gen_op_movl_seg_T0_vm(s, seg_reg);
2323        if (seg_reg == R_SS) {
2324            s->base.is_jmp = DISAS_TOO_MANY;
2325        }
2326    }
2327}
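    /*
     * In real and vm86 mode the segment base is simply selector << 4:
     * e.g. after "mov ds, ax" with AX=0x1234, DS.base becomes 0x12340
     * and no descriptor table is consulted.  The protected-mode path
     * leaves all checking to the load_seg helper, which may raise an
     * exception for a bad selector.
     */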
2328
2329static inline int svm_is_rep(int prefixes)
2330{
2331    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2332}
2333
2334static inline void
2335gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2336                              uint32_t type, uint64_t param)
2337{
2338    /* no SVM activated; fast case */
2339    if (likely(!(s->flags & HF_GUEST_MASK)))
2340        return;
2341    gen_update_cc_op(s);
2342    gen_jmp_im(s, pc_start - s->cs_base);
2343    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2344                                         tcg_const_i64(param));
2345}
2346
2347static inline void
2348gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2349{
2350    gen_svm_check_intercept_param(s, pc_start, type, 0);
2351}
2352
2353static inline void gen_stack_update(DisasContext *s, int addend)
2354{
2355    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2356}
2357
2358/* Generate a push. It depends on ss32, addseg and dflag.  */
2359static void gen_push_v(DisasContext *s, TCGv val)
2360{
2361    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2362    TCGMemOp a_ot = mo_stacksize(s);
2363    int size = 1 << d_ot;
2364    TCGv new_esp = s->A0;
2365
2366    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2367
2368    if (!CODE64(s)) {
2369        if (s->addseg) {
2370            new_esp = s->tmp4;
2371            tcg_gen_mov_tl(new_esp, s->A0);
2372        }
2373        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2374    }
2375
2376    gen_op_st_v(s, d_ot, val, s->A0);
2377    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2378}
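    /*
     * The new stack pointer is kept in a temporary until after the store
     * for the sake of precise exceptions: if the push faults, ESP must
     * still hold its old value when the fault is delivered.  The same
     * concern motivates the two-step pop below.
     */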
2379
2380/* two-step pop is necessary for precise exceptions */
2381static TCGMemOp gen_pop_T0(DisasContext *s)
2382{
2383    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2384
2385    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2386    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2387
2388    return d_ot;
2389}
2390
2391static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2392{
2393    gen_stack_update(s, 1 << ot);
2394}
2395
2396static inline void gen_stack_A0(DisasContext *s)
2397{
2398    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2399}
2400
2401static void gen_pusha(DisasContext *s)
2402{
2403    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2404    TCGMemOp d_ot = s->dflag;
2405    int size = 1 << d_ot;
2406    int i;
2407
2408    for (i = 0; i < 8; i++) {
2409        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2410        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2411        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2412    }
2413
2414    gen_stack_update(s, -8 * size);
2415}
2416
2417static void gen_popa(DisasContext *s)
2418{
2419    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2420    TCGMemOp d_ot = s->dflag;
2421    int size = 1 << d_ot;
2422    int i;
2423
2424    for (i = 0; i < 8; i++) {
2425        /* ESP is not reloaded */
2426        if (7 - i == R_ESP) {
2427            continue;
2428        }
2429        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2430        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2431        gen_op_ld_v(s, d_ot, s->T0, s->A0);
2432        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2433    }
2434
2435    gen_stack_update(s, 8 * size);
2436}
2437
2438static void gen_enter(DisasContext *s, int esp_addend, int level)
2439{
2440    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2441    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2442    int size = 1 << d_ot;
2443
2444    /* Push BP; compute FrameTemp into T1.  */
2445    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2446    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2447    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2448
2449    level &= 31;
2450    if (level != 0) {
2451        int i;
2452
2453        /* Copy level-1 pointers from the previous frame.  */
2454        for (i = 1; i < level; ++i) {
2455            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2456            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2457            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2458
2459            tcg_gen_subi_tl(s->A0, s->T1, size * i);
2460            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2461            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2462        }
2463
2464        /* Push the current FrameTemp as the last level.  */
2465        tcg_gen_subi_tl(s->A0, s->T1, size * level);
2466        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2467        gen_op_st_v(s, d_ot, s->T1, s->A0);
2468    }
2469
2470    /* Copy the FrameTemp value to EBP.  */
2471    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2472
2473    /* Compute the final value of ESP.  */
2474    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2475    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2476}
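    /*
     * Worked example: in 32-bit code, "enter 16, 2" pushes EBP, copies
     * one saved frame pointer from the old frame, pushes the FrameTemp
     * value itself, loads EBP with FrameTemp, and finally sets
     * ESP = FrameTemp - 16 - 2 * 4, i.e. the original ESP minus 28.
     */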
2477
2478static void gen_leave(DisasContext *s)
2479{
2480    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2481    TCGMemOp a_ot = mo_stacksize(s);
2482
2483    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2484    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2485
2486    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2487
2488    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2489    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2490}
2491
2492/* Similarly, except that the assumption here is that we don't decode
2493   the instruction at all -- either a missing opcode, an unimplemented
2494   feature, or just a bogus instruction stream.  */
2495static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2496{
2497    gen_illegal_opcode(s);
2498
2499    if (qemu_loglevel_mask(LOG_UNIMP)) {
2500        target_ulong pc = s->pc_start, end = s->pc;
2501        qemu_log_lock();
2502        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2503        for (; pc < end; ++pc) {
2504            qemu_log(" %02x", cpu_ldub_code(env, pc));
2505        }
2506        qemu_log("\n");
2507        qemu_log_unlock();
2508    }
2509}
2510
2511/* an interrupt is different from an exception because of the
2512   privilege checks */
2513static void gen_interrupt(DisasContext *s, int intno,
2514                          target_ulong cur_eip, target_ulong next_eip)
2515{
2516    gen_update_cc_op(s);
2517    gen_jmp_im(s, cur_eip);
2518    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2519                               tcg_const_i32(next_eip - cur_eip));
2520    s->base.is_jmp = DISAS_NORETURN;
2521}
2522
2523static void gen_debug(DisasContext *s, target_ulong cur_eip)
2524{
2525    gen_update_cc_op(s);
2526    gen_jmp_im(s, cur_eip);
2527    gen_helper_debug(cpu_env);
2528    s->base.is_jmp = DISAS_NORETURN;
2529}
2530
2531static void gen_set_hflag(DisasContext *s, uint32_t mask)
2532{
2533    if ((s->flags & mask) == 0) {
2534        TCGv_i32 t = tcg_temp_new_i32();
2535        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2536        tcg_gen_ori_i32(t, t, mask);
2537        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2538        tcg_temp_free_i32(t);
2539        s->flags |= mask;
2540    }
2541}
2542
2543static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2544{
2545    if (s->flags & mask) {
2546        TCGv_i32 t = tcg_temp_new_i32();
2547        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2548        tcg_gen_andi_i32(t, t, ~mask);
2549        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2550        tcg_temp_free_i32(t);
2551        s->flags &= ~mask;
2552    }
2553}
2554
2555/* Clear BND registers during legacy branches.  */
2556static void gen_bnd_jmp(DisasContext *s)
2557{
2558    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2559       and if the BNDREGs are known to be in use (non-zero) already.
2560       The helper itself will check BNDPRESERVE at runtime.  */
2561    if ((s->prefix & PREFIX_REPNZ) == 0
2562        && (s->flags & HF_MPX_EN_MASK) != 0
2563        && (s->flags & HF_MPX_IU_MASK) != 0) {
2564        gen_helper_bnd_jmp(cpu_env);
2565    }
2566}
2567
2568/* Generate an end of block. Trace exception is also generated if needed.
2569   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2570   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2571   S->TF.  This is used by the syscall/sysret insns.  */
2572static void
2573do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2574{
2575    gen_update_cc_op(s);
2576
2577    /* If several instructions disable interrupts, only the first does it.  */
2578    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2579        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2580    } else {
2581        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2582    }
2583
2584    if (s->base.tb->flags & HF_RF_MASK) {
2585        gen_helper_reset_rf(cpu_env);
2586    }
2587    if (s->base.singlestep_enabled) {
2588        gen_helper_debug(cpu_env);
2589    } else if (recheck_tf) {
2590        gen_helper_rechecking_single_step(cpu_env);
2591        tcg_gen_exit_tb(NULL, 0);
2592    } else if (s->tf) {
2593        gen_helper_single_step(cpu_env);
2594    } else if (jr) {
2595        tcg_gen_lookup_and_goto_ptr();
2596    } else {
2597        tcg_gen_exit_tb(NULL, 0);
2598    }
2599    s->base.is_jmp = DISAS_NORETURN;
2600}
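    /*
     * The order of the tests above defines the exit priority: the
     * debugger hook wins over the TF-rechecking helper, which wins over
     * ordinary TF single-stepping, and only then do the optimised exits
     * apply (lookup_and_goto_ptr for indirect jumps, plain exit_tb
     * otherwise).
     */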
2601
2602static inline void
2603gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2604{
2605    do_gen_eob_worker(s, inhibit, recheck_tf, false);
2606}
2607
2608/* End of block.
2609   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2610static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2611{
2612    gen_eob_worker(s, inhibit, false);
2613}
2614
2615/* End of block, resetting the inhibit irq flag.  */
2616static void gen_eob(DisasContext *s)
2617{
2618    gen_eob_worker(s, false, false);
2619}
2620
2621/* Jump to register */
2622static void gen_jr(DisasContext *s, TCGv dest)
2623{
2624    do_gen_eob_worker(s, false, false, true);
2625}
2626
2627/* generate a jump to eip. No segment change must happen before this, as a
2628   direct jump to the next block may occur */
2629static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2630{
2631    gen_update_cc_op(s);
2632    set_cc_op(s, CC_OP_DYNAMIC);
2633    if (s->jmp_opt) {
2634        gen_goto_tb(s, tb_num, eip);
2635    } else {
2636        gen_jmp_im(s, eip);
2637        gen_eob(s);
2638    }
2639}
2640
2641static void gen_jmp(DisasContext *s, target_ulong eip)
2642{
2643    gen_jmp_tb(s, eip, 0);
2644}
2645
2646static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2647{
2648    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2649    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2650}
2651
2652static inline void gen_stq_env_A0(DisasContext *s, int offset)
2653{
2654    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2655    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2656}
2657
2658static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2659{
2660    int mem_index = s->mem_index;
2661    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2662    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2663    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2664    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2665    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2666}
2667
2668static inline void gen_sto_env_A0(DisasContext *s, int offset)
2669{
2670    int mem_index = s->mem_index;
2671    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2672    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2673    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2674    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2675    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2676}
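    /*
     * The 128-bit loads and stores above are issued as two 64-bit
     * accesses, so they are not atomic as a 16-byte unit; the
     * architecture makes no such atomicity guarantee for 128-bit SSE
     * accesses either.
     */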
2677
2678static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2679{
2680    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2681    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2682    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2683    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2684}
2685
2686static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2687{
2688    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2689    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2690}
2691
2692static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2693{
2694    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2695    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2696}
2697
2698static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2699{
2700    tcg_gen_movi_i64(s->tmp1_i64, 0);
2701    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2702}
2703
2704typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2705typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2706typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2707typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2708typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2709typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2710                               TCGv_i32 val);
2711typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2712typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2713                               TCGv val);
2714
2715#define SSE_SPECIAL ((void *)1)
2716#define SSE_DUMMY ((void *)2)
2717
2718#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2719#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2720                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
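    /*
     * For reference, MMX_OP2(paddb) expands to
     * { gen_helper_paddb_mmx, gen_helper_paddb_xmm }, and SSE_FOP(add)
     * to { gen_helper_addps, gen_helper_addpd, gen_helper_addss,
     * gen_helper_addsd }; the columns match the b1 prefix index computed
     * in gen_sse() (0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2).
     */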
2721
2722static const SSEFunc_0_epp sse_op_table1[256][4] = {
2723    /* 3DNow! extensions */
2724    [0x0e] = { SSE_DUMMY }, /* femms */
2725    [0x0f] = { SSE_DUMMY }, /* pf... */
2726    /* pure SSE operations */
2727    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2728    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2729    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2730    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2731    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2732    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2733    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2734    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2735
2736    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2737    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2738    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2739    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2740    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2741    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2742    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2743    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2744    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2745    [0x51] = SSE_FOP(sqrt),
2746    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2747    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2748    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2749    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2750    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2751    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2752    [0x58] = SSE_FOP(add),
2753    [0x59] = SSE_FOP(mul),
2754    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2755               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2756    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2757    [0x5c] = SSE_FOP(sub),
2758    [0x5d] = SSE_FOP(min),
2759    [0x5e] = SSE_FOP(div),
2760    [0x5f] = SSE_FOP(max),
2761
2762    [0xc2] = SSE_FOP(cmpeq),
2763    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2764               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2765
2766    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2767    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2768    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2769
2770    /* MMX ops and their SSE extensions */
2771    [0x60] = MMX_OP2(punpcklbw),
2772    [0x61] = MMX_OP2(punpcklwd),
2773    [0x62] = MMX_OP2(punpckldq),
2774    [0x63] = MMX_OP2(packsswb),
2775    [0x64] = MMX_OP2(pcmpgtb),
2776    [0x65] = MMX_OP2(pcmpgtw),
2777    [0x66] = MMX_OP2(pcmpgtl),
2778    [0x67] = MMX_OP2(packuswb),
2779    [0x68] = MMX_OP2(punpckhbw),
2780    [0x69] = MMX_OP2(punpckhwd),
2781    [0x6a] = MMX_OP2(punpckhdq),
2782    [0x6b] = MMX_OP2(packssdw),
2783    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2784    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2785    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2786    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2787    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2788               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2789               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2790               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2791    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2792    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2793    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2794    [0x74] = MMX_OP2(pcmpeqb),
2795    [0x75] = MMX_OP2(pcmpeqw),
2796    [0x76] = MMX_OP2(pcmpeql),
2797    [0x77] = { SSE_DUMMY }, /* emms */
2798    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2799    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2800    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2801    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2802    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2803    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2804    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2805    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2806    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2807    [0xd1] = MMX_OP2(psrlw),
2808    [0xd2] = MMX_OP2(psrld),
2809    [0xd3] = MMX_OP2(psrlq),
2810    [0xd4] = MMX_OP2(paddq),
2811    [0xd5] = MMX_OP2(pmullw),
2812    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2813    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2814    [0xd8] = MMX_OP2(psubusb),
2815    [0xd9] = MMX_OP2(psubusw),
2816    [0xda] = MMX_OP2(pminub),
2817    [0xdb] = MMX_OP2(pand),
2818    [0xdc] = MMX_OP2(paddusb),
2819    [0xdd] = MMX_OP2(paddusw),
2820    [0xde] = MMX_OP2(pmaxub),
2821    [0xdf] = MMX_OP2(pandn),
2822    [0xe0] = MMX_OP2(pavgb),
2823    [0xe1] = MMX_OP2(psraw),
2824    [0xe2] = MMX_OP2(psrad),
2825    [0xe3] = MMX_OP2(pavgw),
2826    [0xe4] = MMX_OP2(pmulhuw),
2827    [0xe5] = MMX_OP2(pmulhw),
2828    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2829    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2830    [0xe8] = MMX_OP2(psubsb),
2831    [0xe9] = MMX_OP2(psubsw),
2832    [0xea] = MMX_OP2(pminsw),
2833    [0xeb] = MMX_OP2(por),
2834    [0xec] = MMX_OP2(paddsb),
2835    [0xed] = MMX_OP2(paddsw),
2836    [0xee] = MMX_OP2(pmaxsw),
2837    [0xef] = MMX_OP2(pxor),
2838    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2839    [0xf1] = MMX_OP2(psllw),
2840    [0xf2] = MMX_OP2(pslld),
2841    [0xf3] = MMX_OP2(psllq),
2842    [0xf4] = MMX_OP2(pmuludq),
2843    [0xf5] = MMX_OP2(pmaddwd),
2844    [0xf6] = MMX_OP2(psadbw),
2845    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2846               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2847    [0xf8] = MMX_OP2(psubb),
2848    [0xf9] = MMX_OP2(psubw),
2849    [0xfa] = MMX_OP2(psubl),
2850    [0xfb] = MMX_OP2(psubq),
2851    [0xfc] = MMX_OP2(paddb),
2852    [0xfd] = MMX_OP2(paddw),
2853    [0xfe] = MMX_OP2(paddl),
2854};
2855
2856static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2857    [0 + 2] = MMX_OP2(psrlw),
2858    [0 + 4] = MMX_OP2(psraw),
2859    [0 + 6] = MMX_OP2(psllw),
2860    [8 + 2] = MMX_OP2(psrld),
2861    [8 + 4] = MMX_OP2(psrad),
2862    [8 + 6] = MMX_OP2(pslld),
2863    [16 + 2] = MMX_OP2(psrlq),
2864    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2865    [16 + 6] = MMX_OP2(psllq),
2866    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2867};
2868
2869static const SSEFunc_0_epi sse_op_table3ai[] = {
2870    gen_helper_cvtsi2ss,
2871    gen_helper_cvtsi2sd
2872};
2873
2874#ifdef TARGET_X86_64
2875static const SSEFunc_0_epl sse_op_table3aq[] = {
2876    gen_helper_cvtsq2ss,
2877    gen_helper_cvtsq2sd
2878};
2879#endif
2880
2881static const SSEFunc_i_ep sse_op_table3bi[] = {
2882    gen_helper_cvttss2si,
2883    gen_helper_cvtss2si,
2884    gen_helper_cvttsd2si,
2885    gen_helper_cvtsd2si
2886};
2887
2888#ifdef TARGET_X86_64
2889static const SSEFunc_l_ep sse_op_table3bq[] = {
2890    gen_helper_cvttss2sq,
2891    gen_helper_cvtss2sq,
2892    gen_helper_cvttsd2sq,
2893    gen_helper_cvtsd2sq
2894};
2895#endif
2896
2897static const SSEFunc_0_epp sse_op_table4[8][4] = {
2898    SSE_FOP(cmpeq),
2899    SSE_FOP(cmplt),
2900    SSE_FOP(cmple),
2901    SSE_FOP(cmpunord),
2902    SSE_FOP(cmpneq),
2903    SSE_FOP(cmpnlt),
2904    SSE_FOP(cmpnle),
2905    SSE_FOP(cmpord),
2906};
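    /*
     * sse_op_table4 is indexed by the CMPPS/CMPPD/CMPSS/CMPSD imm8
     * predicate: 0=eq, 1=lt, 2=le, 3=unord, 4=neq, 5=nlt, 6=nle, 7=ord.
     */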
2907
2908static const SSEFunc_0_epp sse_op_table5[256] = {
2909    [0x0c] = gen_helper_pi2fw,
2910    [0x0d] = gen_helper_pi2fd,
2911    [0x1c] = gen_helper_pf2iw,
2912    [0x1d] = gen_helper_pf2id,
2913    [0x8a] = gen_helper_pfnacc,
2914    [0x8e] = gen_helper_pfpnacc,
2915    [0x90] = gen_helper_pfcmpge,
2916    [0x94] = gen_helper_pfmin,
2917    [0x96] = gen_helper_pfrcp,
2918    [0x97] = gen_helper_pfrsqrt,
2919    [0x9a] = gen_helper_pfsub,
2920    [0x9e] = gen_helper_pfadd,
2921    [0xa0] = gen_helper_pfcmpgt,
2922    [0xa4] = gen_helper_pfmax,
2923    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2924    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2925    [0xaa] = gen_helper_pfsubr,
2926    [0xae] = gen_helper_pfacc,
2927    [0xb0] = gen_helper_pfcmpeq,
2928    [0xb4] = gen_helper_pfmul,
2929    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2930    [0xb7] = gen_helper_pmulhrw_mmx,
2931    [0xbb] = gen_helper_pswapd,
2932    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2933};
2934
2935struct SSEOpHelper_epp {
2936    SSEFunc_0_epp op[2];
2937    uint32_t ext_mask;
2938};
2939
2940struct SSEOpHelper_eppi {
2941    SSEFunc_0_eppi op[2];
2942    uint32_t ext_mask;
2943};
2944
2945#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2946#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2947#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2948#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2949#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2950        CPUID_EXT_PCLMULQDQ }
2951#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2952
2953static const struct SSEOpHelper_epp sse_op_table6[256] = {
2954    [0x00] = SSSE3_OP(pshufb),
2955    [0x01] = SSSE3_OP(phaddw),
2956    [0x02] = SSSE3_OP(phaddd),
2957    [0x03] = SSSE3_OP(phaddsw),
2958    [0x04] = SSSE3_OP(pmaddubsw),
2959    [0x05] = SSSE3_OP(phsubw),
2960    [0x06] = SSSE3_OP(phsubd),
2961    [0x07] = SSSE3_OP(phsubsw),
2962    [0x08] = SSSE3_OP(psignb),
2963    [0x09] = SSSE3_OP(psignw),
2964    [0x0a] = SSSE3_OP(psignd),
2965    [0x0b] = SSSE3_OP(pmulhrsw),
2966    [0x10] = SSE41_OP(pblendvb),
2967    [0x14] = SSE41_OP(blendvps),
2968    [0x15] = SSE41_OP(blendvpd),
2969    [0x17] = SSE41_OP(ptest),
2970    [0x1c] = SSSE3_OP(pabsb),
2971    [0x1d] = SSSE3_OP(pabsw),
2972    [0x1e] = SSSE3_OP(pabsd),
2973    [0x20] = SSE41_OP(pmovsxbw),
2974    [0x21] = SSE41_OP(pmovsxbd),
2975    [0x22] = SSE41_OP(pmovsxbq),
2976    [0x23] = SSE41_OP(pmovsxwd),
2977    [0x24] = SSE41_OP(pmovsxwq),
2978    [0x25] = SSE41_OP(pmovsxdq),
2979    [0x28] = SSE41_OP(pmuldq),
2980    [0x29] = SSE41_OP(pcmpeqq),
2981    [0x2a] = SSE41_SPECIAL, /* movntdqa */
2982    [0x2b] = SSE41_OP(packusdw),
2983    [0x30] = SSE41_OP(pmovzxbw),
2984    [0x31] = SSE41_OP(pmovzxbd),
2985    [0x32] = SSE41_OP(pmovzxbq),
2986    [0x33] = SSE41_OP(pmovzxwd),
2987    [0x34] = SSE41_OP(pmovzxwq),
2988    [0x35] = SSE41_OP(pmovzxdq),
2989    [0x37] = SSE42_OP(pcmpgtq),
2990    [0x38] = SSE41_OP(pminsb),
2991    [0x39] = SSE41_OP(pminsd),
2992    [0x3a] = SSE41_OP(pminuw),
2993    [0x3b] = SSE41_OP(pminud),
2994    [0x3c] = SSE41_OP(pmaxsb),
2995    [0x3d] = SSE41_OP(pmaxsd),
2996    [0x3e] = SSE41_OP(pmaxuw),
2997    [0x3f] = SSE41_OP(pmaxud),
2998    [0x40] = SSE41_OP(pmulld),
2999    [0x41] = SSE41_OP(phminposuw),
3000    [0xdb] = AESNI_OP(aesimc),
3001    [0xdc] = AESNI_OP(aesenc),
3002    [0xdd] = AESNI_OP(aesenclast),
3003    [0xde] = AESNI_OP(aesdec),
3004    [0xdf] = AESNI_OP(aesdeclast),
3005};
3006
3007static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3008    [0x08] = SSE41_OP(roundps),
3009    [0x09] = SSE41_OP(roundpd),
3010    [0x0a] = SSE41_OP(roundss),
3011    [0x0b] = SSE41_OP(roundsd),
3012    [0x0c] = SSE41_OP(blendps),
3013    [0x0d] = SSE41_OP(blendpd),
3014    [0x0e] = SSE41_OP(pblendw),
3015    [0x0f] = SSSE3_OP(palignr),
3016    [0x14] = SSE41_SPECIAL, /* pextrb */
3017    [0x15] = SSE41_SPECIAL, /* pextrw */
3018    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3019    [0x17] = SSE41_SPECIAL, /* extractps */
3020    [0x20] = SSE41_SPECIAL, /* pinsrb */
3021    [0x21] = SSE41_SPECIAL, /* insertps */
3022    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3023    [0x40] = SSE41_OP(dpps),
3024    [0x41] = SSE41_OP(dppd),
3025    [0x42] = SSE41_OP(mpsadbw),
3026    [0x44] = PCLMULQDQ_OP(pclmulqdq),
3027    [0x60] = SSE42_OP(pcmpestrm),
3028    [0x61] = SSE42_OP(pcmpestri),
3029    [0x62] = SSE42_OP(pcmpistrm),
3030    [0x63] = SSE42_OP(pcmpistri),
3031    [0xdf] = AESNI_OP(aeskeygenassist),
3032};
3033
3034static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3035                    target_ulong pc_start, int rex_r)
3036{
3037    int b1, op1_offset, op2_offset, is_xmm, val;
3038    int modrm, mod, rm, reg;
3039    SSEFunc_0_epp sse_fn_epp;
3040    SSEFunc_0_eppi sse_fn_eppi;
3041    SSEFunc_0_ppi sse_fn_ppi;
3042    SSEFunc_0_eppt sse_fn_eppt;
3043    TCGMemOp ot;
3044
3045    b &= 0xff;
3046    if (s->prefix & PREFIX_DATA)
3047        b1 = 1;
3048    else if (s->prefix & PREFIX_REPZ)
3049        b1 = 2;
3050    else if (s->prefix & PREFIX_REPNZ)
3051        b1 = 3;
3052    else
3053        b1 = 0;
3054    sse_fn_epp = sse_op_table1[b][b1];
3055    if (!sse_fn_epp) {
3056        goto unknown_op;
3057    }
3058    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3059        is_xmm = 1;
3060    } else {
3061        if (b1 == 0) {
3062            /* MMX case */
3063            is_xmm = 0;
3064        } else {
3065            is_xmm = 1;
3066        }
3067    }
3068    /* simple MMX/SSE operation */
3069    if (s->flags & HF_TS_MASK) {
3070        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3071        return;
3072    }
3073    if (s->flags & HF_EM_MASK) {
3074    illegal_op:
3075        gen_illegal_opcode(s);
3076        return;
3077    }
3078    if (is_xmm
3079        && !(s->flags & HF_OSFXSR_MASK)
3080        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3081        goto unknown_op;
3082    }
3083    if (b == 0x0e) {
3084        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3085            /* If we were fully decoding this we might use illegal_op.  */
3086            goto unknown_op;
3087        }
3088        /* femms */
3089        gen_helper_emms(cpu_env);
3090        return;
3091    }
3092    if (b == 0x77) {
3093        /* emms */
3094        gen_helper_emms(cpu_env);
3095        return;
3096    }
3097    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3098       the static cpu state) */
3099    if (!is_xmm) {
3100        gen_helper_enter_mmx(cpu_env);
3101    }
3102
3103    modrm = x86_ldub_code(env, s);
3104    reg = ((modrm >> 3) & 7);
3105    if (is_xmm)
3106        reg |= rex_r;
3107    mod = (modrm >> 6) & 3;
3108    if (sse_fn_epp == SSE_SPECIAL) {
3109        b |= (b1 << 8);
3110        switch(b) {
3111        case 0x0e7: /* movntq */
3112            if (mod == 3) {
3113                goto illegal_op;
3114            }
3115            gen_lea_modrm(env, s, modrm);
3116            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3117            break;
3118        case 0x1e7: /* movntdq */
3119        case 0x02b: /* movntps */
3120        case 0x12b: /* movntpd */
3121            if (mod == 3)
3122                goto illegal_op;
3123            gen_lea_modrm(env, s, modrm);
3124            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3125            break;
3126        case 0x3f0: /* lddqu */
3127            if (mod == 3)
3128                goto illegal_op;
3129            gen_lea_modrm(env, s, modrm);
3130            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3131            break;
3132        case 0x22b: /* movntss */
3133        case 0x32b: /* movntsd */
3134            if (mod == 3)
3135                goto illegal_op;
3136            gen_lea_modrm(env, s, modrm);
3137            if (b1 & 1) {
3138                gen_stq_env_A0(s, offsetof(CPUX86State,
3139                                           xmm_regs[reg].ZMM_Q(0)));
3140            } else {
3141                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3142                    xmm_regs[reg].ZMM_L(0)));
3143                gen_op_st_v(s, MO_32, s->T0, s->A0);
3144            }
3145            break;
3146        case 0x6e: /* movd mm, ea */
3147#ifdef TARGET_X86_64
3148            if (s->dflag == MO_64) {
3149                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3150                tcg_gen_st_tl(s->T0, cpu_env,
3151                              offsetof(CPUX86State, fpregs[reg].mmx));
3152            } else
3153#endif
3154            {
3155                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3156                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3157                                 offsetof(CPUX86State,fpregs[reg].mmx));
3158                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3159                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3160            }
3161            break;
3162        case 0x16e: /* movd xmm, ea */
3163#ifdef TARGET_X86_64
3164            if (s->dflag == MO_64) {
3165                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3166                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3167                                 offsetof(CPUX86State,xmm_regs[reg]));
3168                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3169            } else
3170#endif
3171            {
3172                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3173                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3174                                 offsetof(CPUX86State,xmm_regs[reg]));
3175                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3176                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3177            }
3178            break;
3179        case 0x6f: /* movq mm, ea */
3180            if (mod != 3) {
3181                gen_lea_modrm(env, s, modrm);
3182                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3183            } else {
3184                rm = (modrm & 7);
3185                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3186                               offsetof(CPUX86State,fpregs[rm].mmx));
3187                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3188                               offsetof(CPUX86State,fpregs[reg].mmx));
3189            }
3190            break;
3191        case 0x010: /* movups */
3192        case 0x110: /* movupd */
3193        case 0x028: /* movaps */
3194        case 0x128: /* movapd */
3195        case 0x16f: /* movdqa xmm, ea */
3196        case 0x26f: /* movdqu xmm, ea */
3197            if (mod != 3) {
3198                gen_lea_modrm(env, s, modrm);
3199                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3200            } else {
3201                rm = (modrm & 7) | REX_B(s);
3202                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3203                            offsetof(CPUX86State,xmm_regs[rm]));
3204            }
3205            break;
3206        case 0x210: /* movss xmm, ea */
3207            if (mod != 3) {
3208                gen_lea_modrm(env, s, modrm);
3209                gen_op_ld_v(s, MO_32, s->T0, s->A0);
3210                tcg_gen_st32_tl(s->T0, cpu_env,
3211                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3212                tcg_gen_movi_tl(s->T0, 0);
3213                tcg_gen_st32_tl(s->T0, cpu_env,
3214                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3215                tcg_gen_st32_tl(s->T0, cpu_env,
3216                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3217                tcg_gen_st32_tl(s->T0, cpu_env,
3218                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3219            } else {
3220                rm = (modrm & 7) | REX_B(s);
3221                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3222                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3223            }
3224            break;
3225        case 0x310: /* movsd xmm, ea */
3226            if (mod != 3) {
3227                gen_lea_modrm(env, s, modrm);
3228                gen_ldq_env_A0(s, offsetof(CPUX86State,
3229                                           xmm_regs[reg].ZMM_Q(0)));
3230                tcg_gen_movi_tl(s->T0, 0);
3231                tcg_gen_st32_tl(s->T0, cpu_env,
3232                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3233                tcg_gen_st32_tl(s->T0, cpu_env,
3234                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3235            } else {
3236                rm = (modrm & 7) | REX_B(s);
3237                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3238                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3239            }
3240            break;
3241        case 0x012: /* movlps */
3242        case 0x112: /* movlpd */
3243            if (mod != 3) {
3244                gen_lea_modrm(env, s, modrm);
3245                gen_ldq_env_A0(s, offsetof(CPUX86State,
3246                                           xmm_regs[reg].ZMM_Q(0)));
3247            } else {
3248                /* movhlps */
3249                rm = (modrm & 7) | REX_B(s);
3250                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3251                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3252            }
3253            break;
3254        case 0x212: /* movsldup */
3255            if (mod != 3) {
3256                gen_lea_modrm(env, s, modrm);
3257                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3258            } else {
3259                rm = (modrm & 7) | REX_B(s);
3260                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3261                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3262                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3263                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3264            }
3265            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3266                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3267            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3268                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3269            break;
3270        case 0x312: /* movddup */
3271            if (mod != 3) {
3272                gen_lea_modrm(env, s, modrm);
3273                gen_ldq_env_A0(s, offsetof(CPUX86State,
3274                                           xmm_regs[reg].ZMM_Q(0)));
3275            } else {
3276                rm = (modrm & 7) | REX_B(s);
3277                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3278                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3279            }
3280            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3281                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3282            break;
3283        case 0x016: /* movhps */
3284        case 0x116: /* movhpd */
3285            if (mod != 3) {
3286                gen_lea_modrm(env, s, modrm);
3287                gen_ldq_env_A0(s, offsetof(CPUX86State,
3288                                           xmm_regs[reg].ZMM_Q(1)));
3289            } else {
3290                /* movlhps */
3291                rm = (modrm & 7) | REX_B(s);
3292                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3293                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3294            }
3295            break;
3296        case 0x216: /* movshdup */
3297            if (mod != 3) {
3298                gen_lea_modrm(env, s, modrm);
3299                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3300            } else {
3301                rm = (modrm & 7) | REX_B(s);
3302                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3303                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3304                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3305                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3306            }
3307            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3308                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3309            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3310                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3311            break;
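        /*
         * A sketch of the lane layouts produced by the three *dup cases
         * above (s = the source's 32-bit lanes, q = its 64-bit halves):
         *
         *   movsldup: dest = { s0, s0, s2, s2 }
         *   movshdup: dest = { s1, s1, s3, s3 }
         *   movddup:  dest = { q0, q0 }
         *
         * which is why each case first copies the even (or odd) lanes and
         * then replicates them inside the destination register.
         */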
3312        case 0x178:
3313        case 0x378:
3314            {
3315                int bit_index, field_length;
3316
3317                if (b1 == 1 && reg != 0)
3318                    goto illegal_op;
3319                field_length = x86_ldub_code(env, s) & 0x3F;
3320                bit_index = x86_ldub_code(env, s) & 0x3F;
3321                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3322                    offsetof(CPUX86State,xmm_regs[reg]));
3323                if (b1 == 1)
3324                    gen_helper_extrq_i(cpu_env, s->ptr0,
3325                                       tcg_const_i32(bit_index),
3326                                       tcg_const_i32(field_length));
3327                else
3328                    gen_helper_insertq_i(cpu_env, s->ptr0,
3329                                         tcg_const_i32(bit_index),
3330                                         tcg_const_i32(field_length));
3331            }
3332            break;
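        /*
         * Opcodes 0x178/0x378 above are the SSE4a extrq/insertq immediate
         * forms.  Both read two extra immediate bytes, a 6-bit field
         * length and a 6-bit bit index, and operate on the low quadword;
         * roughly, for extrq:
         *
         *   xmm.Q(0) = (xmm.Q(0) >> bit_index)
         *              & (((uint64_t)1 << field_length) - 1);
         *
         * with a field length of 0 denoting a full 64-bit field.
         */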
3333        case 0x7e: /* movd ea, mm */
3334#ifdef TARGET_X86_64
3335            if (s->dflag == MO_64) {
3336                tcg_gen_ld_i64(s->T0, cpu_env,
3337                               offsetof(CPUX86State,fpregs[reg].mmx));
3338                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3339            } else
3340#endif
3341            {
3342                tcg_gen_ld32u_tl(s->T0, cpu_env,
3343                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3344                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3345            }
3346            break;
3347        case 0x17e: /* movd ea, xmm */
3348#ifdef TARGET_X86_64
3349            if (s->dflag == MO_64) {
3350                tcg_gen_ld_i64(s->T0, cpu_env,
3351                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3352                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3353            } else
3354#endif
3355            {
3356                tcg_gen_ld32u_tl(s->T0, cpu_env,
3357                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3358                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3359            }
3360            break;
3361        case 0x27e: /* movq xmm, ea */
3362            if (mod != 3) {
3363                gen_lea_modrm(env, s, modrm);
3364                gen_ldq_env_A0(s, offsetof(CPUX86State,
3365                                           xmm_regs[reg].ZMM_Q(0)));
3366            } else {
3367                rm = (modrm & 7) | REX_B(s);
3368                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3369                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3370            }
3371            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3372            break;
3373        case 0x7f: /* movq ea, mm */
3374            if (mod != 3) {
3375                gen_lea_modrm(env, s, modrm);
3376                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3377            } else {
3378                rm = (modrm & 7);
3379                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3380                            offsetof(CPUX86State,fpregs[reg].mmx));
3381            }
3382            break;
3383        case 0x011: /* movups */
3384        case 0x111: /* movupd */
3385        case 0x029: /* movaps */
3386        case 0x129: /* movapd */
3387        case 0x17f: /* movdqa ea, xmm */
3388        case 0x27f: /* movdqu ea, xmm */
3389            if (mod != 3) {
3390                gen_lea_modrm(env, s, modrm);
3391                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3392            } else {
3393                rm = (modrm & 7) | REX_B(s);
3394                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3395                            offsetof(CPUX86State,xmm_regs[reg]));
3396            }
3397            break;
3398        case 0x211: /* movss ea, xmm */
3399            if (mod != 3) {
3400                gen_lea_modrm(env, s, modrm);
3401                tcg_gen_ld32u_tl(s->T0, cpu_env,
3402                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3403                gen_op_st_v(s, MO_32, s->T0, s->A0);
3404            } else {
3405                rm = (modrm & 7) | REX_B(s);
3406                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3407                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3408            }
3409            break;
3410        case 0x311: /* movsd ea, xmm */
3411            if (mod != 3) {
3412                gen_lea_modrm(env, s, modrm);
3413                gen_stq_env_A0(s, offsetof(CPUX86State,
3414                                           xmm_regs[reg].ZMM_Q(0)));
3415            } else {
3416                rm = (modrm & 7) | REX_B(s);
3417                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3418                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3419            }
3420            break;
3421        case 0x013: /* movlps */
3422        case 0x113: /* movlpd */
3423            if (mod != 3) {
3424                gen_lea_modrm(env, s, modrm);
3425                gen_stq_env_A0(s, offsetof(CPUX86State,
3426                                           xmm_regs[reg].ZMM_Q(0)));
3427            } else {
3428                goto illegal_op;
3429            }
3430            break;
3431        case 0x017: /* movhps */
3432        case 0x117: /* movhpd */
3433            if (mod != 3) {
3434                gen_lea_modrm(env, s, modrm);
3435                gen_stq_env_A0(s, offsetof(CPUX86State,
3436                                           xmm_regs[reg].ZMM_Q(1)));
3437            } else {
3438                goto illegal_op;
3439            }
3440            break;
3441        case 0x71: /* shift mm, im */
3442        case 0x72:
3443        case 0x73:
3444        case 0x171: /* shift xmm, im */
3445        case 0x172:
3446        case 0x173:
3447            if (b1 >= 2) {
3448                goto unknown_op;
3449            }
3450            val = x86_ldub_code(env, s);
3451            if (is_xmm) {
3452                tcg_gen_movi_tl(s->T0, val);
3453                tcg_gen_st32_tl(s->T0, cpu_env,
3454                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3455                tcg_gen_movi_tl(s->T0, 0);
3456                tcg_gen_st32_tl(s->T0, cpu_env,
3457                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3458                op1_offset = offsetof(CPUX86State,xmm_t0);
3459            } else {
3460                tcg_gen_movi_tl(s->T0, val);
3461                tcg_gen_st32_tl(s->T0, cpu_env,
3462                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3463                tcg_gen_movi_tl(s->T0, 0);
3464                tcg_gen_st32_tl(s->T0, cpu_env,
3465                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3466                op1_offset = offsetof(CPUX86State,mmx_t0);
3467            }
3468            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3469                                       ((modrm >> 3) & 7)][b1];
3470            if (!sse_fn_epp) {
3471                goto unknown_op;
3472            }
3473            if (is_xmm) {
3474                rm = (modrm & 7) | REX_B(s);
3475                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3476            } else {
3477                rm = (modrm & 7);
3478                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3479            }
3480            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3481            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3482            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3483            break;
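        /*
         * For the immediate-shift group above, the opcode byte (0x71-0x73)
         * selects the element width and the modrm /r field selects the
         * operation, so the lookup is effectively
         *
         *   sse_op_table2[width_row * 8 + modrm_reg][b1]
         *
         * e.g. 0x71 /2 is psrlw, 0x71 /4 is psraw and 0x71 /6 is psllw,
         * with the 0x72/0x73 rows holding the dword/qword variants.
         */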
3484        case 0x050: /* movmskps */
3485            rm = (modrm & 7) | REX_B(s);
3486            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3487                             offsetof(CPUX86State,xmm_regs[rm]));
3488            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3489            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3490            break;
3491        case 0x150: /* movmskpd */
3492            rm = (modrm & 7) | REX_B(s);
3493            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3494                             offsetof(CPUX86State,xmm_regs[rm]));
3495            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3496            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3497            break;
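        /*
         * movmskps/movmskpd above gather the sign bit of each packed
         * element into the low bits of a general register; for movmskps
         * the result is, conceptually,
         *
         *   (sign(s3) << 3) | (sign(s2) << 2) | (sign(s1) << 1) | sign(s0)
         */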
3498        case 0x02a: /* cvtpi2ps */
3499        case 0x12a: /* cvtpi2pd */
3500            gen_helper_enter_mmx(cpu_env);
3501            if (mod != 3) {
3502                gen_lea_modrm(env, s, modrm);
3503                op2_offset = offsetof(CPUX86State,mmx_t0);
3504                gen_ldq_env_A0(s, op2_offset);
3505            } else {
3506                rm = (modrm & 7);
3507                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3508            }
3509            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3510            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3511            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3512            switch(b >> 8) {
3513            case 0x0:
3514                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3515                break;
3516            default:
3517            case 0x1:
3518                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3519                break;
3520            }
3521            break;
3522        case 0x22a: /* cvtsi2ss */
3523        case 0x32a: /* cvtsi2sd */
3524            ot = mo_64_32(s->dflag);
3525            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3526            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3527            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3528            if (ot == MO_32) {
3529                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3530                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3531                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3532            } else {
3533#ifdef TARGET_X86_64
3534                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3535                sse_fn_epl(cpu_env, s->ptr0, s->T0);
3536#else
3537                goto illegal_op;
3538#endif
3539            }
3540            break;
3541        case 0x02c: /* cvttps2pi */
3542        case 0x12c: /* cvttpd2pi */
3543        case 0x02d: /* cvtps2pi */
3544        case 0x12d: /* cvtpd2pi */
3545            gen_helper_enter_mmx(cpu_env);
3546            if (mod != 3) {
3547                gen_lea_modrm(env, s, modrm);
3548                op2_offset = offsetof(CPUX86State,xmm_t0);
3549                gen_ldo_env_A0(s, op2_offset);
3550            } else {
3551                rm = (modrm & 7) | REX_B(s);
3552                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3553            }
3554            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3555            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3556            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3557            switch(b) {
3558            case 0x02c:
3559                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3560                break;
3561            case 0x12c:
3562                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3563                break;
3564            case 0x02d:
3565                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3566                break;
3567            case 0x12d:
3568                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3569                break;
3570            }
3571            break;
3572        case 0x22c: /* cvttss2si */
3573        case 0x32c: /* cvttsd2si */
3574        case 0x22d: /* cvtss2si */
3575        case 0x32d: /* cvtsd2si */
3576            ot = mo_64_32(s->dflag);
3577            if (mod != 3) {
3578                gen_lea_modrm(env, s, modrm);
3579                if ((b >> 8) & 1) {
3580                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3581                } else {
3582                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
3583                    tcg_gen_st32_tl(s->T0, cpu_env,
3584                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3585                }
3586                op2_offset = offsetof(CPUX86State,xmm_t0);
3587            } else {
3588                rm = (modrm & 7) | REX_B(s);
3589                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3590            }
3591            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3592            if (ot == MO_32) {
3593                SSEFunc_i_ep sse_fn_i_ep =
3594                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3595                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3596                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3597            } else {
3598#ifdef TARGET_X86_64
3599                SSEFunc_l_ep sse_fn_l_ep =
3600                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3601                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3602#else
3603                goto illegal_op;
3604#endif
3605            }
3606            gen_op_mov_reg_v(s, ot, reg, s->T0);
3607            break;
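        /*
         * The four conversions above share one decode: opcode bit 0
         * distinguishes the truncating (0x_2c) from the rounding (0x_2d)
         * forms and bit 8 distinguishes ss (0x2xx) from sd (0x3xx), hence
         * the table index ((b >> 7) & 2) | (b & 1).
         */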
3608        case 0xc4: /* pinsrw */
3609        case 0x1c4:
3610            s->rip_offset = 1;
3611            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3612            val = x86_ldub_code(env, s);
3613            if (b1) {
3614                val &= 7;
3615                tcg_gen_st16_tl(s->T0, cpu_env,
3616                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3617            } else {
3618                val &= 3;
3619                tcg_gen_st16_tl(s->T0, cpu_env,
3620                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3621            }
3622            break;
3623        case 0xc5: /* pextrw */
3624        case 0x1c5:
3625            if (mod != 3)
3626                goto illegal_op;
3627            ot = mo_64_32(s->dflag);
3628            val = x86_ldub_code(env, s);
3629            if (b1) {
3630                val &= 7;
3631                rm = (modrm & 7) | REX_B(s);
3632                tcg_gen_ld16u_tl(s->T0, cpu_env,
3633                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3634            } else {
3635                val &= 3;
3636                rm = (modrm & 7);
3637                tcg_gen_ld16u_tl(s->T0, cpu_env,
3638                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3639            }
3640            reg = ((modrm >> 3) & 7) | rex_r;
3641            gen_op_mov_reg_v(s, ot, reg, s->T0);
3642            break;
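        /*
         * For pinsrw/pextrw above, the immediate selects a 16-bit lane and
         * is masked to the register width -- val & 7 for the eight words
         * of an xmm register, val & 3 for the four words of an mmx
         * register -- so out-of-range selectors simply wrap.
         */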
3643        case 0x1d6: /* movq ea, xmm */
3644            if (mod != 3) {
3645                gen_lea_modrm(env, s, modrm);
3646                gen_stq_env_A0(s, offsetof(CPUX86State,
3647                                           xmm_regs[reg].ZMM_Q(0)));
3648            } else {
3649                rm = (modrm & 7) | REX_B(s);
3650                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3651                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3652                gen_op_movq_env_0(s,
3653                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3654            }
3655            break;
3656        case 0x2d6: /* movq2dq */
3657            gen_helper_enter_mmx(cpu_env);
3658            rm = (modrm & 7);
3659            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3660                        offsetof(CPUX86State,fpregs[rm].mmx));
3661            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3662            break;
3663        case 0x3d6: /* movdq2q */
3664            gen_helper_enter_mmx(cpu_env);
3665            rm = (modrm & 7) | REX_B(s);
3666            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3667                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3668            break;
3669        case 0xd7: /* pmovmskb */
3670        case 0x1d7:
3671            if (mod != 3)
3672                goto illegal_op;
3673            if (b1) {
3674                rm = (modrm & 7) | REX_B(s);
3675                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3676                                 offsetof(CPUX86State, xmm_regs[rm]));
3677                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3678            } else {
3679                rm = (modrm & 7);
3680                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3681                                 offsetof(CPUX86State, fpregs[rm].mmx));
3682                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3683            }
3684            reg = ((modrm >> 3) & 7) | rex_r;
3685            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3686            break;
3687
3688        case 0x138:
3689        case 0x038:
3690            b = modrm;
3691            if ((b & 0xf0) == 0xf0) {
3692                goto do_0f_38_fx;
3693            }
3694            modrm = x86_ldub_code(env, s);
3695            rm = modrm & 7;
3696            reg = ((modrm >> 3) & 7) | rex_r;
3697            mod = (modrm >> 6) & 3;
3698            if (b1 >= 2) {
3699                goto unknown_op;
3700            }
3701
3702            sse_fn_epp = sse_op_table6[b].op[b1];
3703            if (!sse_fn_epp) {
3704                goto unknown_op;
3705            }
3706            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3707                goto illegal_op;
3708
3709            if (b1) {
3710                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3711                if (mod == 3) {
3712                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3713                } else {
3714                    op2_offset = offsetof(CPUX86State,xmm_t0);
3715                    gen_lea_modrm(env, s, modrm);
3716                    switch (b) {
3717                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3718                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3719                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3720                        gen_ldq_env_A0(s, op2_offset +
3721                                        offsetof(ZMMReg, ZMM_Q(0)));
3722                        break;
3723                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3724                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3725                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3726                                            s->mem_index, MO_LEUL);
3727                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3728                                        offsetof(ZMMReg, ZMM_L(0)));
3729                        break;
3730                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3731                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3732                                           s->mem_index, MO_LEUW);
3733                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3734                                        offsetof(ZMMReg, ZMM_W(0)));
3735                        break;
3736                    case 0x2a:            /* movntdqa */
3737                        gen_ldo_env_A0(s, op1_offset);
3738                        return;
3739                    default:
3740                        gen_ldo_env_A0(s, op2_offset);
3741                    }
3742                }
3743            } else {
3744                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3745                if (mod == 3) {
3746                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3747                } else {
3748                    op2_offset = offsetof(CPUX86State,mmx_t0);
3749                    gen_lea_modrm(env, s, modrm);
3750                    gen_ldq_env_A0(s, op2_offset);
3751                }
3752            }
3753            if (sse_fn_epp == SSE_SPECIAL) {
3754                goto unknown_op;
3755            }
3756
3757            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3758            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3759            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3760
3761            if (b == 0x17) {
3762                set_cc_op(s, CC_OP_EFLAGS);
3763            }
3764            break;
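        /*
         * Note that in the 0f 38 block above, the pmovsx/pmovzx memory
         * forms load only as many source bits as the widening consumes --
         * a qword for byte->word, a dword for byte->dword or word->qword,
         * a word for byte->qword -- rather than a full 128-bit operand,
         * matching the architectural memory access size.
         */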
3765
3766        case 0x238:
3767        case 0x338:
3768        do_0f_38_fx:
3769            /* Various integer extensions at 0f 38 f[0-f].  */
3770            b = modrm | (b1 << 8);
3771            modrm = x86_ldub_code(env, s);
3772            reg = ((modrm >> 3) & 7) | rex_r;
3773
3774            switch (b) {
3775            case 0x3f0: /* crc32 Gd,Eb */
3776            case 0x3f1: /* crc32 Gd,Ey */
3777            do_crc32:
3778                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3779                    goto illegal_op;
3780                }
3781                if ((b & 0xff) == 0xf0) {
3782                    ot = MO_8;
3783                } else if (s->dflag != MO_64) {
3784                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3785                } else {
3786                    ot = MO_64;
3787                }
3788
3789                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3790                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3791                gen_helper_crc32(s->T0, s->tmp2_i32,
3792                                 s->T0, tcg_const_i32(8 << ot));
3793
3794                ot = mo_64_32(s->dflag);
3795                gen_op_mov_reg_v(s, ot, reg, s->T0);
3796                break;
3797
3798            case 0x1f0: /* crc32 or movbe */
3799            case 0x1f1:
3800                /* For these insns, the f3 prefix is supposed to take
3801                   priority over the 66 prefix, but that is not how b1
3802                   was computed above.  */
3803                if (s->prefix & PREFIX_REPNZ) {
3804                    goto do_crc32;
3805                }
3806                /* FALLTHRU */
3807            case 0x0f0: /* movbe Gy,My */
3808            case 0x0f1: /* movbe My,Gy */
3809                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3810                    goto illegal_op;
3811                }
3812                if (s->dflag != MO_64) {
3813                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3814                } else {
3815                    ot = MO_64;
3816                }
3817
3818                gen_lea_modrm(env, s, modrm);
3819                if ((b & 1) == 0) {
3820                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
3821                                       s->mem_index, ot | MO_BE);
3822                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3823                } else {
3824                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3825                                       s->mem_index, ot | MO_BE);
3826                }
3827                break;
3828
3829            case 0x0f2: /* andn Gy, By, Ey */
3830                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3831                    || !(s->prefix & PREFIX_VEX)
3832                    || s->vex_l != 0) {
3833                    goto illegal_op;
3834                }
3835                ot = mo_64_32(s->dflag);
3836                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3837                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3838                gen_op_mov_reg_v(s, ot, reg, s->T0);
3839                gen_op_update1_cc(s);
3840                set_cc_op(s, CC_OP_LOGICB + ot);
3841                break;
3842
3843            case 0x0f7: /* bextr Gy, Ey, By */
3844                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3845                    || !(s->prefix & PREFIX_VEX)
3846                    || s->vex_l != 0) {
3847                    goto illegal_op;
3848                }
3849                ot = mo_64_32(s->dflag);
3850                {
3851                    TCGv bound, zero;
3852
3853                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3854                    /* Extract START, and shift the operand.
3855                       Shifts larger than operand size get zeros.  */
3856                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3857                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3858
3859                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3860                    zero = tcg_const_tl(0);
3861                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3862                                       s->T0, zero);
3863                    tcg_temp_free(zero);
3864
3865                    /* Extract the LEN into a mask.  Lengths larger than
3866                       operand size get all ones.  */
3867                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3868                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3869                                       s->A0, bound);
3870                    tcg_temp_free(bound);
3871                    tcg_gen_movi_tl(s->T1, 1);
3872                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3873                    tcg_gen_subi_tl(s->T1, s->T1, 1);
3874                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
3875
3876                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3877                    gen_op_update1_cc(s);
3878                    set_cc_op(s, CC_OP_LOGICB + ot);
3879                }
3880                break;
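            /*
             * A worked bextr example: By packs START in bits [7:0] and LEN
             * in bits [15:8]; with START = 8 and LEN = 4 the sequence
             * above computes
             *
             *   dest = (src >> 8) & ((1 << 4) - 1);
             *
             * with the first movcond supplying zeros when START exceeds
             * the operand size, and the second clamping LEN to it.
             */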
3881
3882            case 0x0f5: /* bzhi Gy, Ey, By */
3883                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3884                    || !(s->prefix & PREFIX_VEX)
3885                    || s->vex_l != 0) {
3886                    goto illegal_op;
3887                }
3888                ot = mo_64_32(s->dflag);
3889                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3890                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3891                {
3892                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3893                    /* Note that since we're using BMILG (in order to get O
3894                       cleared) we need to store the inverse into C.  */
3895                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3896                                       s->T1, bound);
3897                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3898                                       bound, bound, s->T1);
3899                    tcg_temp_free(bound);
3900                }
3901                tcg_gen_movi_tl(s->A0, -1);
3902                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3903                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3904                gen_op_mov_reg_v(s, ot, reg, s->T0);
3905                gen_op_update1_cc(s);
3906                set_cc_op(s, CC_OP_BMILGB + ot);
3907                break;
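            /*
             * bzhi above zeroes every bit of Ey at a position >= the index
             * in By[7:0]; the -1 << n sequence builds the mask, so the
             * effect is roughly dest = src & ~(~(target_ulong)0 << n).
             */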
3908
3909            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3910                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3911                    || !(s->prefix & PREFIX_VEX)
3912                    || s->vex_l != 0) {
3913                    goto illegal_op;
3914                }
3915                ot = mo_64_32(s->dflag);
3916                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3917                switch (ot) {
3918                default:
3919                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3920                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3921                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3922                                      s->tmp2_i32, s->tmp3_i32);
3923                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3924                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3925                    break;
3926#ifdef TARGET_X86_64
3927                case MO_64:
3928                    tcg_gen_mulu2_i64(s->T0, s->T1,
3929                                      s->T0, cpu_regs[R_EDX]);
3930                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
3931                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
3932                    break;
3933#endif
3934                }
3935                break;
3936
3937            case 0x3f5: /* pdep Gy, By, Ey */
3938                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3939                    || !(s->prefix & PREFIX_VEX)
3940                    || s->vex_l != 0) {
3941                    goto illegal_op;
3942                }
3943                ot = mo_64_32(s->dflag);
3944                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3945                /* Note that by zero-extending the mask operand, we
3946                   automatically handle zero-extending the result.  */
3947                if (ot == MO_64) {
3948                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3949                } else {
3950                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3951                }
3952                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
3953                break;
3954
3955            case 0x2f5: /* pext Gy, By, Ey */
3956                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3957                    || !(s->prefix & PREFIX_VEX)
3958                    || s->vex_l != 0) {
3959                    goto illegal_op;
3960                }
3961                ot = mo_64_32(s->dflag);
3962                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3963                /* Note that by zero-extending the mask operand, we
3964                   automatically handle zero-extending the result.  */
3965                if (ot == MO_64) {
3966                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3967                } else {
3968                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3969                }
3970                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
3971                break;
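            /*
             * pdep scatters the low-order bits of the source into the set
             * bit positions of the mask; pext is the inverse gather.  A
             * small worked example:
             *
             *   pdep(src = 0b101, mask = 0b11010) == 0b10010
             *
             * since the mask's set bits (positions 1, 3 and 4) receive
             * src bits 0, 1 and 2 in order, and pext of that result with
             * the same mask returns 0b101 again.
             */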
3972
3973            case 0x1f6: /* adcx Gy, Ey */
3974            case 0x2f6: /* adox Gy, Ey */
3975                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3976                    goto illegal_op;
3977                } else {
3978                    TCGv carry_in, carry_out, zero;
3979                    int end_op;
3980
3981                    ot = mo_64_32(s->dflag);
3982                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3983
3984                    /* Re-use the carry-out from a previous round.  */
3985                    carry_in = NULL;
3986                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3987                    switch (s->cc_op) {
3988                    case CC_OP_ADCX:
3989                        if (b == 0x1f6) {
3990                            carry_in = cpu_cc_dst;
3991                            end_op = CC_OP_ADCX;
3992                        } else {
3993                            end_op = CC_OP_ADCOX;
3994                        }
3995                        break;
3996                    case CC_OP_ADOX:
3997                        if (b == 0x1f6) {
3998                            end_op = CC_OP_ADCOX;
3999                        } else {
4000                            carry_in = cpu_cc_src2;
4001                            end_op = CC_OP_ADOX;
4002                        }
4003                        break;
4004                    case CC_OP_ADCOX:
4005                        end_op = CC_OP_ADCOX;
4006                        carry_in = carry_out;
4007                        break;
4008                    default:
4009                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4010                        break;
4011                    }
4012                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
4013                    if (!carry_in) {
4014                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4015                            gen_compute_eflags(s);
4016                        }
4017                        carry_in = s->tmp0;
4018                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
4019                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4020                    }
4021
4022                    switch (ot) {
4023#ifdef TARGET_X86_64
4024                    case MO_32:
4025                        /* If we know TL is 64-bit, and we want a 32-bit
4026                           result, just do everything in 64-bit arithmetic.  */
4027                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4028                        tcg_gen_ext32u_i64(s->T0, s->T0);
4029                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4030                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
4031                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4032                        tcg_gen_shri_i64(carry_out, s->T0, 32);
4033                        break;
4034#endif
4035                    default:
4036                        /* Otherwise compute the carry-out in two steps.  */
4037                        zero = tcg_const_tl(0);
4038                        tcg_gen_add2_tl(s->T0, carry_out,
4039                                        s->T0, zero,
4040                                        carry_in, zero);
4041                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4042                                        cpu_regs[reg], carry_out,
4043                                        s->T0, zero);
4044                        tcg_temp_free(zero);
4045                        break;
4046                    }
4047                    set_cc_op(s, end_op);
4048                }
4049                break;
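            /*
             * adcx adds with carry through CF only and adox through OF
             * only, leaving the other flags intact, so two independent
             * carry chains can be interleaved (the classic use is
             * multi-precision multiplication).  The CC_OP_ADCX/ADOX/ADCOX
             * states above record which chain's carry is live in
             * cpu_cc_dst and cpu_cc_src2, letting the next adcx/adox pick
             * it up directly instead of materializing EFLAGS.
             */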
4050
4051            case 0x1f7: /* shlx Gy, Ey, By */
4052            case 0x2f7: /* sarx Gy, Ey, By */
4053            case 0x3f7: /* shrx Gy, Ey, By */
4054                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4055                    || !(s->prefix & PREFIX_VEX)
4056                    || s->vex_l != 0) {
4057                    goto illegal_op;
4058                }
4059                ot = mo_64_32(s->dflag);
4060                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4061                if (ot == MO_64) {
4062                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4063                } else {
4064                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4065                }
4066                if (b == 0x1f7) {
4067                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4068                } else if (b == 0x2f7) {
4069                    if (ot != MO_64) {
4070                        tcg_gen_ext32s_tl(s->T0, s->T0);
4071                    }
4072                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4073                } else {
4074                    if (ot != MO_64) {
4075                        tcg_gen_ext32u_tl(s->T0, s->T0);
4076                    }
4077                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4078                }
4079                gen_op_mov_reg_v(s, ot, reg, s->T0);
4080                break;
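            /*
             * For shlx/sarx/shrx above, the count from By is masked to
             * 5 bits (32-bit) or 6 bits (64-bit) exactly as the hardware
             * does, and no flags are written -- which is the point of
             * these BMI2 forms compared with the legacy shifts.
             */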
4081
4082            case 0x0f3:
4083            case 0x1f3:
4084            case 0x2f3:
4085            case 0x3f3: /* Group 17 */
4086                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4087                    || !(s->prefix & PREFIX_VEX)
4088                    || s->vex_l != 0) {
4089                    goto illegal_op;
4090                }
4091                ot = mo_64_32(s->dflag);
4092                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4093
4094                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4095                switch (reg & 7) {
4096                case 1: /* blsr By,Ey */
4097                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4098                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4099                    break;
4100                case 2: /* blsmsk By,Ey */
4101                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4102                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4103                    break;
4104                case 3: /* blsi By, Ey */
4105                    tcg_gen_neg_tl(s->T1, s->T0);
4106                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4107                    break;
4108                default:
4109                    goto unknown_op;
4110                }
4111                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4112                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4113                set_cc_op(s, CC_OP_BMILGB + ot);
4114                break;
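            /*
             * The three BMI1 group-17 ops above are the standard
             * lowest-set-bit identities:
             *
             *   blsr:   x & (x - 1)   clear the lowest set bit
             *   blsmsk: x ^ (x - 1)   mask up through the lowest set bit
             *   blsi:   x & -x        isolate the lowest set bit
             *
             * e.g. for x = 0b10100: blsr gives 0b10000, blsmsk 0b00111
             * and blsi 0b00100.
             */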
4115
4116            default:
4117                goto unknown_op;
4118            }
4119            break;
4120
4121        case 0x03a:
4122        case 0x13a:
4123            b = modrm;
4124            modrm = x86_ldub_code(env, s);
4125            rm = modrm & 7;
4126            reg = ((modrm >> 3) & 7) | rex_r;
4127            mod = (modrm >> 6) & 3;
4128            if (b1 >= 2) {
4129                goto unknown_op;
4130            }
4131
4132            sse_fn_eppi = sse_op_table7[b].op[b1];
4133            if (!sse_fn_eppi) {
4134                goto unknown_op;
4135            }
4136            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4137                goto illegal_op;
4138
4139            s->rip_offset = 1;
4140
4141            if (sse_fn_eppi == SSE_SPECIAL) {
4142                ot = mo_64_32(s->dflag);
4143                rm = (modrm & 7) | REX_B(s);
4144                if (mod != 3)
4145                    gen_lea_modrm(env, s, modrm);
4146                reg = ((modrm >> 3) & 7) | rex_r;
4147                val = x86_ldub_code(env, s);
4148                switch (b) {
4149                case 0x14: /* pextrb */
4150                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4151                                            xmm_regs[reg].ZMM_B(val & 15)));
4152                    if (mod == 3) {
4153                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4154                    } else {
4155                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4156                                           s->mem_index, MO_UB);
4157                    }
4158                    break;
4159                case 0x15: /* pextrw */
4160                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4161                                            xmm_regs[reg].ZMM_W(val & 7)));
4162                    if (mod == 3) {
4163                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4164                    } else {
4165                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4166                                           s->mem_index, MO_LEUW);
4167                    }
4168                    break;
4169                case 0x16:
4170                    if (ot == MO_32) { /* pextrd */
4171                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4172                                        offsetof(CPUX86State,
4173                                                xmm_regs[reg].ZMM_L(val & 3)));
4174                        if (mod == 3) {
4175                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4176                        } else {
4177                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4178                                                s->mem_index, MO_LEUL);
4179                        }
4180                    } else { /* pextrq */
4181#ifdef TARGET_X86_64
4182                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4183                                        offsetof(CPUX86State,
4184                                                xmm_regs[reg].ZMM_Q(val & 1)));
4185                        if (mod == 3) {
4186                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4187                        } else {
4188                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4189                                                s->mem_index, MO_LEQ);
4190                        }
4191#else
4192                        goto illegal_op;
4193#endif
4194                    }
4195                    break;
4196                case 0x17: /* extractps */
4197                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4198                                            xmm_regs[reg].ZMM_L(val & 3)));
4199                    if (mod == 3) {
4200                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4201                    } else {
4202                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4203                                           s->mem_index, MO_LEUL);
4204                    }
4205                    break;
4206                case 0x20: /* pinsrb */
4207                    if (mod == 3) {
4208                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4209                    } else {
4210                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
4211                                           s->mem_index, MO_UB);
4212                    }
4213                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4214                                            xmm_regs[reg].ZMM_B(val & 15)));
4215                    break;
4216                case 0x21: /* insertps */
4217                    if (mod == 3) {
4218                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4219                                        offsetof(CPUX86State,xmm_regs[rm]
4220                                                .ZMM_L((val >> 6) & 3)));
4221                    } else {
4222                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4223                                            s->mem_index, MO_LEUL);
4224                    }
4225                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4226                                    offsetof(CPUX86State,xmm_regs[reg]
4227                                            .ZMM_L((val >> 4) & 3)));
4228                    if ((val >> 0) & 1)
4229                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4230                                        cpu_env, offsetof(CPUX86State,
4231                                                xmm_regs[reg].ZMM_L(0)));
4232                    if ((val >> 1) & 1)
4233                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4234                                        cpu_env, offsetof(CPUX86State,
4235                                                xmm_regs[reg].ZMM_L(1)));
4236                    if ((val >> 2) & 1)
4237                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4238                                        cpu_env, offsetof(CPUX86State,
4239                                                xmm_regs[reg].ZMM_L(2)));
4240                    if ((val >> 3) & 1)
4241                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4242                                        cpu_env, offsetof(CPUX86State,
4243                                                xmm_regs[reg].ZMM_L(3)));
4244                    break;
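                /*
                 * The insertps immediate above is decoded field by field:
                 * bits [7:6] select the source lane (register form only),
                 * bits [5:4] the destination lane, and the low four bits
                 * form a zero mask applied lane by lane afterwards --
                 * hence the four conditional stores of zero.
                 */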
4245                case 0x22:
4246                    if (ot == MO_32) { /* pinsrd */
4247                        if (mod == 3) {
4248                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4249                        } else {
4250                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4251                                                s->mem_index, MO_LEUL);
4252                        }
4253                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4254                                        offsetof(CPUX86State,
4255                                                xmm_regs[reg].ZMM_L(val & 3)));
4256                    } else { /* pinsrq */
4257#ifdef TARGET_X86_64
4258                        if (mod == 3) {
4259                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4260                        } else {
4261                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4262                                                s->mem_index, MO_LEQ);
4263                        }
4264                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4265                                        offsetof(CPUX86State,
4266                                                xmm_regs[reg].ZMM_Q(val & 1)));
4267#else
4268                        goto illegal_op;
4269#endif
4270                    }
4271                    break;
4272                }
4273                return;
4274            }
4275
4276            if (b1) {
4277                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4278                if (mod == 3) {
4279                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4280                } else {
4281                    op2_offset = offsetof(CPUX86State,xmm_t0);
4282                    gen_lea_modrm(env, s, modrm);
4283                    gen_ldo_env_A0(s, op2_offset);
4284                }
4285            } else {
4286                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4287                if (mod == 3) {
4288                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4289                } else {
4290                    op2_offset = offsetof(CPUX86State,mmx_t0);
4291                    gen_lea_modrm(env, s, modrm);
4292                    gen_ldq_env_A0(s, op2_offset);
4293                }
4294            }
4295            val = x86_ldub_code(env, s);
4296
4297            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4298                set_cc_op(s, CC_OP_EFLAGS);
4299
4300                if (s->dflag == MO_64) {
4301                    /* The helper must use entire 64-bit gp registers */
4302                    val |= 1 << 8;
4303                }
4304            }
4305
4306            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4307            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4308            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4309            break;
4310
4311        case 0x33a:
4312            /* Various integer extensions at 0f 3a f[0-f].  */
4313            b = modrm | (b1 << 8);
4314            modrm = x86_ldub_code(env, s);
4315            reg = ((modrm >> 3) & 7) | rex_r;
4316
4317            switch (b) {
4318            case 0x3f0: /* rorx Gy,Ey, Ib */
4319                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4320                    || !(s->prefix & PREFIX_VEX)
4321                    || s->vex_l != 0) {
4322                    goto illegal_op;
4323                }
4324                ot = mo_64_32(s->dflag);
4325                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4326                b = x86_ldub_code(env, s);
4327                if (ot == MO_64) {
4328                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4329                } else {
4330                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4331                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4332                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4333                }
4334                gen_op_mov_reg_v(s, ot, reg, s->T0);
4335                break;
4336
4337            default:
4338                goto unknown_op;
4339            }
4340            break;
4341
4342        default:
4343        unknown_op:
4344            gen_unknown_opcode(env, s);
4345            return;
4346        }
4347    } else {
4348        /* generic MMX or SSE operation */
4349        switch(b) {
4350        case 0x70: /* pshufx insn */
4351        case 0xc6: /* pshufx insn */
4352        case 0xc2: /* compare insns */
4353            s->rip_offset = 1;
4354            break;
4355        default:
4356            break;
4357        }
4358        if (is_xmm) {
4359            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4360            if (mod != 3) {
4361                int sz = 4;
4362
4363                gen_lea_modrm(env, s, modrm);
4364                op2_offset = offsetof(CPUX86State,xmm_t0);
4365
4366                switch (b) {
4367                case 0x50 ... 0x5a:
4368                case 0x5c ... 0x5f:
4369                case 0xc2:
4370                    /* Most sse scalar operations.  */
4371                    if (b1 == 2) {
4372                        sz = 2;
4373                    } else if (b1 == 3) {
4374                        sz = 3;
4375                    }
4376                    break;
4377
4378                case 0x2e:  /* ucomis[sd] */
4379                case 0x2f:  /* comis[sd] */
4380                    if (b1 == 0) {
4381                        sz = 2;
4382                    } else {
4383                        sz = 3;
4384                    }
4385                    break;
4386                }
4387
4388                switch (sz) {
4389                case 2:
4390                    /* 32 bit access */
4391                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
4392                    tcg_gen_st32_tl(s->T0, cpu_env,
4393                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4394                    break;
4395                case 3:
4396                    /* 64 bit access */
4397                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4398                    break;
4399                default:
4400                    /* 128 bit access */
4401                    gen_ldo_env_A0(s, op2_offset);
4402                    break;
4403                }
4404            } else {
4405                rm = (modrm & 7) | REX_B(s);
4406                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4407            }
4408        } else {
4409            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4410            if (mod != 3) {
4411                gen_lea_modrm(env, s, modrm);
4412                op2_offset = offsetof(CPUX86State,mmx_t0);
4413                gen_ldq_env_A0(s, op2_offset);
4414            } else {
4415                rm = (modrm & 7);
4416                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4417            }
4418        }
4419        switch(b) {
4420        case 0x0f: /* 3DNow! data insns */
4421            val = x86_ldub_code(env, s);
4422            sse_fn_epp = sse_op_table5[val];
4423            if (!sse_fn_epp) {
4424                goto unknown_op;
4425            }
4426            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4427                goto illegal_op;
4428            }
4429            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4430            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4431            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4432            break;
4433        case 0x70: /* pshufx insn */
4434        case 0xc6: /* pshufx insn */
4435            val = x86_ldub_code(env, s);
4436            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4437            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4438            /* XXX: introduce a new table? */
4439            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4440            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4441            break;
4442        case 0xc2:
4443            /* compare insns */
4444            val = x86_ldub_code(env, s);
4445            if (val >= 8)
4446                goto unknown_op;
4447            sse_fn_epp = sse_op_table4[val][b1];
4448
4449            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4450            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4451            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4452            break;
4453        case 0xf7:
4454            /* maskmov: prepare A0 for the implicit DS:rDI store */
4455            if (mod != 3)
4456                goto illegal_op;
4457            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4458            gen_extu(s->aflag, s->A0);
4459            gen_add_A0_ds_seg(s);
4460
4461            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4462            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4463            /* XXX: introduce a new table? */
4464            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4465            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4466            break;
4467        default:
4468            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4469            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4470            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4471            break;
4472        }
4473        if (b == 0x2e || b == 0x2f) {
4474            set_cc_op(s, CC_OP_EFLAGS);
4475        }
4476    }
4477}
4478
4479/* Convert one instruction.  s->base.is_jmp is set if the translation must
4480   be stopped.  Returns the next pc value.  */
4481static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4482{
4483    CPUX86State *env = cpu->env_ptr;
4484    int b, prefixes;
4485    int shift;
4486    TCGMemOp ot, aflag, dflag;
4487    int modrm, reg, rm, mod, op, opreg, val;
4488    target_ulong next_eip, tval;
4489    int rex_w, rex_r;
4490    target_ulong pc_start = s->base.pc_next;
4491
4492    s->pc_start = s->pc = pc_start;
4493    s->override = -1;
4494#ifdef TARGET_X86_64
4495    s->rex_x = 0;
4496    s->rex_b = 0;
4497    s->x86_64_hregs = false;
4498#endif
4499    s->rip_offset = 0; /* for relative ip address */
4500    s->vex_l = 0;
4501    s->vex_v = 0;
4502    if (sigsetjmp(s->jmpbuf, 0) != 0) {
4503        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4504        return s->pc;
4505    }
4506
4507    prefixes = 0;
4508    rex_w = -1;
4509    rex_r = 0;
4510
4511 next_byte:
4512    b = x86_ldub_code(env, s);
4513    /* Collect prefixes.  */
4514    switch (b) {
4515    case 0xf3:
4516        prefixes |= PREFIX_REPZ;
4517        goto next_byte;
4518    case 0xf2:
4519        prefixes |= PREFIX_REPNZ;
4520        goto next_byte;
4521    case 0xf0:
4522        prefixes |= PREFIX_LOCK;
4523        goto next_byte;
4524    case 0x2e:
4525        s->override = R_CS;
4526        goto next_byte;
4527    case 0x36:
4528        s->override = R_SS;
4529        goto next_byte;
4530    case 0x3e:
4531        s->override = R_DS;
4532        goto next_byte;
4533    case 0x26:
4534        s->override = R_ES;
4535        goto next_byte;
4536    case 0x64:
4537        s->override = R_FS;
4538        goto next_byte;
4539    case 0x65:
4540        s->override = R_GS;
4541        goto next_byte;
4542    case 0x66:
4543        prefixes |= PREFIX_DATA;
4544        goto next_byte;
4545    case 0x67:
4546        prefixes |= PREFIX_ADR;
4547        goto next_byte;
4548#ifdef TARGET_X86_64
4549    case 0x40 ... 0x4f:
4550        if (CODE64(s)) {
4551            /* REX prefix */
4552            rex_w = (b >> 3) & 1;
4553            rex_r = (b & 0x4) << 1;
4554            s->rex_x = (b & 0x2) << 2;
4555            REX_B(s) = (b & 0x1) << 3;
4556            /* select uniform byte register addressing */
4557            s->x86_64_hregs = true;
4558            goto next_byte;
4559        }
4560        break;
4561#endif
4562    case 0xc5: /* 2-byte VEX */
4563    case 0xc4: /* 3-byte VEX */
4564        /* VEX prefixes are only recognized in 32/64-bit protected mode;
4565           otherwise the 0xc4/0xc5 byte decodes as LES or LDS.  */
4566        if (s->code32 && !s->vm86) {
4567            static const int pp_prefix[4] = {
4568                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4569            };
4570            int vex3, vex2 = x86_ldub_code(env, s);
4571
4572            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4573                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4574                   otherwise the instruction is LES or LDS.  */
4575                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4576                break;
4577            }
4578
4579            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4580            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4581                            | PREFIX_LOCK | PREFIX_DATA)) {
4582                goto illegal_op;
4583            }
4584#ifdef TARGET_X86_64
4585            if (s->x86_64_hregs) {
4586                goto illegal_op;
4587            }
4588#endif
4589            rex_r = (~vex2 >> 4) & 8;
4590            if (b == 0xc5) {
4591                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4592                vex3 = vex2;
4593                b = x86_ldub_code(env, s) | 0x100;
4594            } else {
4595                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4596#ifdef TARGET_X86_64
4597                s->rex_x = (~vex2 >> 3) & 8;
4598                s->rex_b = (~vex2 >> 2) & 8;
4599#endif
4600                vex3 = x86_ldub_code(env, s);
4601                rex_w = (vex3 >> 7) & 1;
4602                switch (vex2 & 0x1f) {
4603                case 0x01: /* Implied 0f leading opcode bytes.  */
4604                    b = x86_ldub_code(env, s) | 0x100;
4605                    break;
4606                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4607                    b = 0x138;
4608                    break;
4609                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4610                    b = 0x13a;
4611                    break;
4612                default:   /* Reserved for future use.  */
4613                    goto unknown_op;
4614                }
4615            }
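                /* VEX.vvvv is stored inverted (1's complement), as is
                   the R bit above; undo that so vex_v holds the real
                   register number.  The two 'pp' bits stand in for a
                   66/F3/F2 prefix, and L selects the 256-bit vector
                   length.  */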
4616            s->vex_v = (~vex3 >> 3) & 0xf;
4617            s->vex_l = (vex3 >> 2) & 1;
4618            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4619        }
4620        break;
4621    }
4622
4623    /* Post-process prefixes.  */
4624    if (CODE64(s)) {
4625        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4626           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4627           over 0x66 if both are present.  */
4628        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4629        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4630        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4631    } else {
4632        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4633        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4634            dflag = MO_32;
4635        } else {
4636            dflag = MO_16;
4637        }
4638        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4639        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4640            aflag = MO_32;
4641        }  else {
4642            aflag = MO_16;
4643        }
4644    }
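    /* For example, in 64-bit mode 66 48 89 c0 is still a 64-bit MOV
       because REX.W beats the 0x66 prefix, while in 32-bit mode a
       lone 0x66 flips the operation to 16-bit data.  */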
4645
4646    s->prefix = prefixes;
4647    s->aflag = aflag;
4648    s->dflag = dflag;
4649
4650    /* now check op code */
4651 reswitch:
4652    switch(b) {
4653    case 0x0f:
4654        /**************************/
4655        /* extended op code */
4656        b = x86_ldub_code(env, s) | 0x100;
4657        goto reswitch;
4658
4659        /**************************/
4660        /* arith & logic */
4661    case 0x00 ... 0x05:
4662    case 0x08 ... 0x0d:
4663    case 0x10 ... 0x15:
4664    case 0x18 ... 0x1d:
4665    case 0x20 ... 0x25:
4666    case 0x28 ... 0x2d:
4667    case 0x30 ... 0x35:
4668    case 0x38 ... 0x3d:
4669        {
4670            int op, f, val;
4671            op = (b >> 3) & 7;
4672            f = (b >> 1) & 3;
4673
4674            ot = mo_b_d(b, dflag);
4675
4676            switch(f) {
4677            case 0: /* OP Ev, Gv */
4678                modrm = x86_ldub_code(env, s);
4679                reg = ((modrm >> 3) & 7) | rex_r;
4680                mod = (modrm >> 6) & 3;
4681                rm = (modrm & 7) | REX_B(s);
4682                if (mod != 3) {
4683                    gen_lea_modrm(env, s, modrm);
4684                    opreg = OR_TMP0;
4685                } else if (op == OP_XORL && rm == reg) {
4686                xor_zero:
4687                    /* xor reg, reg optimisation */
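                        /* The destination is architecturally zero
                           whatever its old value was, so skip the ALU
                           op entirely and record the statically known
                           flag state (ZF=PF=1, all others clear) with
                           CC_OP_CLR.  */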
4688                    set_cc_op(s, CC_OP_CLR);
4689                    tcg_gen_movi_tl(s->T0, 0);
4690                    gen_op_mov_reg_v(s, ot, reg, s->T0);
4691                    break;
4692                } else {
4693                    opreg = rm;
4694                }
4695                gen_op_mov_v_reg(s, ot, s->T1, reg);
4696                gen_op(s, op, ot, opreg);
4697                break;
4698            case 1: /* OP Gv, Ev */
4699                modrm = x86_ldub_code(env, s);
4700                mod = (modrm >> 6) & 3;
4701                reg = ((modrm >> 3) & 7) | rex_r;
4702                rm = (modrm & 7) | REX_B(s);
4703                if (mod != 3) {
4704                    gen_lea_modrm(env, s, modrm);
4705                    gen_op_ld_v(s, ot, s->T1, s->A0);
4706                } else if (op == OP_XORL && rm == reg) {
4707                    goto xor_zero;
4708                } else {
4709                    gen_op_mov_v_reg(s, ot, s->T1, rm);
4710                }
4711                gen_op(s, op, ot, reg);
4712                break;
4713            case 2: /* OP A, Iv */
4714                val = insn_get(env, s, ot);
4715                tcg_gen_movi_tl(s->T1, val);
4716                gen_op(s, op, ot, OR_EAX);
4717                break;
4718            }
4719        }
4720        break;
4721
4722    case 0x82:
4723        if (CODE64(s))
4724            goto illegal_op;
4725        /* fall through */
4726    case 0x80: /* GRP1 */
4727    case 0x81:
4728    case 0x83:
4729        {
4730            int val;
4731
4732            ot = mo_b_d(b, dflag);
4733
4734            modrm = x86_ldub_code(env, s);
4735            mod = (modrm >> 6) & 3;
4736            rm = (modrm & 7) | REX_B(s);
4737            op = (modrm >> 3) & 7;
4738
4739            if (mod != 3) {
4740                if (b == 0x83)
4741                    s->rip_offset = 1;
4742                else
4743                    s->rip_offset = insn_const_size(ot);
4744                gen_lea_modrm(env, s, modrm);
4745                opreg = OR_TMP0;
4746            } else {
4747                opreg = rm;
4748            }
4749
4750            switch(b) {
4751            default:
4752            case 0x80:
4753            case 0x81:
4754            case 0x82:
4755                val = insn_get(env, s, ot);
4756                break;
4757            case 0x83:
4758                val = (int8_t)insn_get(env, s, MO_8);
4759                break;
4760            }
4761            tcg_gen_movi_tl(s->T1, val);
4762            gen_op(s, op, ot, opreg);
4763        }
4764        break;
4765
4766        /**************************/
4767        /* inc, dec, and other misc arith */
4768    case 0x40 ... 0x47: /* inc Gv */
4769        ot = dflag;
4770        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4771        break;
4772    case 0x48 ... 0x4f: /* dec Gv */
4773        ot = dflag;
4774        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4775        break;
4776    case 0xf6: /* GRP3 */
4777    case 0xf7:
4778        ot = mo_b_d(b, dflag);
4779
4780        modrm = x86_ldub_code(env, s);
4781        mod = (modrm >> 6) & 3;
4782        rm = (modrm & 7) | REX_B(s);
4783        op = (modrm >> 3) & 7;
4784        if (mod != 3) {
4785            if (op == 0) {
4786                s->rip_offset = insn_const_size(ot);
4787            }
4788            gen_lea_modrm(env, s, modrm);
4789            /* For those below that handle locked memory, don't load here.  */
4790            if (!(s->prefix & PREFIX_LOCK)
4791                || op != 2) {
4792                gen_op_ld_v(s, ot, s->T0, s->A0);
4793            }
4794        } else {
4795            gen_op_mov_v_reg(s, ot, s->T0, rm);
4796        }
4797
4798        switch(op) {
4799        case 0: /* test */
4800            val = insn_get(env, s, ot);
4801            tcg_gen_movi_tl(s->T1, val);
4802            gen_op_testl_T0_T1_cc(s);
4803            set_cc_op(s, CC_OP_LOGICB + ot);
4804            break;
4805        case 2: /* not */
4806            if (s->prefix & PREFIX_LOCK) {
4807                if (mod == 3) {
4808                    goto illegal_op;
4809                }
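                    /* There is no atomic NOT primitive, but x ^ ~0 is
                       ~x, so the atomic xor-fetch op can be reused.  */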
4810                tcg_gen_movi_tl(s->T0, ~0);
4811                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4812                                            s->mem_index, ot | MO_LE);
4813            } else {
4814                tcg_gen_not_tl(s->T0, s->T0);
4815                if (mod != 3) {
4816                    gen_op_st_v(s, ot, s->T0, s->A0);
4817                } else {
4818                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4819                }
4820            }
4821            break;
4822        case 3: /* neg */
4823            if (s->prefix & PREFIX_LOCK) {
4824                TCGLabel *label1;
4825                TCGv a0, t0, t1, t2;
4826
4827                if (mod == 3) {
4828                    goto illegal_op;
4829                }
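                    /* Likewise there is no atomic NEG, so emit a
                       compare-and-swap loop that retries until the
                       value that was negated matches what the cmpxchg
                       saw in memory.  */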
4830                a0 = tcg_temp_local_new();
4831                t0 = tcg_temp_local_new();
4832                label1 = gen_new_label();
4833
4834                tcg_gen_mov_tl(a0, s->A0);
4835                tcg_gen_mov_tl(t0, s->T0);
4836
4837                gen_set_label(label1);
4838                t1 = tcg_temp_new();
4839                t2 = tcg_temp_new();
4840                tcg_gen_mov_tl(t2, t0);
4841                tcg_gen_neg_tl(t1, t0);
4842                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4843                                          s->mem_index, ot | MO_LE);
4844                tcg_temp_free(t1);
4845                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4846
4847                tcg_temp_free(t2);
4848                tcg_temp_free(a0);
4849                tcg_gen_mov_tl(s->T0, t0);
4850                tcg_temp_free(t0);
4851            } else {
4852                tcg_gen_neg_tl(s->T0, s->T0);
4853                if (mod != 3) {
4854                    gen_op_st_v(s, ot, s->T0, s->A0);
4855                } else {
4856                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4857                }
4858            }
4859            gen_op_update_neg_cc(s);
4860            set_cc_op(s, CC_OP_SUBB + ot);
4861            break;
4862        case 4: /* mul */
4863            switch(ot) {
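                /* MUL architecturally defines only CF and OF, which
                   are set iff the upper half of the product is
                   nonzero; cc_src is loaded with that upper half and
                   cc_dst with the low half so the flags can be
                   computed lazily.  */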
4864            case MO_8:
4865                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4866                tcg_gen_ext8u_tl(s->T0, s->T0);
4867                tcg_gen_ext8u_tl(s->T1, s->T1);
4868                /* XXX: use 32 bit mul which could be faster */
4869                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4870                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4871                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4872                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4873                set_cc_op(s, CC_OP_MULB);
4874                break;
4875            case MO_16:
4876                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4877                tcg_gen_ext16u_tl(s->T0, s->T0);
4878                tcg_gen_ext16u_tl(s->T1, s->T1);
4879                /* XXX: use 32 bit mul which could be faster */
4880                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4881                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4882                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4883                tcg_gen_shri_tl(s->T0, s->T0, 16);
4884                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4885                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4886                set_cc_op(s, CC_OP_MULW);
4887                break;
4888            default:
4889            case MO_32:
4890                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4891                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4892                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4893                                  s->tmp2_i32, s->tmp3_i32);
4894                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4895                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4896                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4897                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4898                set_cc_op(s, CC_OP_MULL);
4899                break;
4900#ifdef TARGET_X86_64
4901            case MO_64:
4902                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4903                                  s->T0, cpu_regs[R_EAX]);
4904                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4905                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4906                set_cc_op(s, CC_OP_MULQ);
4907                break;
4908#endif
4909            }
4910            break;
4911        case 5: /* imul */
4912            switch(ot) {
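                /* For IMUL, CF and OF are set iff the product does not
                   fit in the destination; cc_src is loaded with the
                   high half minus the sign extension of the low half,
                   which is nonzero exactly in that case.  */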
4913            case MO_8:
4914                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4915                tcg_gen_ext8s_tl(s->T0, s->T0);
4916                tcg_gen_ext8s_tl(s->T1, s->T1);
4917                /* XXX: use 32 bit mul which could be faster */
4918                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4919                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4920                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4921                tcg_gen_ext8s_tl(s->tmp0, s->T0);
4922                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4923                set_cc_op(s, CC_OP_MULB);
4924                break;
4925            case MO_16:
4926                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4927                tcg_gen_ext16s_tl(s->T0, s->T0);
4928                tcg_gen_ext16s_tl(s->T1, s->T1);
4929                /* XXX: use 32 bit mul which could be faster */
4930                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4931                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4932                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4933                tcg_gen_ext16s_tl(s->tmp0, s->T0);
4934                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4935                tcg_gen_shri_tl(s->T0, s->T0, 16);
4936                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4937                set_cc_op(s, CC_OP_MULW);
4938                break;
4939            default:
4940            case MO_32:
4941                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4942                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4943                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4944                                  s->tmp2_i32, s->tmp3_i32);
4945                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4946                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4947                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4948                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4949                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4950                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4951                set_cc_op(s, CC_OP_MULL);
4952                break;
4953#ifdef TARGET_X86_64
4954            case MO_64:
4955                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4956                                  s->T0, cpu_regs[R_EAX]);
4957                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4958                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4959                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4960                set_cc_op(s, CC_OP_MULQ);
4961                break;
4962#endif
4963            }
4964            break;
4965        case 6: /* div */
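                /* Division is done in helpers because it must raise
                   #DE on a zero divisor or a quotient overflow.  */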
4966            switch(ot) {
4967            case MO_8:
4968                gen_helper_divb_AL(cpu_env, s->T0);
4969                break;
4970            case MO_16:
4971                gen_helper_divw_AX(cpu_env, s->T0);
4972                break;
4973            default:
4974            case MO_32:
4975                gen_helper_divl_EAX(cpu_env, s->T0);
4976                break;
4977#ifdef TARGET_X86_64
4978            case MO_64:
4979                gen_helper_divq_EAX(cpu_env, s->T0);
4980                break;
4981#endif
4982            }
4983            break;
4984        case 7: /* idiv */
4985            switch(ot) {
4986            case MO_8:
4987                gen_helper_idivb_AL(cpu_env, s->T0);
4988                break;
4989            case MO_16:
4990                gen_helper_idivw_AX(cpu_env, s->T0);
4991                break;
4992            default:
4993            case MO_32:
4994                gen_helper_idivl_EAX(cpu_env, s->T0);
4995                break;
4996#ifdef TARGET_X86_64
4997            case MO_64:
4998                gen_helper_idivq_EAX(cpu_env, s->T0);
4999                break;
5000#endif
5001            }
5002            break;
5003        default:
5004            goto unknown_op;
5005        }
5006        break;
5007
5008    case 0xfe: /* GRP4 */
5009    case 0xff: /* GRP5 */
5010        ot = mo_b_d(b, dflag);
5011
5012        modrm = x86_ldub_code(env, s);
5013        mod = (modrm >> 6) & 3;
5014        rm = (modrm & 7) | REX_B(s);
5015        op = (modrm >> 3) & 7;
5016        if (op >= 2 && b == 0xfe) {
5017            goto unknown_op;
5018        }
5019        if (CODE64(s)) {
5020            if (op == 2 || op == 4) {
5021                /* operand size for jumps is 64 bit */
5022                ot = MO_64;
5023            } else if (op == 3 || op == 5) {
5024                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
5025            } else if (op == 6) {
5026                /* default push size is 64 bit */
5027                ot = mo_pushpop(s, dflag);
5028            }
5029        }
5030        if (mod != 3) {
5031            gen_lea_modrm(env, s, modrm);
5032            if (op >= 2 && op != 3 && op != 5)
5033                gen_op_ld_v(s, ot, s->T0, s->A0);
5034        } else {
5035            gen_op_mov_v_reg(s, ot, s->T0, rm);
5036        }
5037
5038        switch(op) {
5039        case 0: /* inc Ev */
5040            if (mod != 3)
5041                opreg = OR_TMP0;
5042            else
5043                opreg = rm;
5044            gen_inc(s, ot, opreg, 1);
5045            break;
5046        case 1: /* dec Ev */
5047            if (mod != 3)
5048                opreg = OR_TMP0;
5049            else
5050                opreg = rm;
5051            gen_inc(s, ot, opreg, -1);
5052            break;
5053        case 2: /* call Ev */
5054            /* XXX: optimize the memory case; the 16-bit load already zero-extends, making the 'and' redundant */
5055            if (dflag == MO_16) {
5056                tcg_gen_ext16u_tl(s->T0, s->T0);
5057            }
5058            next_eip = s->pc - s->cs_base;
5059            tcg_gen_movi_tl(s->T1, next_eip);
5060            gen_push_v(s, s->T1);
5061            gen_op_jmp_v(s->T0);
5062            gen_bnd_jmp(s);
5063            gen_jr(s, s->T0);
5064            break;
5065        case 3: /* lcall Ev */
5066            gen_op_ld_v(s, ot, s->T1, s->A0);
5067            gen_add_A0_im(s, 1 << ot);
5068            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5069        do_lcall:
5070            if (s->pe && !s->vm86) {
5071                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5072                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5073                                           tcg_const_i32(dflag - 1),
5074                                           tcg_const_tl(s->pc - s->cs_base));
5075            } else {
5076                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5077                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5078                                      tcg_const_i32(dflag - 1),
5079                                      tcg_const_i32(s->pc - s->cs_base));
5080            }
5081            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5082            gen_jr(s, s->tmp4);
5083            break;
5084        case 4: /* jmp Ev */
5085            if (dflag == MO_16) {
5086                tcg_gen_ext16u_tl(s->T0, s->T0);
5087            }
5088            gen_op_jmp_v(s->T0);
5089            gen_bnd_jmp(s);
5090            gen_jr(s, s->T0);
5091            break;
5092        case 5: /* ljmp Ev */
5093            gen_op_ld_v(s, ot, s->T1, s->A0);
5094            gen_add_A0_im(s, 1 << ot);
5095            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5096        do_ljmp:
5097            if (s->pe && !s->vm86) {
5098                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5099                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5100                                          tcg_const_tl(s->pc - s->cs_base));
5101            } else {
5102                gen_op_movl_seg_T0_vm(s, R_CS);
5103                gen_op_jmp_v(s->T1);
5104            }
5105            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5106            gen_jr(s, s->tmp4);
5107            break;
5108        case 6: /* push Ev */
5109            gen_push_v(s, s->T0);
5110            break;
5111        default:
5112            goto unknown_op;
5113        }
5114        break;
5115
5116    case 0x84: /* test Ev, Gv */
5117    case 0x85:
5118        ot = mo_b_d(b, dflag);
5119
5120        modrm = x86_ldub_code(env, s);
5121        reg = ((modrm >> 3) & 7) | rex_r;
5122
5123        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5124        gen_op_mov_v_reg(s, ot, s->T1, reg);
5125        gen_op_testl_T0_T1_cc(s);
5126        set_cc_op(s, CC_OP_LOGICB + ot);
5127        break;
5128
5129    case 0xa8: /* test eAX, Iv */
5130    case 0xa9:
5131        ot = mo_b_d(b, dflag);
5132        val = insn_get(env, s, ot);
5133
5134        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5135        tcg_gen_movi_tl(s->T1, val);
5136        gen_op_testl_T0_T1_cc(s);
5137        set_cc_op(s, CC_OP_LOGICB + ot);
5138        break;
5139
5140    case 0x98: /* CWDE/CBW */
5141        switch (dflag) {
5142#ifdef TARGET_X86_64
5143        case MO_64:
5144            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5145            tcg_gen_ext32s_tl(s->T0, s->T0);
5146            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5147            break;
5148#endif
5149        case MO_32:
5150            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5151            tcg_gen_ext16s_tl(s->T0, s->T0);
5152            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5153            break;
5154        case MO_16:
5155            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5156            tcg_gen_ext8s_tl(s->T0, s->T0);
5157            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5158            break;
5159        default:
5160            tcg_abort();
5161        }
5162        break;
5163    case 0x99: /* CDQ/CWD */
5164        switch (dflag) {
5165#ifdef TARGET_X86_64
5166        case MO_64:
5167            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5168            tcg_gen_sari_tl(s->T0, s->T0, 63);
5169            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5170            break;
5171#endif
5172        case MO_32:
5173            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5174            tcg_gen_ext32s_tl(s->T0, s->T0);
5175            tcg_gen_sari_tl(s->T0, s->T0, 31);
5176            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5177            break;
5178        case MO_16:
5179            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5180            tcg_gen_ext16s_tl(s->T0, s->T0);
5181            tcg_gen_sari_tl(s->T0, s->T0, 15);
5182            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5183            break;
5184        default:
5185            tcg_abort();
5186        }
5187        break;
5188    case 0x1af: /* imul Gv, Ev */
5189    case 0x69: /* imul Gv, Ev, I */
5190    case 0x6b:
5191        ot = dflag;
5192        modrm = x86_ldub_code(env, s);
5193        reg = ((modrm >> 3) & 7) | rex_r;
5194        if (b == 0x69)
5195            s->rip_offset = insn_const_size(ot);
5196        else if (b == 0x6b)
5197            s->rip_offset = 1;
5198        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5199        if (b == 0x69) {
5200            val = insn_get(env, s, ot);
5201            tcg_gen_movi_tl(s->T1, val);
5202        } else if (b == 0x6b) {
5203            val = (int8_t)insn_get(env, s, MO_8);
5204            tcg_gen_movi_tl(s->T1, val);
5205        } else {
5206            gen_op_mov_v_reg(s, ot, s->T1, reg);
5207        }
5208        switch (ot) {
5209#ifdef TARGET_X86_64
5210        case MO_64:
5211            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5212            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5213            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5214            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5215            break;
5216#endif
5217        case MO_32:
5218            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5219            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5220            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5221                              s->tmp2_i32, s->tmp3_i32);
5222            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5223            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5224            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5225            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5226            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5227            break;
5228        default:
5229            tcg_gen_ext16s_tl(s->T0, s->T0);
5230            tcg_gen_ext16s_tl(s->T1, s->T1);
5231            /* XXX: use 32 bit mul which could be faster */
5232            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5233            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5234            tcg_gen_ext16s_tl(s->tmp0, s->T0);
5235            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5236            gen_op_mov_reg_v(s, ot, reg, s->T0);
5237            break;
5238        }
5239        set_cc_op(s, CC_OP_MULB + ot);
5240        break;
5241    case 0x1c0:
5242    case 0x1c1: /* xadd Ev, Gv */
5243        ot = mo_b_d(b, dflag);
5244        modrm = x86_ldub_code(env, s);
5245        reg = ((modrm >> 3) & 7) | rex_r;
5246        mod = (modrm >> 6) & 3;
5247        gen_op_mov_v_reg(s, ot, s->T0, reg);
5248        if (mod == 3) {
5249            rm = (modrm & 7) | REX_B(s);
5250            gen_op_mov_v_reg(s, ot, s->T1, rm);
5251            tcg_gen_add_tl(s->T0, s->T0, s->T1);
5252            gen_op_mov_reg_v(s, ot, reg, s->T1);
5253            gen_op_mov_reg_v(s, ot, rm, s->T0);
5254        } else {
5255            gen_lea_modrm(env, s, modrm);
5256            if (s->prefix & PREFIX_LOCK) {
5257                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5258                                            s->mem_index, ot | MO_LE);
5259                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5260            } else {
5261                gen_op_ld_v(s, ot, s->T1, s->A0);
5262                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5263                gen_op_st_v(s, ot, s->T0, s->A0);
5264            }
5265            gen_op_mov_reg_v(s, ot, reg, s->T1);
5266        }
5267        gen_op_update2_cc(s);
5268        set_cc_op(s, CC_OP_ADDB + ot);
5269        break;
5270    case 0x1b0:
5271    case 0x1b1: /* cmpxchg Ev, Gv */
5272        {
5273            TCGv oldv, newv, cmpv;
5274
5275            ot = mo_b_d(b, dflag);
5276            modrm = x86_ldub_code(env, s);
5277            reg = ((modrm >> 3) & 7) | rex_r;
5278            mod = (modrm >> 6) & 3;
5279            oldv = tcg_temp_new();
5280            newv = tcg_temp_new();
5281            cmpv = tcg_temp_new();
5282            gen_op_mov_v_reg(s, ot, newv, reg);
5283            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5284
5285            if (s->prefix & PREFIX_LOCK) {
5286                if (mod == 3) {
5287                    goto illegal_op;
5288                }
5289                gen_lea_modrm(env, s, modrm);
5290                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5291                                          s->mem_index, ot | MO_LE);
5292                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5293            } else {
5294                if (mod == 3) {
5295                    rm = (modrm & 7) | REX_B(s);
5296                    gen_op_mov_v_reg(s, ot, oldv, rm);
5297                } else {
5298                    gen_lea_modrm(env, s, modrm);
5299                    gen_op_ld_v(s, ot, oldv, s->A0);
5300                    rm = 0; /* avoid warning */
5301                }
5302                gen_extu(ot, oldv);
5303                gen_extu(ot, cmpv);
5304                /* store value = (old == cmp ? new : old);  */
5305                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5306                if (mod == 3) {
5307                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5308                    gen_op_mov_reg_v(s, ot, rm, newv);
5309                } else {
5310                    /* Perform an unconditional store cycle like a
5311                       physical CPU; it must happen before the accumulator
5312                       is changed, to keep the instruction idempotent if
5313                       the store faults and the instruction is restarted */
5314                    gen_op_st_v(s, ot, newv, s->A0);
5315                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5316                }
5317            }
5318            tcg_gen_mov_tl(cpu_cc_src, oldv);
5319            tcg_gen_mov_tl(s->cc_srcT, cmpv);
5320            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5321            set_cc_op(s, CC_OP_SUBB + ot);
5322            tcg_temp_free(oldv);
5323            tcg_temp_free(newv);
5324            tcg_temp_free(cmpv);
5325        }
5326        break;
5327    case 0x1c7: /* cmpxchg8b */
5328        modrm = x86_ldub_code(env, s);
5329        mod = (modrm >> 6) & 3;
5330        if ((mod == 3) || ((modrm & 0x38) != 0x8))
5331            goto illegal_op;
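        /* With LOCK in a parallel context the helper must use a real
           atomic cmpxchg; otherwise no other vCPU can observe the
           access and the cheaper unlocked variant is sufficient.  */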
5332#ifdef TARGET_X86_64
5333        if (dflag == MO_64) {
5334            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5335                goto illegal_op;
5336            gen_lea_modrm(env, s, modrm);
5337            if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5338                gen_helper_cmpxchg16b(cpu_env, s->A0);
5339            } else {
5340                gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5341            }
5342        } else
5343#endif
5344        {
5345            if (!(s->cpuid_features & CPUID_CX8))
5346                goto illegal_op;
5347            gen_lea_modrm(env, s, modrm);
5348            if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5349                gen_helper_cmpxchg8b(cpu_env, s->A0);
5350            } else {
5351                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5352            }
5353        }
5354        set_cc_op(s, CC_OP_EFLAGS);
5355        break;
5356
5357        /**************************/
5358        /* push/pop */
5359    case 0x50 ... 0x57: /* push */
5360        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5361        gen_push_v(s, s->T0);
5362        break;
5363    case 0x58 ... 0x5f: /* pop */
5364        ot = gen_pop_T0(s);
5365        /* NOTE: order is important for pop %sp */
5366        gen_pop_update(s, ot);
5367        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5368        break;
5369    case 0x60: /* pusha */
5370        if (CODE64(s))
5371            goto illegal_op;
5372        gen_pusha(s);
5373        break;
5374    case 0x61: /* popa */
5375        if (CODE64(s))
5376            goto illegal_op;
5377        gen_popa(s);
5378        break;
5379    case 0x68: /* push Iv */
5380    case 0x6a:
5381        ot = mo_pushpop(s, dflag);
5382        if (b == 0x68)
5383            val = insn_get(env, s, ot);
5384        else
5385            val = (int8_t)insn_get(env, s, MO_8);
5386        tcg_gen_movi_tl(s->T0, val);
5387        gen_push_v(s, s->T0);
5388        break;
5389    case 0x8f: /* pop Ev */
5390        modrm = x86_ldub_code(env, s);
5391        mod = (modrm >> 6) & 3;
5392        ot = gen_pop_T0(s);
5393        if (mod == 3) {
5394            /* NOTE: order is important for pop %sp */
5395            gen_pop_update(s, ot);
5396            rm = (modrm & 7) | REX_B(s);
5397            gen_op_mov_reg_v(s, ot, rm, s->T0);
5398        } else {
5399            /* NOTE: order is important too for MMU exceptions */
5400            s->popl_esp_hack = 1 << ot;
5401            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5402            s->popl_esp_hack = 0;
5403            gen_pop_update(s, ot);
5404        }
5405        break;
5406    case 0xc8: /* enter */
5407        {
5408            int level;
5409            val = x86_lduw_code(env, s);
5410            level = x86_ldub_code(env, s);
5411            gen_enter(s, val, level);
5412        }
5413        break;
5414    case 0xc9: /* leave */
5415        gen_leave(s);
5416        break;
5417    case 0x06: /* push es */
5418    case 0x0e: /* push cs */
5419    case 0x16: /* push ss */
5420    case 0x1e: /* push ds */
5421        if (CODE64(s))
5422            goto illegal_op;
5423        gen_op_movl_T0_seg(s, b >> 3);
5424        gen_push_v(s, s->T0);
5425        break;
5426    case 0x1a0: /* push fs */
5427    case 0x1a8: /* push gs */
5428        gen_op_movl_T0_seg(s, (b >> 3) & 7);
5429        gen_push_v(s, s->T0);
5430        break;
5431    case 0x07: /* pop es */
5432    case 0x17: /* pop ss */
5433    case 0x1f: /* pop ds */
5434        if (CODE64(s))
5435            goto illegal_op;
5436        reg = b >> 3;
5437        ot = gen_pop_T0(s);
5438        gen_movl_seg_T0(s, reg);
5439        gen_pop_update(s, ot);
5440        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5441        if (s->base.is_jmp) {
5442            gen_jmp_im(s, s->pc - s->cs_base);
5443            if (reg == R_SS) {
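                    /* A POP SS inhibits interrupts and single-step
                       traps until after the next instruction, so
                       suppress the TF check for this insn and end the
                       TB with the IRQ-inhibit flag set.  */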
5444                s->tf = 0;
5445                gen_eob_inhibit_irq(s, true);
5446            } else {
5447                gen_eob(s);
5448            }
5449        }
5450        break;
5451    case 0x1a1: /* pop fs */
5452    case 0x1a9: /* pop gs */
5453        ot = gen_pop_T0(s);
5454        gen_movl_seg_T0(s, (b >> 3) & 7);
5455        gen_pop_update(s, ot);
5456        if (s->base.is_jmp) {
5457            gen_jmp_im(s, s->pc - s->cs_base);
5458            gen_eob(s);
5459        }
5460        break;
5461
5462        /**************************/
5463        /* mov */
5464    case 0x88:
5465    case 0x89: /* mov Gv, Ev */
5466        ot = mo_b_d(b, dflag);
5467        modrm = x86_ldub_code(env, s);
5468        reg = ((modrm >> 3) & 7) | rex_r;
5469
5470        /* generate a generic store */
5471        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5472        break;
5473    case 0xc6:
5474    case 0xc7: /* mov Ev, Iv */
5475        ot = mo_b_d(b, dflag);
5476        modrm = x86_ldub_code(env, s);
5477        mod = (modrm >> 6) & 3;
5478        if (mod != 3) {
5479            s->rip_offset = insn_const_size(ot);
5480            gen_lea_modrm(env, s, modrm);
5481        }
5482        val = insn_get(env, s, ot);
5483        tcg_gen_movi_tl(s->T0, val);
5484        if (mod != 3) {
5485            gen_op_st_v(s, ot, s->T0, s->A0);
5486        } else {
5487            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5488        }
5489        break;
5490    case 0x8a:
5491    case 0x8b: /* mov Ev, Gv */
5492        ot = mo_b_d(b, dflag);
5493        modrm = x86_ldub_code(env, s);
5494        reg = ((modrm >> 3) & 7) | rex_r;
5495
5496        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5497        gen_op_mov_reg_v(s, ot, reg, s->T0);
5498        break;
5499    case 0x8e: /* mov seg, Gv */
5500        modrm = x86_ldub_code(env, s);
5501        reg = (modrm >> 3) & 7;
5502        if (reg >= 6 || reg == R_CS)
5503            goto illegal_op;
5504        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5505        gen_movl_seg_T0(s, reg);
5506        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5507        if (s->base.is_jmp) {
5508            gen_jmp_im(s, s->pc - s->cs_base);
5509            if (reg == R_SS) {
5510                s->tf = 0;
5511                gen_eob_inhibit_irq(s, true);
5512            } else {
5513                gen_eob(s);
5514            }
5515        }
5516        break;
5517    case 0x8c: /* mov Gv, seg */
5518        modrm = x86_ldub_code(env, s);
5519        reg = (modrm >> 3) & 7;
5520        mod = (modrm >> 6) & 3;
5521        if (reg >= 6)
5522            goto illegal_op;
5523        gen_op_movl_T0_seg(s, reg);
5524        ot = mod == 3 ? dflag : MO_16;
5525        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5526        break;
5527
5528    case 0x1b6: /* movzbS Gv, Eb */
5529    case 0x1b7: /* movzwS Gv, Eb */
5530    case 0x1be: /* movsbS Gv, Eb */
5531    case 0x1bf: /* movswS Gv, Eb */
5532        {
5533            TCGMemOp d_ot;
5534            TCGMemOp s_ot;
5535
5536            /* d_ot is the size of destination */
5537            d_ot = dflag;
5538            /* ot is the size of source */
5539            ot = (b & 1) + MO_8;
5540            /* s_ot is the sign+size of source */
5541            s_ot = b & 8 ? MO_SIGN | ot : ot;
5542
5543            modrm = x86_ldub_code(env, s);
5544            reg = ((modrm >> 3) & 7) | rex_r;
5545            mod = (modrm >> 6) & 3;
5546            rm = (modrm & 7) | REX_B(s);
5547
5548            if (mod == 3) {
5549                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
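                        /* AH, CH, DH and BH live in bits 15:8 of the
                           corresponding word register, so sign-extract
                           the byte directly from there.  */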
5550                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5551                } else {
5552                    gen_op_mov_v_reg(s, ot, s->T0, rm);
5553                    switch (s_ot) {
5554                    case MO_UB:
5555                        tcg_gen_ext8u_tl(s->T0, s->T0);
5556                        break;
5557                    case MO_SB:
5558                        tcg_gen_ext8s_tl(s->T0, s->T0);
5559                        break;
5560                    case MO_UW:
5561                        tcg_gen_ext16u_tl(s->T0, s->T0);
5562                        break;
5563                    default:
5564                    case MO_SW:
5565                        tcg_gen_ext16s_tl(s->T0, s->T0);
5566                        break;
5567                    }
5568                }
5569                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5570            } else {
5571                gen_lea_modrm(env, s, modrm);
5572                gen_op_ld_v(s, s_ot, s->T0, s->A0);
5573                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5574            }
5575        }
5576        break;
5577
5578    case 0x8d: /* lea */
5579        modrm = x86_ldub_code(env, s);
5580        mod = (modrm >> 6) & 3;
5581        if (mod == 3)
5582            goto illegal_op;
5583        reg = ((modrm >> 3) & 7) | rex_r;
5584        {
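            /* LEA only computes the effective address: reuse the
               modrm decoder but pass seg = -1 so that no segment base
               is added, then write the address to the destination.  */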
5585            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5586            TCGv ea = gen_lea_modrm_1(s, a);
5587            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5588            gen_op_mov_reg_v(s, dflag, reg, s->A0);
5589        }
5590        break;
5591
5592    case 0xa0: /* mov EAX, Ov */
5593    case 0xa1:
5594    case 0xa2: /* mov Ov, EAX */
5595    case 0xa3:
5596        {
5597            target_ulong offset_addr;
5598
5599            ot = mo_b_d(b, dflag);
5600            switch (s->aflag) {
5601#ifdef TARGET_X86_64
5602            case MO_64:
5603                offset_addr = x86_ldq_code(env, s);
5604                break;
5605#endif
5606            default:
5607                offset_addr = insn_get(env, s, s->aflag);
5608                break;
5609            }
5610            tcg_gen_movi_tl(s->A0, offset_addr);
5611            gen_add_A0_ds_seg(s);
5612            if ((b & 2) == 0) {
5613                gen_op_ld_v(s, ot, s->T0, s->A0);
5614                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5615            } else {
5616                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5617                gen_op_st_v(s, ot, s->T0, s->A0);
5618            }
5619        }
5620        break;
5621    case 0xd7: /* xlat */
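        /* AL = [rBX + unsigned AL] in the DS segment by default, with
           the sum truncated to the current address size.  */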
5622        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5623        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5624        tcg_gen_add_tl(s->A0, s->A0, s->T0);
5625        gen_extu(s->aflag, s->A0);
5626        gen_add_A0_ds_seg(s);
5627        gen_op_ld_v(s, MO_8, s->T0, s->A0);
5628        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5629        break;
5630    case 0xb0 ... 0xb7: /* mov R, Ib */
5631        val = insn_get(env, s, MO_8);
5632        tcg_gen_movi_tl(s->T0, val);
5633        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5634        break;
5635    case 0xb8 ... 0xbf: /* mov R, Iv */
5636#ifdef TARGET_X86_64
5637        if (dflag == MO_64) {
5638            uint64_t tmp;
5639            /* 64 bit case */
5640            tmp = x86_ldq_code(env, s);
5641            reg = (b & 7) | REX_B(s);
5642            tcg_gen_movi_tl(s->T0, tmp);
5643            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5644        } else
5645#endif
5646        {
5647            ot = dflag;
5648            val = insn_get(env, s, ot);
5649            reg = (b & 7) | REX_B(s);
5650            tcg_gen_movi_tl(s->T0, val);
5651            gen_op_mov_reg_v(s, ot, reg, s->T0);
5652        }
5653        break;
5654
5655    case 0x91 ... 0x97: /* xchg R, EAX */
5656    do_xchg_reg_eax:
5657        ot = dflag;
5658        reg = (b & 7) | REX_B(s);
5659        rm = R_EAX;
5660        goto do_xchg_reg;
5661    case 0x86:
5662    case 0x87: /* xchg Ev, Gv */
5663        ot = mo_b_d(b, dflag);
5664        modrm = x86_ldub_code(env, s);
5665        reg = ((modrm >> 3) & 7) | rex_r;
5666        mod = (modrm >> 6) & 3;
5667        if (mod == 3) {
5668            rm = (modrm & 7) | REX_B(s);
5669        do_xchg_reg:
5670            gen_op_mov_v_reg(s, ot, s->T0, reg);
5671            gen_op_mov_v_reg(s, ot, s->T1, rm);
5672            gen_op_mov_reg_v(s, ot, rm, s->T0);
5673            gen_op_mov_reg_v(s, ot, reg, s->T1);
5674        } else {
5675            gen_lea_modrm(env, s, modrm);
5676            gen_op_mov_v_reg(s, ot, s->T0, reg);
5677            /* for xchg, lock is implicit */
5678            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5679                                   s->mem_index, ot | MO_LE);
5680            gen_op_mov_reg_v(s, ot, reg, s->T1);
5681        }
5682        break;
5683    case 0xc4: /* les Gv */
5684        /* In CODE64 this is VEX3; see above.  */
5685        op = R_ES;
5686        goto do_lxx;
5687    case 0xc5: /* lds Gv */
5688        /* In CODE64 this is VEX2; see above.  */
5689        op = R_DS;
5690        goto do_lxx;
5691    case 0x1b2: /* lss Gv */
5692        op = R_SS;
5693        goto do_lxx;
5694    case 0x1b4: /* lfs Gv */
5695        op = R_FS;
5696        goto do_lxx;
5697    case 0x1b5: /* lgs Gv */
5698        op = R_GS;
5699    do_lxx:
5700        ot = dflag != MO_16 ? MO_32 : MO_16;
5701        modrm = x86_ldub_code(env, s);
5702        reg = ((modrm >> 3) & 7) | rex_r;
5703        mod = (modrm >> 6) & 3;
5704        if (mod == 3)
5705            goto illegal_op;
5706        gen_lea_modrm(env, s, modrm);
5707        gen_op_ld_v(s, ot, s->T1, s->A0);
5708        gen_add_A0_im(s, 1 << ot);
5709        /* load the segment first to handle exceptions properly */
5710        gen_op_ld_v(s, MO_16, s->T0, s->A0);
5711        gen_movl_seg_T0(s, op);
5712        /* then put the data */
5713        gen_op_mov_reg_v(s, ot, reg, s->T1);
5714        if (s->base.is_jmp) {
5715            gen_jmp_im(s, s->pc - s->cs_base);
5716            gen_eob(s);
5717        }
5718        break;
5719
5720        /************************/
5721        /* shifts */
5722    case 0xc0:
5723    case 0xc1:
5724        /* shift Ev,Ib */
5725        shift = 2;
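        /* 'shift' selects the count operand for the common grp2 code
           below: 2 = immediate byte, 1 = constant 1, 0 = %cl.  */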
5726    grp2:
5727        {
5728            ot = mo_b_d(b, dflag);
5729            modrm = x86_ldub_code(env, s);
5730            mod = (modrm >> 6) & 3;
5731            op = (modrm >> 3) & 7;
5732
5733            if (mod != 3) {
5734                if (shift == 2) {
5735                    s->rip_offset = 1;
5736                }
5737                gen_lea_modrm(env, s, modrm);
5738                opreg = OR_TMP0;
5739            } else {
5740                opreg = (modrm & 7) | REX_B(s);
5741            }
5742
5743            /* simpler op */
5744            if (shift == 0) {
5745                gen_shift(s, op, ot, opreg, OR_ECX);
5746            } else {
5747                if (shift == 2) {
5748                    shift = x86_ldub_code(env, s);
5749                }
5750                gen_shifti(s, op, ot, opreg, shift);
5751            }
5752        }
5753        break;
5754    case 0xd0:
5755    case 0xd1:
5756        /* shift Ev,1 */
5757        shift = 1;
5758        goto grp2;
5759    case 0xd2:
5760    case 0xd3:
5761        /* shift Ev,cl */
5762        shift = 0;
5763        goto grp2;
5764
5765    case 0x1a4: /* shld imm */
5766        op = 0;
5767        shift = 1;
5768        goto do_shiftd;
5769    case 0x1a5: /* shld cl */
5770        op = 0;
5771        shift = 0;
5772        goto do_shiftd;
5773    case 0x1ac: /* shrd imm */
5774        op = 1;
5775        shift = 1;
5776        goto do_shiftd;
5777    case 0x1ad: /* shrd cl */
5778        op = 1;
5779        shift = 0;
5780    do_shiftd:
5781        ot = dflag;
5782        modrm = x86_ldub_code(env, s);
5783        mod = (modrm >> 6) & 3;
5784        rm = (modrm & 7) | REX_B(s);
5785        reg = ((modrm >> 3) & 7) | rex_r;
5786        if (mod != 3) {
5787            gen_lea_modrm(env, s, modrm);
5788            opreg = OR_TMP0;
5789        } else {
5790            opreg = rm;
5791        }
5792        gen_op_mov_v_reg(s, ot, s->T1, reg);
5793
5794        if (shift) {
5795            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5796            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5797            tcg_temp_free(imm);
5798        } else {
5799            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5800        }
5801        break;
5802
5803        /************************/
5804        /* floats */
5805    case 0xd8 ... 0xdf:
5806        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5807            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5808            /* XXX: what to do on an illegal op? */
5809            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5810            break;
5811        }
5812        modrm = x86_ldub_code(env, s);
5813        mod = (modrm >> 6) & 3;
5814        rm = modrm & 7;
5815        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
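        /* Combine the low three opcode bits with the modrm reg field
           into one 6-bit index covering the whole x87 opcode map.  */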
5816        if (mod != 3) {
5817            /* memory op */
5818            gen_lea_modrm(env, s, modrm);
5819            switch(op) {
5820            case 0x00 ... 0x07: /* fxxxs */
5821            case 0x10 ... 0x17: /* fixxxl */
5822            case 0x20 ... 0x27: /* fxxxl */
5823            case 0x30 ... 0x37: /* fixxx */
5824                {
5825                    int op1;
5826                    op1 = op & 7;
5827
5828                    switch(op >> 4) {
5829                    case 0:
5830                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5831                                            s->mem_index, MO_LEUL);
5832                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5833                        break;
5834                    case 1:
5835                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5836                                            s->mem_index, MO_LEUL);
5837                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5838                        break;
5839                    case 2:
5840                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5841                                            s->mem_index, MO_LEQ);
5842                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5843                        break;
5844                    case 3:
5845                    default:
5846                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5847                                            s->mem_index, MO_LESW);
5848                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5849                        break;
5850                    }
5851
5852                    gen_helper_fp_arith_ST0_FT0(op1);
5853                    if (op1 == 3) {
5854                        /* fcomp needs pop */
5855                        gen_helper_fpop(cpu_env);
5856                    }
5857                }
5858                break;
5859            case 0x08: /* flds */
5860            case 0x0a: /* fsts */
5861            case 0x0b: /* fstps */
5862            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5863            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5864            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5865                switch(op & 7) {
5866                case 0:
5867                    switch(op >> 4) {
5868                    case 0:
5869                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5870                                            s->mem_index, MO_LEUL);
5871                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5872                        break;
5873                    case 1:
5874                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5875                                            s->mem_index, MO_LEUL);
5876                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5877                        break;
5878                    case 2:
5879                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5880                                            s->mem_index, MO_LEQ);
5881                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5882                        break;
5883                    case 3:
5884                    default:
5885                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5886                                            s->mem_index, MO_LESW);
5887                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5888                        break;
5889                    }
5890                    break;
5891                case 1:
5892                    /* XXX: the corresponding CPUID bit must be tested!  */
5893                    switch(op >> 4) {
5894                    case 1:
5895                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5896                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5897                                            s->mem_index, MO_LEUL);
5898                        break;
5899                    case 2:
5900                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5901                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5902                                            s->mem_index, MO_LEQ);
5903                        break;
5904                    case 3:
5905                    default:
5906                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5907                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5908                                            s->mem_index, MO_LEUW);
5909                        break;
5910                    }
5911                    gen_helper_fpop(cpu_env);
5912                    break;
5913                default:
5914                    switch(op >> 4) {
5915                    case 0:
5916                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5917                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5918                                            s->mem_index, MO_LEUL);
5919                        break;
5920                    case 1:
5921                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5922                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5923                                            s->mem_index, MO_LEUL);
5924                        break;
5925                    case 2:
5926                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
5927                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5928                                            s->mem_index, MO_LEQ);
5929                        break;
5930                    case 3:
5931                    default:
5932                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
5933                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5934                                            s->mem_index, MO_LEUW);
5935                        break;
5936                    }
5937                    if ((op & 7) == 3)
5938                        gen_helper_fpop(cpu_env);
5939                    break;
5940                }
5941                break;
5942            case 0x0c: /* fldenv mem */
5943                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5944                break;
5945            case 0x0d: /* fldcw mem */
5946                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5947                                    s->mem_index, MO_LEUW);
5948                gen_helper_fldcw(cpu_env, s->tmp2_i32);
5949                break;
5950            case 0x0e: /* fnstenv mem */
5951                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5952                break;
5953            case 0x0f: /* fnstcw mem */
5954                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
5955                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5956                                    s->mem_index, MO_LEUW);
5957                break;
5958            case 0x1d: /* fldt mem */
5959                gen_helper_fldt_ST0(cpu_env, s->A0);
5960                break;
5961            case 0x1f: /* fstpt mem */
5962                gen_helper_fstt_ST0(cpu_env, s->A0);
5963                gen_helper_fpop(cpu_env);
5964                break;
5965            case 0x2c: /* frstor mem */
5966                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5967                break;
5968            case 0x2e: /* fnsave mem */
5969                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5970                break;
5971            case 0x2f: /* fnstsw mem */
5972                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
5973                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5974                                    s->mem_index, MO_LEUW);
5975                break;
5976            case 0x3c: /* fbld */
5977                gen_helper_fbld_ST0(cpu_env, s->A0);
5978                break;
5979            case 0x3e: /* fbstp */
5980                gen_helper_fbst_ST0(cpu_env, s->A0);
5981                gen_helper_fpop(cpu_env);
5982                break;
5983            case 0x3d: /* fildll */
5984                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
5985                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
5986                break;
5987            case 0x3f: /* fistpll */
5988                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
5989                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
5990                gen_helper_fpop(cpu_env);
5991                break;
5992            default:
5993                goto unknown_op;
5994            }
5995        } else {
5996            /* register float ops */
5997            opreg = rm;
5998
5999            switch(op) {
6000            case 0x08: /* fld sti */
6001                gen_helper_fpush(cpu_env);
6002                gen_helper_fmov_ST0_STN(cpu_env,
6003                                        tcg_const_i32((opreg + 1) & 7));
6004                break;
6005            case 0x09: /* fxchg sti */
6006            case 0x29: /* fxchg4 sti, undocumented op */
6007            case 0x39: /* fxchg7 sti, undocumented op */
6008                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6009                break;
6010            case 0x0a: /* grp d9/2 */
6011                switch(rm) {
6012                case 0: /* fnop */
6013                    /* check exceptions (FreeBSD FPU probe) */
6014                    gen_helper_fwait(cpu_env);
6015                    break;
6016                default:
6017                    goto unknown_op;
6018                }
6019                break;
6020            case 0x0c: /* grp d9/4 */
6021                switch(rm) {
6022                case 0: /* fchs */
6023                    gen_helper_fchs_ST0(cpu_env);
6024                    break;
6025                case 1: /* fabs */
6026                    gen_helper_fabs_ST0(cpu_env);
6027                    break;
6028                case 4: /* ftst */
6029                    gen_helper_fldz_FT0(cpu_env);
6030                    gen_helper_fcom_ST0_FT0(cpu_env);
6031                    break;
6032                case 5: /* fxam */
6033                    gen_helper_fxam_ST0(cpu_env);
6034                    break;
6035                default:
6036                    goto unknown_op;
6037                }
6038                break;
6039            case 0x0d: /* grp d9/5 */
6040                {
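                    /* Each rm encoding pushes one x87 constant:
                       0: +1.0, 1: log2(10), 2: log2(e), 3: pi,
                       4: log10(2), 5: ln(2), 6: +0.0.  rm 7 is undefined. */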
6041                    switch(rm) {
6042                    case 0:
6043                        gen_helper_fpush(cpu_env);
6044                        gen_helper_fld1_ST0(cpu_env);
6045                        break;
6046                    case 1:
6047                        gen_helper_fpush(cpu_env);
6048                        gen_helper_fldl2t_ST0(cpu_env);
6049                        break;
6050                    case 2:
6051                        gen_helper_fpush(cpu_env);
6052                        gen_helper_fldl2e_ST0(cpu_env);
6053                        break;
6054                    case 3:
6055                        gen_helper_fpush(cpu_env);
6056                        gen_helper_fldpi_ST0(cpu_env);
6057                        break;
6058                    case 4:
6059                        gen_helper_fpush(cpu_env);
6060                        gen_helper_fldlg2_ST0(cpu_env);
6061                        break;
6062                    case 5:
6063                        gen_helper_fpush(cpu_env);
6064                        gen_helper_fldln2_ST0(cpu_env);
6065                        break;
6066                    case 6:
6067                        gen_helper_fpush(cpu_env);
6068                        gen_helper_fldz_ST0(cpu_env);
6069                        break;
6070                    default:
6071                        goto unknown_op;
6072                    }
6073                }
6074                break;
6075            case 0x0e: /* grp d9/6 */
6076                switch(rm) {
6077                case 0: /* f2xm1 */
6078                    gen_helper_f2xm1(cpu_env);
6079                    break;
6080                case 1: /* fyl2x */
6081                    gen_helper_fyl2x(cpu_env);
6082                    break;
6083                case 2: /* fptan */
6084                    gen_helper_fptan(cpu_env);
6085                    break;
6086                case 3: /* fpatan */
6087                    gen_helper_fpatan(cpu_env);
6088                    break;
6089                case 4: /* fxtract */
6090                    gen_helper_fxtract(cpu_env);
6091                    break;
6092                case 5: /* fprem1 */
6093                    gen_helper_fprem1(cpu_env);
6094                    break;
6095                case 6: /* fdecstp */
6096                    gen_helper_fdecstp(cpu_env);
6097                    break;
6098                default:
6099                case 7: /* fincstp */
6100                    gen_helper_fincstp(cpu_env);
6101                    break;
6102                }
6103                break;
6104            case 0x0f: /* grp d9/7 */
6105                switch(rm) {
6106                case 0: /* fprem */
6107                    gen_helper_fprem(cpu_env);
6108                    break;
6109                case 1: /* fyl2xp1 */
6110                    gen_helper_fyl2xp1(cpu_env);
6111                    break;
6112                case 2: /* fsqrt */
6113                    gen_helper_fsqrt(cpu_env);
6114                    break;
6115                case 3: /* fsincos */
6116                    gen_helper_fsincos(cpu_env);
6117                    break;
6118                case 5: /* fscale */
6119                    gen_helper_fscale(cpu_env);
6120                    break;
6121                case 4: /* frndint */
6122                    gen_helper_frndint(cpu_env);
6123                    break;
6124                case 6: /* fsin */
6125                    gen_helper_fsin(cpu_env);
6126                    break;
6127                default:
6128                case 7: /* fcos */
6129                    gen_helper_fcos(cpu_env);
6130                    break;
6131                }
6132                break;
6133            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6134            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6135            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6136                {
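                    /* The low three opcode bits select the arithmetic op
                       (fadd/fmul/fsub/fsubr/fdiv/fdivr; fcom/fcomp are
                       handled separately).  The 0x2x forms write ST(i)
                       instead of ST(0); the 0x3x forms also pop. */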
6137                    int op1;
6138
6139                    op1 = op & 7;
6140                    if (op >= 0x20) {
6141                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6142                        if (op >= 0x30)
6143                            gen_helper_fpop(cpu_env);
6144                    } else {
6145                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6146                        gen_helper_fp_arith_ST0_FT0(op1);
6147                    }
6148                }
6149                break;
6150            case 0x02: /* fcom */
6151            case 0x22: /* fcom2, undocumented op */
6152                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6153                gen_helper_fcom_ST0_FT0(cpu_env);
6154                break;
6155            case 0x03: /* fcomp */
6156            case 0x23: /* fcomp3, undocumented op */
6157            case 0x32: /* fcomp5, undocumented op */
6158                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6159                gen_helper_fcom_ST0_FT0(cpu_env);
6160                gen_helper_fpop(cpu_env);
6161                break;
6162            case 0x15: /* da/5 */
6163                switch(rm) {
6164                case 1: /* fucompp */
6165                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6166                    gen_helper_fucom_ST0_FT0(cpu_env);
6167                    gen_helper_fpop(cpu_env);
6168                    gen_helper_fpop(cpu_env);
6169                    break;
6170                default:
6171                    goto unknown_op;
6172                }
6173                break;
6174            case 0x1c:
6175                switch(rm) {
6176                case 0: /* feni (287 only, just do nop here) */
6177                    break;
6178                case 1: /* fdisi (287 only, just do nop here) */
6179                    break;
6180                case 2: /* fclex */
6181                    gen_helper_fclex(cpu_env);
6182                    break;
6183                case 3: /* fninit */
6184                    gen_helper_fninit(cpu_env);
6185                    break;
6186                case 4: /* fsetpm (287 only, just do nop here) */
6187                    break;
6188                default:
6189                    goto unknown_op;
6190                }
6191                break;
6192            case 0x1d: /* fucomi */
6193                if (!(s->cpuid_features & CPUID_CMOV)) {
6194                    goto illegal_op;
6195                }
6196                gen_update_cc_op(s);
6197                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6198                gen_helper_fucomi_ST0_FT0(cpu_env);
6199                set_cc_op(s, CC_OP_EFLAGS);
6200                break;
6201            case 0x1e: /* fcomi */
6202                if (!(s->cpuid_features & CPUID_CMOV)) {
6203                    goto illegal_op;
6204                }
6205                gen_update_cc_op(s);
6206                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6207                gen_helper_fcomi_ST0_FT0(cpu_env);
6208                set_cc_op(s, CC_OP_EFLAGS);
6209                break;
6210            case 0x28: /* ffree sti */
6211                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6212                break;
6213            case 0x2a: /* fst sti */
6214                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6215                break;
6216            case 0x2b: /* fstp sti */
6217            case 0x0b: /* fstp1 sti, undocumented op */
6218            case 0x3a: /* fstp8 sti, undocumented op */
6219            case 0x3b: /* fstp9 sti, undocumented op */
6220                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6221                gen_helper_fpop(cpu_env);
6222                break;
6223            case 0x2c: /* fucom st(i) */
6224                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6225                gen_helper_fucom_ST0_FT0(cpu_env);
6226                break;
6227            case 0x2d: /* fucomp st(i) */
6228                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6229                gen_helper_fucom_ST0_FT0(cpu_env);
6230                gen_helper_fpop(cpu_env);
6231                break;
6232            case 0x33: /* de/3 */
6233                switch(rm) {
6234                case 1: /* fcompp */
6235                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6236                    gen_helper_fcom_ST0_FT0(cpu_env);
6237                    gen_helper_fpop(cpu_env);
6238                    gen_helper_fpop(cpu_env);
6239                    break;
6240                default:
6241                    goto unknown_op;
6242                }
6243                break;
6244            case 0x38: /* ffreep sti, undocumented op */
6245                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6246                gen_helper_fpop(cpu_env);
6247                break;
6248            case 0x3c: /* df/4 */
6249                switch(rm) {
6250                case 0:
6251                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6252                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6253                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6254                    break;
6255                default:
6256                    goto unknown_op;
6257                }
6258                break;
6259            case 0x3d: /* fucomip */
6260                if (!(s->cpuid_features & CPUID_CMOV)) {
6261                    goto illegal_op;
6262                }
6263                gen_update_cc_op(s);
6264                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6265                gen_helper_fucomi_ST0_FT0(cpu_env);
6266                gen_helper_fpop(cpu_env);
6267                set_cc_op(s, CC_OP_EFLAGS);
6268                break;
6269            case 0x3e: /* fcomip */
6270                if (!(s->cpuid_features & CPUID_CMOV)) {
6271                    goto illegal_op;
6272                }
6273                gen_update_cc_op(s);
6274                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6275                gen_helper_fcomi_ST0_FT0(cpu_env);
6276                gen_helper_fpop(cpu_env);
6277                set_cc_op(s, CC_OP_EFLAGS);
6278                break;
6279            case 0x10 ... 0x13: /* fcmovxx */
6280            case 0x18 ... 0x1b:
6281                {
6282                    int op1;
6283                    TCGLabel *l1;
6284                    static const uint8_t fcmov_cc[8] = {
6285                        (JCC_B << 1),
6286                        (JCC_Z << 1),
6287                        (JCC_BE << 1),
6288                        (JCC_P << 1),
6289                    };
6290
6291                    if (!(s->cpuid_features & CPUID_CMOV)) {
6292                        goto illegal_op;
6293                    }
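                    /* The low bit of a jcc encoding negates the condition,
                       so ORing in ((op >> 3) & 1) ^ 1 makes the branch skip
                       the fmov exactly when the fcmov condition is false;
                       e.g. fcmovb (op 0x10) branches on "not below". */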
6294                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6295                    l1 = gen_new_label();
6296                    gen_jcc1_noeob(s, op1, l1);
6297                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6298                    gen_set_label(l1);
6299                }
6300                break;
6301            default:
6302                goto unknown_op;
6303            }
6304        }
6305        break;
6306        /************************/
6307        /* string ops */
6308
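    /* For the REP/REPNE forms, the gen_repz_* helpers wrap one iteration
       of the basic operation with the ECX test and the loop-back jump. */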
6309    case 0xa4: /* movsS */
6310    case 0xa5:
6311        ot = mo_b_d(b, dflag);
6312        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6313            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6314        } else {
6315            gen_movs(s, ot);
6316        }
6317        break;
6318
6319    case 0xaa: /* stosS */
6320    case 0xab:
6321        ot = mo_b_d(b, dflag);
6322        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6323            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6324        } else {
6325            gen_stos(s, ot);
6326        }
6327        break;
6328    case 0xac: /* lodsS */
6329    case 0xad:
6330        ot = mo_b_d(b, dflag);
6331        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6332            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6333        } else {
6334            gen_lods(s, ot);
6335        }
6336        break;
6337    case 0xae: /* scasS */
6338    case 0xaf:
6339        ot = mo_b_d(b, dflag);
6340        if (prefixes & PREFIX_REPNZ) {
6341            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6342        } else if (prefixes & PREFIX_REPZ) {
6343            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6344        } else {
6345            gen_scas(s, ot);
6346        }
6347        break;
6348
6349    case 0xa6: /* cmpsS */
6350    case 0xa7:
6351        ot = mo_b_d(b, dflag);
6352        if (prefixes & PREFIX_REPNZ) {
6353            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6354        } else if (prefixes & PREFIX_REPZ) {
6355            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6356        } else {
6357            gen_cmps(s, ot);
6358        }
6359        break;
6360    case 0x6c: /* insS */
6361    case 0x6d:
6362        ot = mo_b_d32(b, dflag);
6363        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6364        gen_check_io(s, ot, pc_start - s->cs_base,
6365                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
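        /* SVM_IOIO_TYPE_MASK marks this as an IN; the literal 4 is the
           string-operation bit of the SVM IOIO intercept information. */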
6366        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6367            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6368        } else {
6369            gen_ins(s, ot);
6370            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6371                gen_jmp(s, s->pc - s->cs_base);
6372            }
6373        }
6374        break;
6375    case 0x6e: /* outsS */
6376    case 0x6f:
6377        ot = mo_b_d32(b, dflag);
6378        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6379        gen_check_io(s, ot, pc_start - s->cs_base,
6380                     svm_is_rep(prefixes) | 4);
6381        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6382            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6383        } else {
6384            gen_outs(s, ot);
6385            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6386                gen_jmp(s, s->pc - s->cs_base);
6387            }
6388        }
6389        break;
6390
6391        /************************/
6392        /* port I/O */
6393
6394    case 0xe4:
6395    case 0xe5:
6396        ot = mo_b_d32(b, dflag);
6397        val = x86_ldub_code(env, s);
6398        tcg_gen_movi_tl(s->T0, val);
6399        gen_check_io(s, ot, pc_start - s->cs_base,
6400                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
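        /* Under icount, bracket the I/O access with gen_io_start/gen_io_end
           and end the TB right after it, so the access happens at a
           precisely known instruction count. */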
6401        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6402            gen_io_start();
6403        }
6404        tcg_gen_movi_i32(s->tmp2_i32, val);
6405        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6406        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6407        gen_bpt_io(s, s->tmp2_i32, ot);
6408        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6409            gen_io_end();
6410            gen_jmp(s, s->pc - s->cs_base);
6411        }
6412        break;
6413    case 0xe6:
6414    case 0xe7:
6415        ot = mo_b_d32(b, dflag);
6416        val = x86_ldub_code(env, s);
6417        tcg_gen_movi_tl(s->T0, val);
6418        gen_check_io(s, ot, pc_start - s->cs_base,
6419                     svm_is_rep(prefixes));
6420        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6421
6422        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6423            gen_io_start();
6424        }
6425        tcg_gen_movi_i32(s->tmp2_i32, val);
6426        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6427        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6428        gen_bpt_io(s, s->tmp2_i32, ot);
6429        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6430            gen_io_end();
6431            gen_jmp(s, s->pc - s->cs_base);
6432        }
6433        break;
6434    case 0xec:
6435    case 0xed:
6436        ot = mo_b_d32(b, dflag);
6437        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6438        gen_check_io(s, ot, pc_start - s->cs_base,
6439                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6440        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6441            gen_io_start();
6442        }
6443        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6444        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6445        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6446        gen_bpt_io(s, s->tmp2_i32, ot);
6447        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6448            gen_io_end();
6449            gen_jmp(s, s->pc - s->cs_base);
6450        }
6451        break;
6452    case 0xee:
6453    case 0xef:
6454        ot = mo_b_d32(b, dflag);
6455        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6456        gen_check_io(s, ot, pc_start - s->cs_base,
6457                     svm_is_rep(prefixes));
6458        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6459
6460        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6461            gen_io_start();
6462        }
6463        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6464        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6465        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6466        gen_bpt_io(s, s->tmp2_i32, ot);
6467        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6468            gen_io_end();
6469            gen_jmp(s, s->pc - s->cs_base);
6470        }
6471        break;
6472
6473        /************************/
6474        /* control */
6475    case 0xc2: /* ret im */
6476        val = x86_ldsw_code(env, s);
6477        ot = gen_pop_T0(s);
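        /* Pop the return address, then drop it (1 << ot bytes) plus the
           extra byte count given by the immediate. */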
6478        gen_stack_update(s, val + (1 << ot));
6479        /* Note that gen_pop_T0 uses a zero-extending load.  */
6480        gen_op_jmp_v(s->T0);
6481        gen_bnd_jmp(s);
6482        gen_jr(s, s->T0);
6483        break;
6484    case 0xc3: /* ret */
6485        ot = gen_pop_T0(s);
6486        gen_pop_update(s, ot);
6487        /* Note that gen_pop_T0 uses a zero-extending load.  */
6488        gen_op_jmp_v(s->T0);
6489        gen_bnd_jmp(s);
6490        gen_jr(s, s->T0);
6491        break;
6492    case 0xca: /* lret im */
6493        val = x86_ldsw_code(env, s);
6494    do_lret:
6495        if (s->pe && !s->vm86) {
6496            gen_update_cc_op(s);
6497            gen_jmp_im(s, pc_start - s->cs_base);
6498            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6499                                      tcg_const_i32(val));
6500        } else {
6501            gen_stack_A0(s);
6502            /* pop offset */
6503            gen_op_ld_v(s, dflag, s->T0, s->A0);
6504            /* NOTE: keeping EIP updated is not a problem in case of
6505               exception */
6506            gen_op_jmp_v(s->T0);
6507            /* pop selector */
6508            gen_add_A0_im(s, 1 << dflag);
6509            gen_op_ld_v(s, dflag, s->T0, s->A0);
6510            gen_op_movl_seg_T0_vm(s, R_CS);
6511            /* add stack offset */
6512            gen_stack_update(s, val + (2 << dflag));
6513        }
6514        gen_eob(s);
6515        break;
6516    case 0xcb: /* lret */
6517        val = 0;
6518        goto do_lret;
6519    case 0xcf: /* iret */
6520        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6521        if (!s->pe) {
6522            /* real mode */
6523            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6524            set_cc_op(s, CC_OP_EFLAGS);
6525        } else if (s->vm86) {
6526            if (s->iopl != 3) {
6527                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6528            } else {
6529                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6530                set_cc_op(s, CC_OP_EFLAGS);
6531            }
6532        } else {
6533            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6534                                      tcg_const_i32(s->pc - s->cs_base));
6535            set_cc_op(s, CC_OP_EFLAGS);
6536        }
6537        gen_eob(s);
6538        break;
6539    case 0xe8: /* call im */
6540        {
6541            if (dflag != MO_16) {
6542                tval = (int32_t)insn_get(env, s, MO_32);
6543            } else {
6544                tval = (int16_t)insn_get(env, s, MO_16);
6545            }
6546            next_eip = s->pc - s->cs_base;
6547            tval += next_eip;
6548            if (dflag == MO_16) {
6549                tval &= 0xffff;
6550            } else if (!CODE64(s)) {
6551                tval &= 0xffffffff;
6552            }
6553            tcg_gen_movi_tl(s->T0, next_eip);
6554            gen_push_v(s, s->T0);
6555            gen_bnd_jmp(s);
6556            gen_jmp(s, tval);
6557        }
6558        break;
6559    case 0x9a: /* lcall im */
6560        {
6561            unsigned int selector, offset;
6562
6563            if (CODE64(s))
6564                goto illegal_op;
6565            ot = dflag;
6566            offset = insn_get(env, s, ot);
6567            selector = insn_get(env, s, MO_16);
6568
6569            tcg_gen_movi_tl(s->T0, selector);
6570            tcg_gen_movi_tl(s->T1, offset);
6571        }
6572        goto do_lcall;
6573    case 0xe9: /* jmp im */
6574        if (dflag != MO_16) {
6575            tval = (int32_t)insn_get(env, s, MO_32);
6576        } else {
6577            tval = (int16_t)insn_get(env, s, MO_16);
6578        }
6579        tval += s->pc - s->cs_base;
6580        if (dflag == MO_16) {
6581            tval &= 0xffff;
6582        } else if (!CODE64(s)) {
6583            tval &= 0xffffffff;
6584        }
6585        gen_bnd_jmp(s);
6586        gen_jmp(s, tval);
6587        break;
6588    case 0xea: /* ljmp im */
6589        {
6590            unsigned int selector, offset;
6591
6592            if (CODE64(s))
6593                goto illegal_op;
6594            ot = dflag;
6595            offset = insn_get(env, s, ot);
6596            selector = insn_get(env, s, MO_16);
6597
6598            tcg_gen_movi_tl(s->T0, selector);
6599            tcg_gen_movi_tl(s->T1, offset);
6600        }
6601        goto do_ljmp;
6602    case 0xeb: /* jmp Jb */
6603        tval = (int8_t)insn_get(env, s, MO_8);
6604        tval += s->pc - s->cs_base;
6605        if (dflag == MO_16) {
6606            tval &= 0xffff;
6607        }
6608        gen_jmp(s, tval);
6609        break;
6610    case 0x70 ... 0x7f: /* jcc Jb */
6611        tval = (int8_t)insn_get(env, s, MO_8);
6612        goto do_jcc;
6613    case 0x180 ... 0x18f: /* jcc Jv */
6614        if (dflag != MO_16) {
6615            tval = (int32_t)insn_get(env, s, MO_32);
6616        } else {
6617            tval = (int16_t)insn_get(env, s, MO_16);
6618        }
6619    do_jcc:
6620        next_eip = s->pc - s->cs_base;
6621        tval += next_eip;
6622        if (dflag == MO_16) {
6623            tval &= 0xffff;
6624        }
6625        gen_bnd_jmp(s);
6626        gen_jcc(s, b, tval, next_eip);
6627        break;
6628
6629    case 0x190 ... 0x19f: /* setcc Gv */
6630        modrm = x86_ldub_code(env, s);
6631        gen_setcc1(s, b, s->T0);
6632        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6633        break;
6634    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6635        if (!(s->cpuid_features & CPUID_CMOV)) {
6636            goto illegal_op;
6637        }
6638        ot = dflag;
6639        modrm = x86_ldub_code(env, s);
6640        reg = ((modrm >> 3) & 7) | rex_r;
6641        gen_cmovcc1(env, s, ot, b, modrm, reg);
6642        break;
6643
6644        /************************/
6645        /* flags */
6646    case 0x9c: /* pushf */
6647        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6648        if (s->vm86 && s->iopl != 3) {
6649            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6650        } else {
6651            gen_update_cc_op(s);
6652            gen_helper_read_eflags(s->T0, cpu_env);
6653            gen_push_v(s, s->T0);
6654        }
6655        break;
6656    case 0x9d: /* popf */
6657        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6658        if (s->vm86 && s->iopl != 3) {
6659            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6660        } else {
6661            ot = gen_pop_T0(s);
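            /* Which flag bits POPF may rewrite depends on privilege:
               CPL 0 can change IOPL and IF, CPL <= IOPL can change IF but
               not IOPL, and any other context can change neither. */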
6662            if (s->cpl == 0) {
6663                if (dflag != MO_16) {
6664                    gen_helper_write_eflags(cpu_env, s->T0,
6665                                            tcg_const_i32((TF_MASK | AC_MASK |
6666                                                           ID_MASK | NT_MASK |
6667                                                           IF_MASK |
6668                                                           IOPL_MASK)));
6669                } else {
6670                    gen_helper_write_eflags(cpu_env, s->T0,
6671                                            tcg_const_i32((TF_MASK | AC_MASK |
6672                                                           ID_MASK | NT_MASK |
6673                                                           IF_MASK | IOPL_MASK)
6674                                                          & 0xffff));
6675                }
6676            } else {
6677                if (s->cpl <= s->iopl) {
6678                    if (dflag != MO_16) {
6679                        gen_helper_write_eflags(cpu_env, s->T0,
6680                                                tcg_const_i32((TF_MASK |
6681                                                               AC_MASK |
6682                                                               ID_MASK |
6683                                                               NT_MASK |
6684                                                               IF_MASK)));
6685                    } else {
6686                        gen_helper_write_eflags(cpu_env, s->T0,
6687                                                tcg_const_i32((TF_MASK |
6688                                                               AC_MASK |
6689                                                               ID_MASK |
6690                                                               NT_MASK |
6691                                                               IF_MASK)
6692                                                              & 0xffff));
6693                    }
6694                } else {
6695                    if (dflag != MO_16) {
6696                        gen_helper_write_eflags(cpu_env, s->T0,
6697                                           tcg_const_i32((TF_MASK | AC_MASK |
6698                                                          ID_MASK | NT_MASK)));
6699                    } else {
6700                        gen_helper_write_eflags(cpu_env, s->T0,
6701                                           tcg_const_i32((TF_MASK | AC_MASK |
6702                                                          ID_MASK | NT_MASK)
6703                                                         & 0xffff));
6704                    }
6705                }
6706            }
6707            gen_pop_update(s, ot);
6708            set_cc_op(s, CC_OP_EFLAGS);
6709            /* abort translation because TF/AC flag may change */
6710            gen_jmp_im(s, s->pc - s->cs_base);
6711            gen_eob(s);
6712        }
6713        break;
6714    case 0x9e: /* sahf */
6715        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6716            goto illegal_op;
6717        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6718        gen_compute_eflags(s);
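        /* Keep OF (also held in cc_src) and replace SF/ZF/AF/PF/CF with
           the corresponding bits of AH. */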
6719        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6720        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6721        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6722        break;
6723    case 0x9f: /* lahf */
6724        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6725            goto illegal_op;
6726        gen_compute_eflags(s);
6727        /* Note: gen_compute_eflags() only gives the condition codes */
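        /* Bit 1 of EFLAGS always reads as 1, hence the OR with 0x02. */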
6728        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6729        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6730        break;
6731    case 0xf5: /* cmc */
6732        gen_compute_eflags(s);
6733        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6734        break;
6735    case 0xf8: /* clc */
6736        gen_compute_eflags(s);
6737        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6738        break;
6739    case 0xf9: /* stc */
6740        gen_compute_eflags(s);
6741        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6742        break;
6743    case 0xfc: /* cld */
6744        tcg_gen_movi_i32(s->tmp2_i32, 1);
6745        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6746        break;
6747    case 0xfd: /* std */
6748        tcg_gen_movi_i32(s->tmp2_i32, -1);
6749        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6750        break;
6751
6752        /************************/
6753        /* bit operations */
6754    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6755        ot = dflag;
6756        modrm = x86_ldub_code(env, s);
6757        op = (modrm >> 3) & 7;
6758        mod = (modrm >> 6) & 3;
6759        rm = (modrm & 7) | REX_B(s);
6760        if (mod != 3) {
6761            s->rip_offset = 1;
6762            gen_lea_modrm(env, s, modrm);
6763            if (!(s->prefix & PREFIX_LOCK)) {
6764                gen_op_ld_v(s, ot, s->T0, s->A0);
6765            }
6766        } else {
6767            gen_op_mov_v_reg(s, ot, s->T0, rm);
6768        }
6769        /* load shift */
6770        val = x86_ldub_code(env, s);
6771        tcg_gen_movi_tl(s->T1, val);
6772        if (op < 4)
6773            goto unknown_op;
6774        op -= 4;
6775        goto bt_op;
6776    case 0x1a3: /* bt Gv, Ev */
6777        op = 0;
6778        goto do_btx;
6779    case 0x1ab: /* bts */
6780        op = 1;
6781        goto do_btx;
6782    case 0x1b3: /* btr */
6783        op = 2;
6784        goto do_btx;
6785    case 0x1bb: /* btc */
6786        op = 3;
6787    do_btx:
6788        ot = dflag;
6789        modrm = x86_ldub_code(env, s);
6790        reg = ((modrm >> 3) & 7) | rex_r;
6791        mod = (modrm >> 6) & 3;
6792        rm = (modrm & 7) | REX_B(s);
6793        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6794        if (mod != 3) {
6795            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6796            /* specific case: the bit offset can index outside the operand,
                   so add its word index, scaled to bytes, as a displacement */
6797            gen_exts(ot, s->T1);
6798            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6799            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6800            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6801            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6802            if (!(s->prefix & PREFIX_LOCK)) {
6803                gen_op_ld_v(s, ot, s->T0, s->A0);
6804            }
6805        } else {
6806            gen_op_mov_v_reg(s, ot, s->T0, rm);
6807        }
6808    bt_op:
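        /* T1 holds the bit offset; reduce it modulo the operand width and
           build the single-bit mask in tmp0. */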
6809        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6810        tcg_gen_movi_tl(s->tmp0, 1);
6811        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6812        if (s->prefix & PREFIX_LOCK) {
6813            switch (op) {
6814            case 0: /* bt */
6815                /* Needs no atomic ops; we suppressed the normal
6816                   memory load for LOCK above, so do it now.  */
6817                gen_op_ld_v(s, ot, s->T0, s->A0);
6818                break;
6819            case 1: /* bts */
6820                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6821                                           s->mem_index, ot | MO_LE);
6822                break;
6823            case 2: /* btr */
6824                tcg_gen_not_tl(s->tmp0, s->tmp0);
6825                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6826                                            s->mem_index, ot | MO_LE);
6827                break;
6828            default:
6829            case 3: /* btc */
6830                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6831                                            s->mem_index, ot | MO_LE);
6832                break;
6833            }
6834            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6835        } else {
6836            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6837            switch (op) {
6838            case 0: /* bt */
6839                /* Data already loaded; nothing to do.  */
6840                break;
6841            case 1: /* bts */
6842                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6843                break;
6844            case 2: /* btr */
6845                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6846                break;
6847            default:
6848            case 3: /* btc */
6849                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6850                break;
6851            }
6852            if (op != 0) {
6853                if (mod != 3) {
6854                    gen_op_st_v(s, ot, s->T0, s->A0);
6855                } else {
6856                    gen_op_mov_reg_v(s, ot, rm, s->T0);
6857                }
6858            }
6859        }
6860
6861        /* Delay all CC updates until after the store above.  Note that
6862           C is the result of the test, Z is unchanged, and the others
6863           are all undefined.  */
6864        switch (s->cc_op) {
6865        case CC_OP_MULB ... CC_OP_MULQ:
6866        case CC_OP_ADDB ... CC_OP_ADDQ:
6867        case CC_OP_ADCB ... CC_OP_ADCQ:
6868        case CC_OP_SUBB ... CC_OP_SUBQ:
6869        case CC_OP_SBBB ... CC_OP_SBBQ:
6870        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6871        case CC_OP_INCB ... CC_OP_INCQ:
6872        case CC_OP_DECB ... CC_OP_DECQ:
6873        case CC_OP_SHLB ... CC_OP_SHLQ:
6874        case CC_OP_SARB ... CC_OP_SARQ:
6875        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6876            /* Z was going to be computed from the non-zero status of CC_DST.
6877               We can get that same Z value (and the new C value) by leaving
6878               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6879               same width.  */
6880            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6881            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6882            break;
6883        default:
6884            /* Otherwise, generate EFLAGS and replace the C bit.  */
6885            gen_compute_eflags(s);
6886            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
6887                               ctz32(CC_C), 1);
6888            break;
6889        }
6890        break;
6891    case 0x1bc: /* bsf / tzcnt */
6892    case 0x1bd: /* bsr / lzcnt */
6893        ot = dflag;
6894        modrm = x86_ldub_code(env, s);
6895        reg = ((modrm >> 3) & 7) | rex_r;
6896        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6897        gen_extu(ot, s->T0);
6898
6899        /* Note that lzcnt and tzcnt are in different extensions.  */
6900        if ((prefixes & PREFIX_REPZ)
6901            && (b & 1
6902                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6903                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6904            int size = 8 << ot;
6905            /* For lzcnt/tzcnt, the C bit is defined by the input value. */
6906            tcg_gen_mov_tl(cpu_cc_src, s->T0);
6907            if (b & 1) {
6908                /* For lzcnt, reduce the target_ulong result by the
6909                   number of zeros that we expect to find at the top.  */
6910                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
6911                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
6912            } else {
6913                /* For tzcnt, a zero input must return the operand size.  */
6914                tcg_gen_ctzi_tl(s->T0, s->T0, size);
6915            }
6916            /* For lzcnt/tzcnt, the Z bit is defined by the result.  */
6917            gen_op_update1_cc(s);
6918            set_cc_op(s, CC_OP_BMILGB + ot);
6919        } else {
6920            /* For bsr/bsf, only the Z bit is defined and it is related
6921               to the input and not the result.  */
6922            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
6923            set_cc_op(s, CC_OP_LOGICB + ot);
6924
6925            /* ??? The manual says that the output is undefined when the
6926               input is zero, but real hardware leaves it unchanged, and
6927               real programs appear to depend on that.  Accomplish this
6928               by passing the output as the value to return upon zero.  */
6929            if (b & 1) {
6930                /* For bsr, return the bit index of the first 1 bit,
6931                   not the count of leading zeros.  */
6932                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6933                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
6934                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
6935            } else {
6936                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
6937            }
6938        }
6939        gen_op_mov_reg_v(s, ot, reg, s->T0);
6940        break;
6941        /************************/
6942        /* bcd */
6943    case 0x27: /* daa */
6944        if (CODE64(s))
6945            goto illegal_op;
6946        gen_update_cc_op(s);
6947        gen_helper_daa(cpu_env);
6948        set_cc_op(s, CC_OP_EFLAGS);
6949        break;
6950    case 0x2f: /* das */
6951        if (CODE64(s))
6952            goto illegal_op;
6953        gen_update_cc_op(s);
6954        gen_helper_das(cpu_env);
6955        set_cc_op(s, CC_OP_EFLAGS);
6956        break;
6957    case 0x37: /* aaa */
6958        if (CODE64(s))
6959            goto illegal_op;
6960        gen_update_cc_op(s);
6961        gen_helper_aaa(cpu_env);
6962        set_cc_op(s, CC_OP_EFLAGS);
6963        break;
6964    case 0x3f: /* aas */
6965        if (CODE64(s))
6966            goto illegal_op;
6967        gen_update_cc_op(s);
6968        gen_helper_aas(cpu_env);
6969        set_cc_op(s, CC_OP_EFLAGS);
6970        break;
6971    case 0xd4: /* aam */
6972        if (CODE64(s))
6973            goto illegal_op;
6974        val = x86_ldub_code(env, s);
6975        if (val == 0) {
6976            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6977        } else {
6978            gen_helper_aam(cpu_env, tcg_const_i32(val));
6979            set_cc_op(s, CC_OP_LOGICB);
6980        }
6981        break;
6982    case 0xd5: /* aad */
6983        if (CODE64(s))
6984            goto illegal_op;
6985        val = x86_ldub_code(env, s);
6986        gen_helper_aad(cpu_env, tcg_const_i32(val));
6987        set_cc_op(s, CC_OP_LOGICB);
6988        break;
6989        /************************/
6990        /* misc */
6991    case 0x90: /* nop */
6992        /* XXX: correct lock test for all insn */
6993        if (prefixes & PREFIX_LOCK) {
6994            goto illegal_op;
6995        }
6996        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
6997        if (REX_B(s)) {
6998            goto do_xchg_reg_eax;
6999        }
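        /* A REPZ-prefixed NOP (F3 90) is PAUSE; call the helper so the
           vCPU can yield. */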
7000        if (prefixes & PREFIX_REPZ) {
7001            gen_update_cc_op(s);
7002            gen_jmp_im(s, pc_start - s->cs_base);
7003            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7004            s->base.is_jmp = DISAS_NORETURN;
7005        }
7006        break;
7007    case 0x9b: /* fwait */
7008        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7009            (HF_MP_MASK | HF_TS_MASK)) {
7010            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7011        } else {
7012            gen_helper_fwait(cpu_env);
7013        }
7014        break;
7015    case 0xcc: /* int3 */
7016        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7017        break;
7018    case 0xcd: /* int N */
7019        val = x86_ldub_code(env, s);
7020        if (s->vm86 && s->iopl != 3) {
7021            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7022        } else {
7023            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7024        }
7025        break;
7026    case 0xce: /* into */
7027        if (CODE64(s))
7028            goto illegal_op;
7029        gen_update_cc_op(s);
7030        gen_jmp_im(s, pc_start - s->cs_base);
7031        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7032        break;
7033#ifdef WANT_ICEBP
7034    case 0xf1: /* icebp (undocumented, exits to external debugger) */
7035        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7036        gen_debug(s, pc_start - s->cs_base);
7037        break;
7038#endif
7039    case 0xfa: /* cli */
7040        if (!s->vm86) {
7041            if (s->cpl <= s->iopl) {
7042                gen_helper_cli(cpu_env);
7043            } else {
7044                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7045            }
7046        } else {
7047            if (s->iopl == 3) {
7048                gen_helper_cli(cpu_env);
7049            } else {
7050                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7051            }
7052        }
7053        break;
7054    case 0xfb: /* sti */
7055        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7056            gen_helper_sti(cpu_env);
7057            /* interrupts are enabled only after the insn following sti */
7058            gen_jmp_im(s, s->pc - s->cs_base);
7059            gen_eob_inhibit_irq(s, true);
7060        } else {
7061            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7062        }
7063        break;
7064    case 0x62: /* bound */
7065        if (CODE64(s))
7066            goto illegal_op;
7067        ot = dflag;
7068        modrm = x86_ldub_code(env, s);
7069        reg = (modrm >> 3) & 7;
7070        mod = (modrm >> 6) & 3;
7071        if (mod == 3)
7072            goto illegal_op;
7073        gen_op_mov_v_reg(s, ot, s->T0, reg);
7074        gen_lea_modrm(env, s, modrm);
7075        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7076        if (ot == MO_16) {
7077            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7078        } else {
7079            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7080        }
7081        break;
7082    case 0x1c8 ... 0x1cf: /* bswap reg */
7083        reg = (b & 7) | REX_B(s);
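        /* The 16-bit form of BSWAP is architecturally undefined; the
           data-size prefix is ignored and the full 32-bit register is
           byte-swapped. */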
7084#ifdef TARGET_X86_64
7085        if (dflag == MO_64) {
7086            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
7087            tcg_gen_bswap64_i64(s->T0, s->T0);
7088            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
7089        } else
7090#endif
7091        {
7092            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
7093            tcg_gen_ext32u_tl(s->T0, s->T0);
7094            tcg_gen_bswap32_tl(s->T0, s->T0);
7095            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
7096        }
7097        break;
7098    case 0xd6: /* salc */
7099        if (CODE64(s))
7100            goto illegal_op;
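        /* AL = CF ? 0xff : 0x00: materialize the carry as 0/1, then
           negate it to replicate the bit across AL. */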
7101        gen_compute_eflags_c(s, s->T0);
7102        tcg_gen_neg_tl(s->T0, s->T0);
7103        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7104        break;
7105    case 0xe0: /* loopnz */
7106    case 0xe1: /* loopz */
7107    case 0xe2: /* loop */
7108    case 0xe3: /* jecxz */
7109        {
7110            TCGLabel *l1, *l2, *l3;
7111
7112            tval = (int8_t)insn_get(env, s, MO_8);
7113            next_eip = s->pc - s->cs_base;
7114            tval += next_eip;
7115            if (dflag == MO_16) {
7116                tval &= 0xffff;
7117            }
7118
7119            l1 = gen_new_label();
7120            l2 = gen_new_label();
7121            l3 = gen_new_label();
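            /* l1 is the branch-taken target and l2 the common exit; l3
               marks the not-taken path, reached directly by loopz/loopnz
               when ECX reaches zero. */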
7122            b &= 3;
7123            switch(b) {
7124            case 0: /* loopnz */
7125            case 1: /* loopz */
7126                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7127                gen_op_jz_ecx(s, s->aflag, l3);
7128                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7129                break;
7130            case 2: /* loop */
7131                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7132                gen_op_jnz_ecx(s, s->aflag, l1);
7133                break;
7134            default:
7135            case 3: /* jecxz */
7136                gen_op_jz_ecx(s, s->aflag, l1);
7137                break;
7138            }
7139
7140            gen_set_label(l3);
7141            gen_jmp_im(s, next_eip);
7142            tcg_gen_br(l2);
7143
7144            gen_set_label(l1);
7145            gen_jmp_im(s, tval);
7146            gen_set_label(l2);
7147            gen_eob(s);
7148        }
7149        break;
7150    case 0x130: /* wrmsr */
7151    case 0x132: /* rdmsr */
7152        if (s->cpl != 0) {
7153            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7154        } else {
7155            gen_update_cc_op(s);
7156            gen_jmp_im(s, pc_start - s->cs_base);
7157            if (b & 2) {
7158                gen_helper_rdmsr(cpu_env);
7159            } else {
7160                gen_helper_wrmsr(cpu_env);
7161            }
7162        }
7163        break;
7164    case 0x131: /* rdtsc */
7165        gen_update_cc_op(s);
7166        gen_jmp_im(s, pc_start - s->cs_base);
7167        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7168            gen_io_start();
7169        }
7170        gen_helper_rdtsc(cpu_env);
7171        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7172            gen_io_end();
7173            gen_jmp(s, s->pc - s->cs_base);
7174        }
7175        break;
7176    case 0x133: /* rdpmc */
7177        gen_update_cc_op(s);
7178        gen_jmp_im(s, pc_start - s->cs_base);
7179        gen_helper_rdpmc(cpu_env);
7180        break;
7181    case 0x134: /* sysenter */
7182        /* For Intel, SYSENTER is valid in 64-bit mode */
7183        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7184            goto illegal_op;
7185        if (!s->pe) {
7186            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7187        } else {
7188            gen_helper_sysenter(cpu_env);
7189            gen_eob(s);
7190        }
7191        break;
7192    case 0x135: /* sysexit */
7193        /* For Intel, SYSEXIT is valid in 64-bit mode */
7194        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7195            goto illegal_op;
7196        if (!s->pe) {
7197            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7198        } else {
7199            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7200            gen_eob(s);
7201        }
7202        break;
7203#ifdef TARGET_X86_64
7204    case 0x105: /* syscall */
7205        /* XXX: is it usable in real mode? */
7206        gen_update_cc_op(s);
7207        gen_jmp_im(s, pc_start - s->cs_base);
7208        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7209        /* TF handling for the syscall insn is different. The TF bit is checked
7210           after the syscall insn completes. This allows #DB to not be
7211           generated after one has entered CPL0 if TF is set in FMASK.  */
7212        gen_eob_worker(s, false, true);
7213        break;
7214    case 0x107: /* sysret */
7215        if (!s->pe) {
7216            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7217        } else {
7218            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7219            /* condition codes are modified only in long mode */
7220            if (s->lma) {
7221                set_cc_op(s, CC_OP_EFLAGS);
7222            }
7223            /* TF handling for the sysret insn is different. The TF bit is
7224               checked after the sysret insn completes. This allows #DB to be
7225               generated "as if" the syscall insn in userspace has just
7226               completed.  */
7227            gen_eob_worker(s, false, true);
7228        }
7229        break;
7230#endif
7231    case 0x1a2: /* cpuid */
7232        gen_update_cc_op(s);
7233        gen_jmp_im(s, pc_start - s->cs_base);
7234        gen_helper_cpuid(cpu_env);
7235        break;
7236    case 0xf4: /* hlt */
7237        if (s->cpl != 0) {
7238            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7239        } else {
7240            gen_update_cc_op(s);
7241            gen_jmp_im(s, pc_start - s->cs_base);
7242            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7243            s->base.is_jmp = DISAS_NORETURN;
7244        }
7245        break;
7246    case 0x100:
7247        modrm = x86_ldub_code(env, s);
7248        mod = (modrm >> 6) & 3;
7249        op = (modrm >> 3) & 7;
7250        switch(op) {
7251        case 0: /* sldt */
7252            if (!s->pe || s->vm86)
7253                goto illegal_op;
7254            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7255            tcg_gen_ld32u_tl(s->T0, cpu_env,
7256                             offsetof(CPUX86State, ldt.selector));
7257            ot = mod == 3 ? dflag : MO_16;
7258            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7259            break;
7260        case 2: /* lldt */
7261            if (!s->pe || s->vm86)
7262                goto illegal_op;
7263            if (s->cpl != 0) {
7264                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7265            } else {
7266                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7267                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7268                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7269                gen_helper_lldt(cpu_env, s->tmp2_i32);
7270            }
7271            break;
7272        case 1: /* str */
7273            if (!s->pe || s->vm86)
7274                goto illegal_op;
7275            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7276            tcg_gen_ld32u_tl(s->T0, cpu_env,
7277                             offsetof(CPUX86State, tr.selector));
7278            ot = mod == 3 ? dflag : MO_16;
7279            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7280            break;
7281        case 3: /* ltr */
7282            if (!s->pe || s->vm86)
7283                goto illegal_op;
7284            if (s->cpl != 0) {
7285                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7286            } else {
7287                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7288                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7289                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7290                gen_helper_ltr(cpu_env, s->tmp2_i32);
7291            }
7292            break;
7293        case 4: /* verr */
7294        case 5: /* verw */
7295            if (!s->pe || s->vm86)
7296                goto illegal_op;
7297            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7298            gen_update_cc_op(s);
7299            if (op == 4) {
7300                gen_helper_verr(cpu_env, s->T0);
7301            } else {
7302                gen_helper_verw(cpu_env, s->T0);
7303            }
7304            set_cc_op(s, CC_OP_EFLAGS);
7305            break;
7306        default:
7307            goto unknown_op;
7308        }
7309        break;
7310
7311    case 0x101:
7312        modrm = x86_ldub_code(env, s);
7313        switch (modrm) {
7314        CASE_MODRM_MEM_OP(0): /* sgdt */
7315            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7316            gen_lea_modrm(env, s, modrm);
7317            tcg_gen_ld32u_tl(s->T0,
7318                             cpu_env, offsetof(CPUX86State, gdt.limit));
7319            gen_op_st_v(s, MO_16, s->T0, s->A0);
7320            gen_add_A0_im(s, 2);
7321            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
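            /* With a 16-bit operand size only 24 bits of the base are
               stored; CODE64(s) + MO_32 selects a 64-bit store in long
               mode. */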
7322            if (dflag == MO_16) {
7323                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7324            }
7325            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7326            break;
7327
7328        case 0xc8: /* monitor */
7329            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7330                goto illegal_op;
7331            }
7332            gen_update_cc_op(s);
7333            gen_jmp_im(s, pc_start - s->cs_base);
7334            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7335            gen_extu(s->aflag, s->A0);
7336            gen_add_A0_ds_seg(s);
7337            gen_helper_monitor(cpu_env, s->A0);
7338            break;
7339
7340        case 0xc9: /* mwait */
7341            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7342                goto illegal_op;
7343            }
7344            gen_update_cc_op(s);
7345            gen_jmp_im(s, pc_start - s->cs_base);
7346            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7347            gen_eob(s);
7348            break;
7349
7350        case 0xca: /* clac */
7351            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7352                || s->cpl != 0) {
7353                goto illegal_op;
7354            }
7355            gen_helper_clac(cpu_env);
7356            gen_jmp_im(s, s->pc - s->cs_base);
7357            gen_eob(s);
7358            break;
7359
7360        case 0xcb: /* stac */
7361            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7362                || s->cpl != 0) {
7363                goto illegal_op;
7364            }
7365            gen_helper_stac(cpu_env);
7366            gen_jmp_im(s, s->pc - s->cs_base);
7367            gen_eob(s);
7368            break;
7369
7370        CASE_MODRM_MEM_OP(1): /* sidt */
7371            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7372            gen_lea_modrm(env, s, modrm);
7373            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7374            gen_op_st_v(s, MO_16, s->T0, s->A0);
7375            gen_add_A0_im(s, 2);
7376            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7377            if (dflag == MO_16) {
7378                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7379            }
7380            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7381            break;
7382
7383        case 0xd0: /* xgetbv */
7384            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7385                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7386                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7387                goto illegal_op;
7388            }
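            /* ECX selects the extended control register; the 64-bit
               result is returned in EDX:EAX. */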
7389            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7390            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7391            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7392            break;
7393
7394        case 0xd1: /* xsetbv */
7395            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7396                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7397                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7398                goto illegal_op;
7399            }
7400            if (s->cpl != 0) {
7401                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7402                break;
7403            }
7404            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7405                                  cpu_regs[R_EDX]);
7406            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7407            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7408            /* End TB because translation flags may change.  */
7409            gen_jmp_im(s, s->pc - s->cs_base);
7410            gen_eob(s);
7411            break;
7412
7413        case 0xd8: /* VMRUN */
7414            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7415                goto illegal_op;
7416            }
7417            if (s->cpl != 0) {
7418                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7419                break;
7420            }
7421            gen_update_cc_op(s);
7422            gen_jmp_im(s, pc_start - s->cs_base);
7423            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7424                             tcg_const_i32(s->pc - pc_start));
7425            tcg_gen_exit_tb(NULL, 0);
7426            s->base.is_jmp = DISAS_NORETURN;
7427            break;
7428
7429        case 0xd9: /* VMMCALL */
7430            if (!(s->flags & HF_SVME_MASK)) {
7431                goto illegal_op;
7432            }
7433            gen_update_cc_op(s);
7434            gen_jmp_im(s, pc_start - s->cs_base);
7435            gen_helper_vmmcall(cpu_env);
7436            break;
7437
7438        case 0xda: /* VMLOAD */
7439            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7440                goto illegal_op;
7441            }
7442            if (s->cpl != 0) {
7443                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7444                break;
7445            }
7446            gen_update_cc_op(s);
7447            gen_jmp_im(s, pc_start - s->cs_base);
7448            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7449            break;
7450
7451        case 0xdb: /* VMSAVE */
7452            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7453                goto illegal_op;
7454            }
7455            if (s->cpl != 0) {
7456                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7457                break;
7458            }
7459            gen_update_cc_op(s);
7460            gen_jmp_im(s, pc_start - s->cs_base);
7461            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7462            break;
7463
7464        case 0xdc: /* STGI */
7465            if ((!(s->flags & HF_SVME_MASK)
7466                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7467                || !s->pe) {
7468                goto illegal_op;
7469            }
7470            if (s->cpl != 0) {
7471                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7472                break;
7473            }
7474            gen_update_cc_op(s);
7475            gen_helper_stgi(cpu_env);
7476            gen_jmp_im(s, s->pc - s->cs_base);
7477            gen_eob(s);
7478            break;
7479
7480        case 0xdd: /* CLGI */
7481            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7482                goto illegal_op;
7483            }
7484            if (s->cpl != 0) {
7485                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7486                break;
7487            }
7488            gen_update_cc_op(s);
7489            gen_jmp_im(s, pc_start - s->cs_base);
7490            gen_helper_clgi(cpu_env);
7491            break;
7492
7493        case 0xde: /* SKINIT */
7494            if ((!(s->flags & HF_SVME_MASK)
7495                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7496                || !s->pe) {
7497                goto illegal_op;
7498            }
7499            gen_update_cc_op(s);
7500            gen_jmp_im(s, pc_start - s->cs_base);
7501            gen_helper_skinit(cpu_env);
7502            break;
7503
7504        case 0xdf: /* INVLPGA */
7505            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7506                goto illegal_op;
7507            }
7508            if (s->cpl != 0) {
7509                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7510                break;
7511            }
7512            gen_update_cc_op(s);
7513            gen_jmp_im(s, pc_start - s->cs_base);
7514            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7515            break;
7516
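            /*
             * LGDT and LIDT (below) read the same pseudo-descriptor
             * layout that SGDT/SIDT store: a 16-bit limit followed by
             * the base, truncated to 24 bits for a 16-bit operand size.
             */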
7517        CASE_MODRM_MEM_OP(2): /* lgdt */
7518            if (s->cpl != 0) {
7519                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7520                break;
7521            }
7522            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7523            gen_lea_modrm(env, s, modrm);
7524            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7525            gen_add_A0_im(s, 2);
7526            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7527            if (dflag == MO_16) {
7528                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7529            }
7530            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7531            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7532            break;
7533
7534        CASE_MODRM_MEM_OP(3): /* lidt */
7535            if (s->cpl != 0) {
7536                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7537                break;
7538            }
7539            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7540            gen_lea_modrm(env, s, modrm);
7541            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7542            gen_add_A0_im(s, 2);
7543            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7544            if (dflag == MO_16) {
7545                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7546            }
7547            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7548            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7549            break;
7550
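            /*
             * SMSW reads the low 16 bits of CR0; only a register
             * destination in 64-bit mode receives the full
             * operand-size value.
             */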
7551        CASE_MODRM_OP(4): /* smsw */
7552            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7553            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7554            if (CODE64(s)) {
7555                mod = (modrm >> 6) & 3;
7556                ot = (mod != 3 ? MO_16 : s->dflag);
7557            } else {
7558                ot = MO_16;
7559            }
7560            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7561            break;
7562        case 0xee: /* rdpkru */
7563            if (prefixes & PREFIX_LOCK) {
7564                goto illegal_op;
7565            }
7566            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7567            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7568            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7569            break;
7570        case 0xef: /* wrpkru */
7571            if (prefixes & PREFIX_LOCK) {
7572                goto illegal_op;
7573            }
7574            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7575                                  cpu_regs[R_EDX]);
7576            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7577            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7578            break;
7579        CASE_MODRM_OP(6): /* lmsw */
7580            if (s->cpl != 0) {
7581                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7582                break;
7583            }
7584            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7585            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7586            gen_helper_lmsw(cpu_env, s->T0);
7587            gen_jmp_im(s, s->pc - s->cs_base);
7588            gen_eob(s);
7589            break;
7590
7591        CASE_MODRM_MEM_OP(7): /* invlpg */
7592            if (s->cpl != 0) {
7593                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7594                break;
7595            }
7596            gen_update_cc_op(s);
7597            gen_jmp_im(s, pc_start - s->cs_base);
7598            gen_lea_modrm(env, s, modrm);
7599            gen_helper_invlpg(cpu_env, s->A0);
7600            gen_jmp_im(s, s->pc - s->cs_base);
7601            gen_eob(s);
7602            break;
7603
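            /*
             * SWAPGS exchanges the GS base with MSR_KERNEL_GS_BASE; it
             * only exists in 64-bit mode and faults outside CPL 0.
             */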
7604        case 0xf8: /* swapgs */
7605#ifdef TARGET_X86_64
7606            if (CODE64(s)) {
7607                if (s->cpl != 0) {
7608                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7609                } else {
7610                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7611                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7612                                  offsetof(CPUX86State, kernelgsbase));
7613                    tcg_gen_st_tl(s->T0, cpu_env,
7614                                  offsetof(CPUX86State, kernelgsbase));
7615                }
7616                break;
7617            }
7618#endif
7619            goto illegal_op;
7620
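            /*
             * RDTSCP reads the time stamp counter, so in icount mode
             * the helper call is bracketed by gen_io_start/gen_io_end
             * and the block ends with a jump to the next instruction.
             */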
7621        case 0xf9: /* rdtscp */
7622            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7623                goto illegal_op;
7624            }
7625            gen_update_cc_op(s);
7626            gen_jmp_im(s, pc_start - s->cs_base);
7627            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7628                gen_io_start();
7629            }
7630            gen_helper_rdtscp(cpu_env);
7631            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7632                gen_io_end();
7633                gen_jmp(s, s->pc - s->cs_base);
7634            }
7635            break;
7636
7637        default:
7638            goto unknown_op;
7639        }
7640        break;
7641
7642    case 0x108: /* invd */
7643    case 0x109: /* wbinvd */
7644        if (s->cpl != 0) {
7645            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7646        } else {
7647            gen_svm_check_intercept(s, pc_start, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD);
7648            /* nothing to do */
7649        }
7650        break;
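        /*
         * In 64-bit mode opcode 0x63 is MOVSXD (sign-extend a 32-bit
         * operand into a 64-bit register).  In protected mode it is
         * ARPL, which raises the RPL field (low two bits) of the
         * destination selector to that of the source and sets ZF
         * when it adjusts it.
         */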
7651    case 0x63: /* arpl or movslS (x86_64) */
7652#ifdef TARGET_X86_64
7653        if (CODE64(s)) {
7654            int d_ot;
7655            /* d_ot is the size of destination */
7656            d_ot = dflag;
7657
7658            modrm = x86_ldub_code(env, s);
7659            reg = ((modrm >> 3) & 7) | rex_r;
7660            mod = (modrm >> 6) & 3;
7661            rm = (modrm & 7) | REX_B(s);
7662
7663            if (mod == 3) {
7664                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7665                /* sign extend */
7666                if (d_ot == MO_64) {
7667                    tcg_gen_ext32s_tl(s->T0, s->T0);
7668                }
7669                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7670            } else {
7671                gen_lea_modrm(env, s, modrm);
7672                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7673                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7674            }
7675        } else
7676#endif
7677        {
7678            TCGLabel *label1;
7679            TCGv t0, t1, t2, a0;
7680
7681            if (!s->pe || s->vm86)
7682                goto illegal_op;
7683            t0 = tcg_temp_local_new();
7684            t1 = tcg_temp_local_new();
7685            t2 = tcg_temp_local_new();
7686            ot = MO_16;
7687            modrm = x86_ldub_code(env, s);
7688            reg = (modrm >> 3) & 7;
7689            mod = (modrm >> 6) & 3;
7690            rm = modrm & 7;
7691            if (mod != 3) {
7692                gen_lea_modrm(env, s, modrm);
7693                gen_op_ld_v(s, ot, t0, s->A0);
7694                a0 = tcg_temp_local_new();
7695                tcg_gen_mov_tl(a0, s->A0);
7696            } else {
7697                gen_op_mov_v_reg(s, ot, t0, rm);
7698                a0 = NULL;
7699            }
7700            gen_op_mov_v_reg(s, ot, t1, reg);
7701            tcg_gen_andi_tl(s->tmp0, t0, 3);
7702            tcg_gen_andi_tl(t1, t1, 3);
7703            tcg_gen_movi_tl(t2, 0);
7704            label1 = gen_new_label();
7705            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7706            tcg_gen_andi_tl(t0, t0, ~3);
7707            tcg_gen_or_tl(t0, t0, t1);
7708            tcg_gen_movi_tl(t2, CC_Z);
7709            gen_set_label(label1);
7710            if (mod != 3) {
7711                gen_op_st_v(s, ot, t0, a0);
7712                tcg_temp_free(a0);
7713            } else {
7714                gen_op_mov_reg_v(s, ot, rm, t0);
7715            }
7716            gen_compute_eflags(s);
7717            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7718            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7719            tcg_temp_free(t0);
7720            tcg_temp_free(t1);
7721            tcg_temp_free(t2);
7722        }
7723        break;
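        /*
         * LAR/LSL load the access rights / segment limit for a
         * selector.  The helper sets ZF in cc_src on success, and the
         * destination register is only written back when ZF is set.
         */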
7724    case 0x102: /* lar */
7725    case 0x103: /* lsl */
7726        {
7727            TCGLabel *label1;
7728            TCGv t0;
7729            if (!s->pe || s->vm86)
7730                goto illegal_op;
7731            ot = dflag != MO_16 ? MO_32 : MO_16;
7732            modrm = x86_ldub_code(env, s);
7733            reg = ((modrm >> 3) & 7) | rex_r;
7734            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7735            t0 = tcg_temp_local_new();
7736            gen_update_cc_op(s);
7737            if (b == 0x102) {
7738                gen_helper_lar(t0, cpu_env, s->T0);
7739            } else {
7740                gen_helper_lsl(t0, cpu_env, s->T0);
7741            }
7742            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7743            label1 = gen_new_label();
7744            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7745            gen_op_mov_reg_v(s, ot, reg, t0);
7746            gen_set_label(label1);
7747            set_cc_op(s, CC_OP_EFLAGS);
7748            tcg_temp_free(t0);
7749        }
7750        break;
7751    case 0x118:
7752        modrm = x86_ldub_code(env, s);
7753        mod = (modrm >> 6) & 3;
7754        op = (modrm >> 3) & 7;
7755        switch(op) {
7756        case 0: /* prefetchnta */
7757        case 1: /* prefetcht0 */
7758        case 2: /* prefetcht1 */
7759        case 3: /* prefetcht2 */
7760            if (mod == 3)
7761                goto illegal_op;
7762            gen_nop_modrm(env, s, modrm);
7763            /* nothing more to do */
7764            break;
7765        default: /* nop (multi byte) */
7766            gen_nop_modrm(env, s, modrm);
7767            break;
7768        }
7769        break;
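        /*
         * 0F 1A is the first MPX group: F3 selects BNDCL, F2 selects
         * BNDCU, 66 selects the load form of BNDMOV, and a plain
         * memory operand is BNDLDX.  With MPX disabled, or for any
         * form left unhandled here, the insn is treated as a NOP.
         */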
7770    case 0x11a:
7771        modrm = x86_ldub_code(env, s);
7772        if (s->flags & HF_MPX_EN_MASK) {
7773            mod = (modrm >> 6) & 3;
7774            reg = ((modrm >> 3) & 7) | rex_r;
7775            if (prefixes & PREFIX_REPZ) {
7776                /* bndcl */
7777                if (reg >= 4
7778                    || (prefixes & PREFIX_LOCK)
7779                    || s->aflag == MO_16) {
7780                    goto illegal_op;
7781                }
7782                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7783            } else if (prefixes & PREFIX_REPNZ) {
7784                /* bndcu */
7785                if (reg >= 4
7786                    || (prefixes & PREFIX_LOCK)
7787                    || s->aflag == MO_16) {
7788                    goto illegal_op;
7789                }
7790                TCGv_i64 notu = tcg_temp_new_i64();
7791                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7792                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7793                tcg_temp_free_i64(notu);
7794            } else if (prefixes & PREFIX_DATA) {
7795                /* bndmov -- from reg/mem */
7796                if (reg >= 4 || s->aflag == MO_16) {
7797                    goto illegal_op;
7798                }
7799                if (mod == 3) {
7800                    int reg2 = (modrm & 7) | REX_B(s);
7801                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7802                        goto illegal_op;
7803                    }
7804                    if (s->flags & HF_MPX_IU_MASK) {
7805                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7806                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7807                    }
7808                } else {
7809                    gen_lea_modrm(env, s, modrm);
7810                    if (CODE64(s)) {
7811                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7812                                            s->mem_index, MO_LEQ);
7813                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7814                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7815                                            s->mem_index, MO_LEQ);
7816                    } else {
7817                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7818                                            s->mem_index, MO_LEUL);
7819                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7820                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7821                                            s->mem_index, MO_LEUL);
7822                    }
7823                    /* bnd registers are now in-use */
7824                    gen_set_hflag(s, HF_MPX_IU_MASK);
7825                }
7826            } else if (mod != 3) {
7827                /* bndldx */
7828                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7829                if (reg >= 4
7830                    || (prefixes & PREFIX_LOCK)
7831                    || s->aflag == MO_16
7832                    || a.base < -1) {
7833                    goto illegal_op;
7834                }
7835                if (a.base >= 0) {
7836                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7837                } else {
7838                    tcg_gen_movi_tl(s->A0, 0);
7839                }
7840                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7841                if (a.index >= 0) {
7842                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7843                } else {
7844                    tcg_gen_movi_tl(s->T0, 0);
7845                }
7846                if (CODE64(s)) {
7847                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7848                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7849                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7850                } else {
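                        /* The 32-bit helper returns both bounds packed
                           in one 64-bit value: the lower bound in the
                           low half, the upper bound in the high half.  */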
7851                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7852                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7853                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7854                }
7855                gen_set_hflag(s, HF_MPX_IU_MASK);
7856            }
7857        }
7858        gen_nop_modrm(env, s, modrm);
7859        break;
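        /*
         * 0F 1B mirrors 0F 1A for the store direction: F3 with a
         * memory operand is BNDMK, F2 is BNDCN, 66 is the store form
         * of BNDMOV, and a plain memory operand is BNDSTX.
         */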
7860    case 0x11b:
7861        modrm = x86_ldub_code(env, s);
7862        if (s->flags & HF_MPX_EN_MASK) {
7863            mod = (modrm >> 6) & 3;
7864            reg = ((modrm >> 3) & 7) | rex_r;
7865            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7866                /* bndmk */
7867                if (reg >= 4
7868                    || (prefixes & PREFIX_LOCK)
7869                    || s->aflag == MO_16) {
7870                    goto illegal_op;
7871                }
7872                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7873                if (a.base >= 0) {
7874                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7875                    if (!CODE64(s)) {
7876                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7877                    }
7878                } else if (a.base == -1) {
7879                    /* no base register: the lower bound is 0 */
7880                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7881                } else {
7882                    /* rip-relative generates #ud */
7883                    goto illegal_op;
7884                }
7885                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7886                if (!CODE64(s)) {
7887                    tcg_gen_ext32u_tl(s->A0, s->A0);
7888                }
7889                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7890                /* bnd registers are now in-use */
7891                gen_set_hflag(s, HF_MPX_IU_MASK);
7892                break;
7893            } else if (prefixes & PREFIX_REPNZ) {
7894                /* bndcn */
7895                if (reg >= 4
7896                    || (prefixes & PREFIX_LOCK)
7897                    || s->aflag == MO_16) {
7898                    goto illegal_op;
7899                }
7900                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7901            } else if (prefixes & PREFIX_DATA) {
7902                /* bndmov -- to reg/mem */
7903                if (reg >= 4 || s->aflag == MO_16) {
7904                    goto illegal_op;
7905                }
7906                if (mod == 3) {
7907                    int reg2 = (modrm & 7) | REX_B(s);
7908                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7909                        goto illegal_op;
7910                    }
7911                    if (s->flags & HF_MPX_IU_MASK) {
7912                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7913                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7914                    }
7915                } else {
7916                    gen_lea_modrm(env, s, modrm);
7917                    if (CODE64(s)) {
7918                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7919                                            s->mem_index, MO_LEQ);
7920                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7921                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7922                                            s->mem_index, MO_LEQ);
7923                    } else {
7924                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7925                                            s->mem_index, MO_LEUL);
7926                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7927                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7928                                            s->mem_index, MO_LEUL);
7929                    }
7930                }
7931            } else if (mod != 3) {
7932                /* bndstx */
7933                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7934                if (reg >= 4
7935                    || (prefixes & PREFIX_LOCK)
7936                    || s->aflag == MO_16
7937                    || a.base < -1) {
7938                    goto illegal_op;
7939                }
7940                if (a.base >= 0) {
7941                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7942                } else {
7943                    tcg_gen_movi_tl(s->A0, 0);
7944                }
7945                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7946                if (a.index >= 0) {
7947                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7948                } else {
7949                    tcg_gen_movi_tl(s->T0, 0);
7950                }
7951                if (CODE64(s)) {
7952                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7953                                        cpu_bndl[reg], cpu_bndu[reg]);
7954                } else {
7955                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7956                                        cpu_bndl[reg], cpu_bndu[reg]);
7957                }
7958            }
7959        }
7960        gen_nop_modrm(env, s, modrm);
7961        break;
7962    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7963        modrm = x86_ldub_code(env, s);
7964        gen_nop_modrm(env, s, modrm);
7965        break;
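        /*
         * MOV to/from control registers.  A LOCK prefix together with
         * the CR8LEG feature redirects a CR0 access to CR8 (the AMD
         * alternate encoding of MOV CR8 outside 64-bit mode).
         */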
7966    case 0x120: /* mov reg, crN */
7967    case 0x122: /* mov crN, reg */
7968        if (s->cpl != 0) {
7969            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7970        } else {
7971            modrm = x86_ldub_code(env, s);
7972            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7973             * AMD documentation (24594.pdf) and testing of
7974             * Intel 386 and 486 processors both show that the mod bits
7975             * are treated as 1's, regardless of their actual values.
7976             */
7977            rm = (modrm & 7) | REX_B(s);
7978            reg = ((modrm >> 3) & 7) | rex_r;
7979            if (CODE64(s))
7980                ot = MO_64;
7981            else
7982                ot = MO_32;
7983            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7984                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7985                reg = 8;
7986            }
7987            switch(reg) {
7988            case 0:
7989            case 2:
7990            case 3:
7991            case 4:
7992            case 8:
7993                gen_update_cc_op(s);
7994                gen_jmp_im(s, pc_start - s->cs_base);
7995                if (b & 2) {
7996                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7997                        gen_io_start();
7998                    }
7999                    gen_op_mov_v_reg(s, ot, s->T0, rm);
8000                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8001                                         s->T0);
8002                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8003                        gen_io_end();
8004                    }
8005                    gen_jmp_im(s, s->pc - s->cs_base);
8006                    gen_eob(s);
8007                } else {
8008                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8009                        gen_io_start();
8010                    }
8011                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8012                    gen_op_mov_reg_v(s, ot, rm, s->T0);
8013                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8014                        gen_io_end();
8015                    }
8016                }
8017                break;
8018            default:
8019                goto unknown_op;
8020            }
8021        }
8022        break;
8023    case 0x121: /* mov reg, drN */
8024    case 0x123: /* mov drN, reg */
8025        if (s->cpl != 0) {
8026            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8027        } else {
8028            modrm = x86_ldub_code(env, s);
8029            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8030             * AMD documentation (24594.pdf) and testing of
8031             * Intel 386 and 486 processors both show that the mod bits
8032             * are treated as 1's, regardless of their actual values.
8033             */
8034            rm = (modrm & 7) | REX_B(s);
8035            reg = ((modrm >> 3) & 7) | rex_r;
8036            if (CODE64(s))
8037                ot = MO_64;
8038            else
8039                ot = MO_32;
8040            if (reg >= 8) {
8041                goto illegal_op;
8042            }
8043            if (b & 2) {
8044                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8045                gen_op_mov_v_reg(s, ot, s->T0, rm);
8046                tcg_gen_movi_i32(s->tmp2_i32, reg);
8047                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8048                gen_jmp_im(s, s->pc - s->cs_base);
8049                gen_eob(s);
8050            } else {
8051                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8052                tcg_gen_movi_i32(s->tmp2_i32, reg);
8053                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8054                gen_op_mov_reg_v(s, ot, rm, s->T0);
8055            }
8056        }
8057        break;
8058    case 0x106: /* clts */
8059        if (s->cpl != 0) {
8060            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8061        } else {
8062            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8063            gen_helper_clts(cpu_env);
8064            /* abort block because static cpu state changed */
8065            gen_jmp_im(s, s->pc - s->cs_base);
8066            gen_eob(s);
8067        }
8068        break;
8069    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8070    case 0x1c3: /* MOVNTI reg, mem */
8071        if (!(s->cpuid_features & CPUID_SSE2))
8072            goto illegal_op;
8073        ot = mo_64_32(dflag);
8074        modrm = x86_ldub_code(env, s);
8075        mod = (modrm >> 6) & 3;
8076        if (mod == 3)
8077            goto illegal_op;
8078        reg = ((modrm >> 3) & 7) | rex_r;
8079        /* generate a generic store */
8080        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8081        break;
8082    case 0x1ae:
8083        modrm = x86_ldub_code(env, s);
8084        switch (modrm) {
8085        CASE_MODRM_MEM_OP(0): /* fxsave */
8086            if (!(s->cpuid_features & CPUID_FXSR)
8087                || (prefixes & PREFIX_LOCK)) {
8088                goto illegal_op;
8089            }
8090            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8091                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8092                break;
8093            }
8094            gen_lea_modrm(env, s, modrm);
8095            gen_helper_fxsave(cpu_env, s->A0);
8096            break;
8097
8098        CASE_MODRM_MEM_OP(1): /* fxrstor */
8099            if (!(s->cpuid_features & CPUID_FXSR)
8100                || (prefixes & PREFIX_LOCK)) {
8101                goto illegal_op;
8102            }
8103            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8104                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8105                break;
8106            }
8107            gen_lea_modrm(env, s, modrm);
8108            gen_helper_fxrstor(cpu_env, s->A0);
8109            break;
8110
8111        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8112            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8113                goto illegal_op;
8114            }
8115            if (s->flags & HF_TS_MASK) {
8116                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8117                break;
8118            }
8119            gen_lea_modrm(env, s, modrm);
8120            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8121            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8122            break;
8123
8124        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8125            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8126                goto illegal_op;
8127            }
8128            if (s->flags & HF_TS_MASK) {
8129                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8130                break;
8131            }
8132            gen_lea_modrm(env, s, modrm);
8133            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8134            gen_op_st_v(s, MO_32, s->T0, s->A0);
8135            break;
8136
8137        CASE_MODRM_MEM_OP(4): /* xsave */
8138            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8139                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8140                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8141                goto illegal_op;
8142            }
8143            gen_lea_modrm(env, s, modrm);
8144            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8145                                  cpu_regs[R_EDX]);
8146            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8147            break;
8148
8149        CASE_MODRM_MEM_OP(5): /* xrstor */
8150            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8151                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8152                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8153                goto illegal_op;
8154            }
8155            gen_lea_modrm(env, s, modrm);
8156            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8157                                  cpu_regs[R_EDX]);
8158            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8159            /* XRSTOR is how MPX is enabled, which changes how
8160               we translate.  Thus we need to end the TB.  */
8161            gen_update_cc_op(s);
8162            gen_jmp_im(s, s->pc - s->cs_base);
8163            gen_eob(s);
8164            break;
8165
8166        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8167            if (prefixes & PREFIX_LOCK) {
8168                goto illegal_op;
8169            }
8170            if (prefixes & PREFIX_DATA) {
8171                /* clwb */
8172                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8173                    goto illegal_op;
8174                }
8175                gen_nop_modrm(env, s, modrm);
8176            } else {
8177                /* xsaveopt */
8178                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8179                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8180                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8181                    goto illegal_op;
8182                }
8183                gen_lea_modrm(env, s, modrm);
8184                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8185                                      cpu_regs[R_EDX]);
8186                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8187            }
8188            break;
8189
8190        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8191            if (prefixes & PREFIX_LOCK) {
8192                goto illegal_op;
8193            }
8194            if (prefixes & PREFIX_DATA) {
8195                /* clflushopt */
8196                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8197                    goto illegal_op;
8198                }
8199            } else {
8200                /* clflush */
8201                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8202                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8203                    goto illegal_op;
8204                }
8205            }
8206            gen_nop_modrm(env, s, modrm);
8207            break;
8208
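            /*
             * RD/WRFSBASE and RD/WRGSBASE require 64-bit mode, an F3
             * prefix and the FSGSBASE feature.  CR4.FSGSBASE is tested
             * at run time so that it need not be encoded in hflags.
             */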
8209        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8210        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8211        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8212        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8213            if (CODE64(s)
8214                && (prefixes & PREFIX_REPZ)
8215                && !(prefixes & PREFIX_LOCK)
8216                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8217                TCGv base, treg, src, dst;
8218
8219                /* Preserve hflags bits by testing CR4 at runtime.  */
8220                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8221                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8222
8223                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8224                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8225
8226                if (modrm & 0x10) {
8227                    /* wr*base */
8228                    dst = base, src = treg;
8229                } else {
8230                    /* rd*base */
8231                    dst = treg, src = base;
8232                }
8233
8234                if (s->dflag == MO_32) {
8235                    tcg_gen_ext32u_tl(dst, src);
8236                } else {
8237                    tcg_gen_mov_tl(dst, src);
8238                }
8239                break;
8240            }
8241            goto unknown_op;
8242
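            /*
             * The SSE fence instructions map directly onto TCG memory
             * barriers: SFENCE orders stores, LFENCE orders loads, and
             * MFENCE orders both.
             */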
8243        case 0xf8: /* sfence / pcommit */
8244            if (prefixes & PREFIX_DATA) {
8245                /* pcommit */
8246                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8247                    || (prefixes & PREFIX_LOCK)) {
8248                    goto illegal_op;
8249                }
8250                break;
8251            }
8252            /* fallthru */
8253        case 0xf9 ... 0xff: /* sfence */
8254            if (!(s->cpuid_features & CPUID_SSE)
8255                || (prefixes & PREFIX_LOCK)) {
8256                goto illegal_op;
8257            }
8258            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8259            break;
8260        case 0xe8 ... 0xef: /* lfence */
8261            if (!(s->cpuid_features & CPUID_SSE)
8262                || (prefixes & PREFIX_LOCK)) {
8263                goto illegal_op;
8264            }
8265            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8266            break;
8267        case 0xf0 ... 0xf7: /* mfence */
8268            if (!(s->cpuid_features & CPUID_SSE2)
8269                || (prefixes & PREFIX_LOCK)) {
8270                goto illegal_op;
8271            }
8272            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8273            break;
8274
8275        default:
8276            goto unknown_op;
8277        }
8278        break;
8279
8280    case 0x10d: /* 3DNow! prefetch(w) */
8281        modrm = x86_ldub_code(env, s);
8282        mod = (modrm >> 6) & 3;
8283        if (mod == 3)
8284            goto illegal_op;
8285        gen_nop_modrm(env, s, modrm);
8286        break;
8287    case 0x1aa: /* rsm */
8288        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8289        if (!(s->flags & HF_SMM_MASK))
8290            goto illegal_op;
8291        gen_update_cc_op(s);
8292        gen_jmp_im(s, s->pc - s->cs_base);
8293        gen_helper_rsm(cpu_env);
8294        gen_eob(s);
8295        break;
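        /*
         * POPCNT requires a bare F3 prefix.  The source operand is
         * kept in cc_src so that CC_OP_POPCNT can compute ZF lazily.
         */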
8296    case 0x1b8: /* SSE4.2 popcnt */
8297        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8298             PREFIX_REPZ)
8299            goto illegal_op;
8300        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8301            goto illegal_op;
8302
8303        modrm = x86_ldub_code(env, s);
8304        reg = ((modrm >> 3) & 7) | rex_r;
8305
8306        if (s->prefix & PREFIX_DATA) {
8307            ot = MO_16;
8308        } else {
8309            ot = mo_64_32(dflag);
8310        }
8311
8312        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8313        gen_extu(ot, s->T0);
8314        tcg_gen_mov_tl(cpu_cc_src, s->T0);
8315        tcg_gen_ctpop_tl(s->T0, s->T0);
8316        gen_op_mov_reg_v(s, ot, reg, s->T0);
8317
8318        set_cc_op(s, CC_OP_POPCNT);
8319        break;
8320    case 0x10e ... 0x10f:
8321        /* 3DNow! instructions, ignore prefixes */
8322        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8323        /* fall through */
8324    case 0x110 ... 0x117:
8325    case 0x128 ... 0x12f:
8326    case 0x138 ... 0x13a:
8327    case 0x150 ... 0x179:
8328    case 0x17c ... 0x17f:
8329    case 0x1c2:
8330    case 0x1c4 ... 0x1c6:
8331    case 0x1d0 ... 0x1fe:
8332        gen_sse(env, s, b, pc_start, rex_r);
8333        break;
8334    default:
8335        goto unknown_op;
8336    }
8337    return s->pc;
8338 illegal_op:
8339    gen_illegal_opcode(s);
8340    return s->pc;
8341 unknown_op:
8342    gen_unknown_opcode(env, s);
8343    return s->pc;
8344}
8345
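    /*
     * Allocate the fixed TCG globals that mirror the guest CPU state.
     * Each global aliases a CPUX86State field by offset, so TCG ops
     * on it read and write that field directly.
     */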
8346void tcg_x86_init(void)
8347{
8348    static const char reg_names[CPU_NB_REGS][4] = {
8349#ifdef TARGET_X86_64
8350        [R_EAX] = "rax",
8351        [R_EBX] = "rbx",
8352        [R_ECX] = "rcx",
8353        [R_EDX] = "rdx",
8354        [R_ESI] = "rsi",
8355        [R_EDI] = "rdi",
8356        [R_EBP] = "rbp",
8357        [R_ESP] = "rsp",
8358        [8]  = "r8",
8359        [9]  = "r9",
8360        [10] = "r10",
8361        [11] = "r11",
8362        [12] = "r12",
8363        [13] = "r13",
8364        [14] = "r14",
8365        [15] = "r15",
8366#else
8367        [R_EAX] = "eax",
8368        [R_EBX] = "ebx",
8369        [R_ECX] = "ecx",
8370        [R_EDX] = "edx",
8371        [R_ESI] = "esi",
8372        [R_EDI] = "edi",
8373        [R_EBP] = "ebp",
8374        [R_ESP] = "esp",
8375#endif
8376    };
8377    static const char seg_base_names[6][8] = {
8378        [R_CS] = "cs_base",
8379        [R_DS] = "ds_base",
8380        [R_ES] = "es_base",
8381        [R_FS] = "fs_base",
8382        [R_GS] = "gs_base",
8383        [R_SS] = "ss_base",
8384    };
8385    static const char bnd_regl_names[4][8] = {
8386        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8387    };
8388    static const char bnd_regu_names[4][8] = {
8389        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8390    };
8391    int i;
8392
8393    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8394                                       offsetof(CPUX86State, cc_op), "cc_op");
8395    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8396                                    "cc_dst");
8397    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8398                                    "cc_src");
8399    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8400                                     "cc_src2");
8401
8402    for (i = 0; i < CPU_NB_REGS; ++i) {
8403        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8404                                         offsetof(CPUX86State, regs[i]),
8405                                         reg_names[i]);
8406    }
8407
8408    for (i = 0; i < 6; ++i) {
8409        cpu_seg_base[i]
8410            = tcg_global_mem_new(cpu_env,
8411                                 offsetof(CPUX86State, segs[i].base),
8412                                 seg_base_names[i]);
8413    }
8414
8415    for (i = 0; i < 4; ++i) {
8416        cpu_bndl[i]
8417            = tcg_global_mem_new_i64(cpu_env,
8418                                     offsetof(CPUX86State, bnd_regs[i].lb),
8419                                     bnd_regl_names[i]);
8420        cpu_bndu[i]
8421            = tcg_global_mem_new_i64(cpu_env,
8422                                     offsetof(CPUX86State, bnd_regs[i].ub),
8423                                     bnd_regu_names[i]);
8424    }
8425}
8426
8427static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8428{
8429    DisasContext *dc = container_of(dcbase, DisasContext, base);
8430    CPUX86State *env = cpu->env_ptr;
8431    uint32_t flags = dc->base.tb->flags;
8432    target_ulong cs_base = dc->base.tb->cs_base;
8433
8434    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8435    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8436    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8437    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8438    dc->f_st = 0;
8439    dc->vm86 = (flags >> VM_SHIFT) & 1;
8440    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8441    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8442    dc->tf = (flags >> TF_SHIFT) & 1;
8443    dc->cc_op = CC_OP_DYNAMIC;
8444    dc->cc_op_dirty = false;
8445    dc->cs_base = cs_base;
8446    dc->popl_esp_hack = 0;
8447    /* select memory access functions */
8448    dc->mem_index = 0;
8449#ifdef CONFIG_SOFTMMU
8450    dc->mem_index = cpu_mmu_index(env, false);
8451#endif
8452    dc->cpuid_features = env->features[FEAT_1_EDX];
8453    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8454    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8455    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8456    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8457    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8458#ifdef TARGET_X86_64
8459    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8460    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8461#endif
8462    dc->flags = flags;
8463    dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8464                    (flags & HF_INHIBIT_IRQ_MASK));
8465    /* Do not optimize repz jumps at all in icount mode, because
8466       rep movsS instructions are executed with different code
8467       paths in the !repz_opt and repz_opt modes.  The !repz_opt
8468       path was always used except in single-step mode.  Disabling
8469       the jump optimization here makes the control paths identical
8470       in normal and single-step execution.  As a result there is
8471       no jump optimization for repz in record/replay modes, and
8472       there is always an additional step for ecx=0 when icount
8473       is enabled.
8474     */
8475    dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8476#if 0
8477    /* check addseg logic */
8478    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8479        printf("ERROR addseg\n");
8480#endif
8481
8482    dc->T0 = tcg_temp_new();
8483    dc->T1 = tcg_temp_new();
8484    dc->A0 = tcg_temp_new();
8485
8486    dc->tmp0 = tcg_temp_new();
8487    dc->tmp1_i64 = tcg_temp_new_i64();
8488    dc->tmp2_i32 = tcg_temp_new_i32();
8489    dc->tmp3_i32 = tcg_temp_new_i32();
8490    dc->tmp4 = tcg_temp_new();
8491    dc->ptr0 = tcg_temp_new_ptr();
8492    dc->ptr1 = tcg_temp_new_ptr();
8493    dc->cc_srcT = tcg_temp_local_new();
8494}
8495
8496static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8497{
8498}
8499
8500static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8501{
8502    DisasContext *dc = container_of(dcbase, DisasContext, base);
8503
8504    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8505}
8506
8507static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8508                                     const CPUBreakpoint *bp)
8509{
8510    DisasContext *dc = container_of(dcbase, DisasContext, base);
8511    /* If RF is set, suppress an internally generated breakpoint.  */
8512    int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8513    if (bp->flags & flags) {
8514        gen_debug(dc, dc->base.pc_next - dc->cs_base);
8515        dc->base.is_jmp = DISAS_NORETURN;
8516        /* The address covered by the breakpoint must be included in
8517           [tb->pc, tb->pc + tb->size) in order for it to be
8518           properly cleared -- thus we increment the PC here so that
8519           the generic logic setting tb->size later does the right thing.  */
8520        dc->base.pc_next += 1;
8521        return true;
8522    } else {
8523        return false;
8524    }
8525}
8526
8527static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8528{
8529    DisasContext *dc = container_of(dcbase, DisasContext, base);
8530    target_ulong pc_next = disas_insn(dc, cpu);
8531
8532    if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8533        /* In single-step mode, we generate only one instruction and
8534           then generate an exception.  */
8535        /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8536           the flag and abort the translation to give the IRQs a
8537           chance to happen.  */
8538        dc->base.is_jmp = DISAS_TOO_MANY;
8539    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8540               && ((pc_next & TARGET_PAGE_MASK)
8541                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8542                       & TARGET_PAGE_MASK)
8543                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8544        /* Do not cross a page boundary in icount mode, since doing
8545           so can cause an exception.  Stop only when the boundary is
8546           crossed by the first instruction in the block.
8547           If the current instruction has already crossed it, that is
8548           fine, because an exception has not stopped this code.
8549         */
8550        dc->base.is_jmp = DISAS_TOO_MANY;
8551    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8552        dc->base.is_jmp = DISAS_TOO_MANY;
8553    }
8554
8555    dc->base.pc_next = pc_next;
8556}
8557
8558static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8559{
8560    DisasContext *dc = container_of(dcbase, DisasContext, base);
8561
8562    if (dc->base.is_jmp == DISAS_TOO_MANY) {
8563        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8564        gen_eob(dc);
8565    }
8566}
8567
8568static void i386_tr_disas_log(const DisasContextBase *dcbase,
8569                              CPUState *cpu)
8570{
8571    DisasContext *dc = container_of(dcbase, DisasContext, base);
8572
8573    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8574    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8575}
8576
8577static const TranslatorOps i386_tr_ops = {
8578    .init_disas_context = i386_tr_init_disas_context,
8579    .tb_start           = i386_tr_tb_start,
8580    .insn_start         = i386_tr_insn_start,
8581    .breakpoint_check   = i386_tr_breakpoint_check,
8582    .translate_insn     = i386_tr_translate_insn,
8583    .tb_stop            = i386_tr_tb_stop,
8584    .disas_log          = i386_tr_disas_log,
8585};
8586
8587/* generate intermediate code for basic block 'tb'.  */
8588void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb)
8589{
8590    DisasContext dc;
8591
8592    translator_loop(&i386_tr_ops, &dc.base, cpu, tb);
8593}
8594
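    /*
     * The data words were recorded by tcg_gen_insn_start() in
     * i386_tr_insn_start(): data[0] is the pc (eip + cs_base) and
     * data[1] is the cc_op at the start of the instruction.
     */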
8595void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8596                          target_ulong *data)
8597{
8598    int cc_op = data[1];
8599    env->eip = data[0] - tb->cs_base;
8600    if (cc_op != CC_OP_DYNAMIC) {
8601        env->cc_op = cc_op;
8602    }
8603}
8604