qemu/target/i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
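
/* As an illustration: CASE_MODRM_MEM_OP(0) expands to the case ranges
   0x00...0x07, 0x40...0x47 and 0x80...0x87, i.e. every modrm byte whose
   reg/op field is 0 and whose mod field is not 3 (a memory operand);
   CASE_MODRM_OP(0) additionally matches 0xc0...0xc7 (mod == 3, a
   register operand). */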

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    TCGMemOp aflag;
    TCGMemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
#ifdef TARGET_X86_64
    bool x86_64_hregs;
#endif
    int addseg; /* non-zero if any of DS/ES/SS has a non-zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};
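
/* For example, moving from CC_OP_SUBB (DST, SRC and SRCT live) to
   CC_OP_LOGICB (only DST live) leaves SRC and SRCT dead, so set_cc_op()
   below emits tcg discards for cpu_cc_src and cc_srcT instead of keeping
   them alive across the rest of the translation block. */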

static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || s->x86_64_hregs) {
        return false;
    }
#endif
    return true;
}
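
/* For instance, in 32-bit code a byte access to register number 4 means
   AH, while on x86_64 the presence of any REX prefix (tracked in
   s->x86_64_hregs) remaps encodings 4..7 to SPL, BPL, SIL and DIL, so
   the xH special case above no longer applies. */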

/* Select the size of a push/pop operation.  */
static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline TCGMemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline TCGMemOp mo_64_32(TCGMemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
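
/* Worked example: a 16-bit "mov ax, [bx]" with no override and a
   non-zero DS base takes the MO_16 path above, giving
   A0 = DS.base + (EBX & 0xffff), with the sum then truncated to 32 bits
   because we are not in 64-bit code. */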

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

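/* Check that I/O is permitted: when CPL > IOPL (or in vm86 mode) the
   check_io helpers consult the I/O permission bitmap in the TSS and
   raise #GP on failure; when running as an SVM guest, the I/O intercept
   bitmap is consulted as well. */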
static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, s->tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, s->tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, s->tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_GUEST_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;
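
/* A CCPrepare describes a not-yet-materialized condition: the condition
   holds iff cond(reg & mask, imm), or cond(reg, reg2) when use_reg2 is
   set.  E.g. gen_prepare_eflags_p() below returns { .cond = TCG_COND_NE,
   .reg = cpu_cc_src, .mask = CC_P }, i.e. "PF is set iff
   (cc_src & CC_P) != 0". */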

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* Perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}

static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

/* Same method as Valgrind: we generate jumps to the current or next
   instruction. */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single-step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
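
/* The expansions above define gen_repz_movs(), gen_repz_stos(), etc.,
   used when a REP/REPZ/REPNZ prefix is decoded; the GEN_REPZ2 variants
   take an extra NZ argument so that REPZ/REPNZ CMPS and SCAS can also
   terminate on the ZF test emitted by gen_jcc1(). */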

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}
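/* Compute the flags for a variable-count shift: RESULT is the shifted
   value, SHM1 the value shifted by COUNT-1 (its top or bottom bit
   supplies the carry), and COUNT the already-masked shift amount.  A
   count of zero must leave the flags untouched, hence the movcond dance
   below. */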
static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non-zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1629
1630static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1631                          int is_right)
1632{
1633    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1634    int shift;
1635
1636    /* load */
1637    if (op1 == OR_TMP0) {
1638        gen_op_ld_v(s, ot, s->T0, s->A0);
1639    } else {
1640        gen_op_mov_v_reg(s, ot, s->T0, op1);
1641    }
1642
1643    op2 &= mask;
1644    if (op2 != 0) {
1645        switch (ot) {
1646#ifdef TARGET_X86_64
1647        case MO_32:
1648            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1649            if (is_right) {
1650                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1651            } else {
1652                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1653            }
1654            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1655            break;
1656#endif
1657        default:
1658            if (is_right) {
1659                tcg_gen_rotri_tl(s->T0, s->T0, op2);
1660            } else {
1661                tcg_gen_rotli_tl(s->T0, s->T0, op2);
1662            }
1663            break;
1664        case MO_8:
1665            mask = 7;
1666            goto do_shifts;
1667        case MO_16:
1668            mask = 15;
1669        do_shifts:
1670            shift = op2 & mask;
1671            if (is_right) {
1672                shift = mask + 1 - shift;
1673            }
1674            gen_extu(ot, s->T0);
1675            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1676            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1677            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1678            break;
1679        }
1680    }
1681
1682    /* store */
1683    gen_op_st_rm_T0_A0(s, ot, op1);
1684
1685    if (op2 != 0) {
1686        /* Compute the flags into CC_SRC.  */
1687        gen_compute_eflags(s);
1688
1689        /* The value that was "rotated out" is now present at the other end
1690           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1691           since we've computed the flags into CC_SRC, these variables are
1692           currently dead.  */
1693        if (is_right) {
1694            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1695            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1696            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1697        } else {
1698            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1699            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1700        }
1701        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1702        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1703        set_cc_op(s, CC_OP_ADCOX);
1704    }
1705}
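
/* Worked example of the flag computation above (illustrative only): an
   8-bit ROL of 0x81 by 1 stores 0x03.  With mask = 7 from the MO_8 case,
   CC_DST = result & 1 = 1, i.e. CF: the rotated-out MSB reappears as the
   LSB.  CC_SRC2 = ((result >> 7) & 1) ^ CC_DST = 0 ^ 1 = 1, i.e.
   OF = MSB(result) XOR CF, matching the architectural rotate-by-one
   definition.  */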
1706
1707/* XXX: add faster immediate = 1 case */
1708static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1709                           int is_right)
1710{
1711    gen_compute_eflags(s);
1712    assert(s->cc_op == CC_OP_EFLAGS);
1713
1714    /* load */
1715    if (op1 == OR_TMP0)
1716        gen_op_ld_v(s, ot, s->T0, s->A0);
1717    else
1718        gen_op_mov_v_reg(s, ot, s->T0, op1);
1719
1720    if (is_right) {
1721        switch (ot) {
1722        case MO_8:
1723            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1724            break;
1725        case MO_16:
1726            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1727            break;
1728        case MO_32:
1729            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1730            break;
1731#ifdef TARGET_X86_64
1732        case MO_64:
1733            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1734            break;
1735#endif
1736        default:
1737            tcg_abort();
1738        }
1739    } else {
1740        switch (ot) {
1741        case MO_8:
1742            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1743            break;
1744        case MO_16:
1745            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1746            break;
1747        case MO_32:
1748            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1749            break;
1750#ifdef TARGET_X86_64
1751        case MO_64:
1752            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1753            break;
1754#endif
1755        default:
1756            tcg_abort();
1757        }
1758    }
1759    /* store */
1760    gen_op_st_rm_T0_A0(s, ot, op1);
1761}
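
/* RCL/RCR rotate through CF, so an N-bit operand effectively behaves as
   an (N+1)-bit rotate (RCR on a byte rotates a 9-bit quantity).  That
   extra bit is why the cases above defer to out-of-line helpers rather
   than being open-coded with the plain TCG rotate ops.  */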
1762
1763/* XXX: add faster immediate case */
1764static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1765                             bool is_right, TCGv count_in)
1766{
1767    target_ulong mask = (ot == MO_64 ? 63 : 31);
1768    TCGv count;
1769
1770    /* load */
1771    if (op1 == OR_TMP0) {
1772        gen_op_ld_v(s, ot, s->T0, s->A0);
1773    } else {
1774        gen_op_mov_v_reg(s, ot, s->T0, op1);
1775    }
1776
1777    count = tcg_temp_new();
1778    tcg_gen_andi_tl(count, count_in, mask);
1779
1780    switch (ot) {
1781    case MO_16:
1782        /* Note: we implement the Intel behaviour for shift count > 16.
1783           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1784           portion by constructing it as a 32-bit value.  */
1785        if (is_right) {
1786            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1787            tcg_gen_mov_tl(s->T1, s->T0);
1788            tcg_gen_mov_tl(s->T0, s->tmp0);
1789        } else {
1790            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1791        }
1792        /* FALLTHRU */
1793#ifdef TARGET_X86_64
1794    case MO_32:
1795        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1796        tcg_gen_subi_tl(s->tmp0, count, 1);
1797        if (is_right) {
1798            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1799            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1800            tcg_gen_shr_i64(s->T0, s->T0, count);
1801        } else {
1802            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1803            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1804            tcg_gen_shl_i64(s->T0, s->T0, count);
1805            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1806            tcg_gen_shri_i64(s->T0, s->T0, 32);
1807        }
1808        break;
1809#endif
1810    default:
1811        tcg_gen_subi_tl(s->tmp0, count, 1);
1812        if (is_right) {
1813            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1814
1815            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1816            tcg_gen_shr_tl(s->T0, s->T0, count);
1817            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1818        } else {
1819            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1820            if (ot == MO_16) {
1821                /* Only needed if count > 16, for Intel behaviour.  */
1822                tcg_gen_subfi_tl(s->tmp4, 33, count);
1823                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1824                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1825            }
1826
1827            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1828            tcg_gen_shl_tl(s->T0, s->T0, count);
1829            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1830        }
1831        tcg_gen_movi_tl(s->tmp4, 0);
1832        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1833                           s->tmp4, s->T1);
1834        tcg_gen_or_tl(s->T0, s->T0, s->T1);
1835        break;
1836    }
1837
1838    /* store */
1839    gen_op_st_rm_T0_A0(s, ot, op1);
1840
1841    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1842    tcg_temp_free(count);
1843}
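
/* Illustration of the MO_16 Intel behaviour above (not a statement from
   the original comments): for "shrdw $C, %B, %A" with C in [17, 31] the
   stored result is the low 16 bits of ((A << 32) | (B << 16) | A) >> C,
   i.e. the destination's own bits are shifted back in once the source is
   exhausted.  The manuals leave counts beyond the operand size undefined,
   so other implementations may differ.  */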
1844
1845static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1846{
1847    if (s != OR_TMP1)
1848        gen_op_mov_v_reg(s1, ot, s1->T1, s);
1849    switch(op) {
1850    case OP_ROL:
1851        gen_rot_rm_T1(s1, ot, d, 0);
1852        break;
1853    case OP_ROR:
1854        gen_rot_rm_T1(s1, ot, d, 1);
1855        break;
1856    case OP_SHL:
1857    case OP_SHL1:
1858        gen_shift_rm_T1(s1, ot, d, 0, 0);
1859        break;
1860    case OP_SHR:
1861        gen_shift_rm_T1(s1, ot, d, 1, 0);
1862        break;
1863    case OP_SAR:
1864        gen_shift_rm_T1(s1, ot, d, 1, 1);
1865        break;
1866    case OP_RCL:
1867        gen_rotc_rm_T1(s1, ot, d, 0);
1868        break;
1869    case OP_RCR:
1870        gen_rotc_rm_T1(s1, ot, d, 1);
1871        break;
1872    }
1873}
1874
1875static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1876{
1877    switch(op) {
1878    case OP_ROL:
1879        gen_rot_rm_im(s1, ot, d, c, 0);
1880        break;
1881    case OP_ROR:
1882        gen_rot_rm_im(s1, ot, d, c, 1);
1883        break;
1884    case OP_SHL:
1885    case OP_SHL1:
1886        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1887        break;
1888    case OP_SHR:
1889        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1890        break;
1891    case OP_SAR:
1892        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1893        break;
1894    default:
1895        /* currently not optimized */
1896        tcg_gen_movi_tl(s1->T1, c);
1897        gen_shift(s1, op, ot, d, OR_TMP1);
1898        break;
1899    }
1900}
1901
1902#define X86_MAX_INSN_LENGTH 15
1903
1904static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1905{
1906    uint64_t pc = s->pc;
1907
1908    s->pc += num_bytes;
1909    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1910        /* If the instruction's 16th byte is on a different page than the 1st, a
1911         * page fault on the second page wins over the general protection fault
1912         * caused by the instruction being too long.
1913         * This can happen even if the operand is only one byte long!
1914         */
1915        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1916            volatile uint8_t unused =
1917                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1918            (void) unused;
1919        }
1920        siglongjmp(s->jmpbuf, 1);
1921    }
1922
1923    return pc;
1924}
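
/* Example of the priority rule above: an instruction starting at offset
   0xfff of a page has its later bytes on the next page.  If decoding
   also runs past the 15-byte limit, the dummy cpu_ldub_code() of the
   last byte can raise #PF on the second page, and that fault must be
   delivered instead of the #GP for an over-long instruction.  */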
1925
1926static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1927{
1928    return cpu_ldub_code(env, advance_pc(env, s, 1));
1929}
1930
1931static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1932{
1933    return cpu_ldsw_code(env, advance_pc(env, s, 2));
1934}
1935
1936static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1937{
1938    return cpu_lduw_code(env, advance_pc(env, s, 2));
1939}
1940
1941static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1942{
1943    return cpu_ldl_code(env, advance_pc(env, s, 4));
1944}
1945
1946#ifdef TARGET_X86_64
1947static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1948{
1949    return cpu_ldq_code(env, advance_pc(env, s, 8));
1950}
1951#endif
1952
1953/* Decompose an address.  */
1954
1955typedef struct AddressParts {
1956    int def_seg;
1957    int base;
1958    int index;
1959    int scale;
1960    target_long disp;
1961} AddressParts;
1962
1963static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1964                                    int modrm)
1965{
1966    int def_seg, base, index, scale, mod, rm;
1967    target_long disp;
1968    bool havesib;
1969
1970    def_seg = R_DS;
1971    index = -1;
1972    scale = 0;
1973    disp = 0;
1974
1975    mod = (modrm >> 6) & 3;
1976    rm = modrm & 7;
1977    base = rm | REX_B(s);
1978
1979    if (mod == 3) {
1980        /* Normally filtered out earlier, but including this path
1981           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1982        goto done;
1983    }
1984
1985    switch (s->aflag) {
1986    case MO_64:
1987    case MO_32:
1988        havesib = 0;
1989        if (rm == 4) {
1990            int code = x86_ldub_code(env, s);
1991            scale = (code >> 6) & 3;
1992            index = ((code >> 3) & 7) | REX_X(s);
1993            if (index == 4) {
1994                index = -1;  /* no index */
1995            }
1996            base = (code & 7) | REX_B(s);
1997            havesib = 1;
1998        }
1999
2000        switch (mod) {
2001        case 0:
2002            if ((base & 7) == 5) {
2003                base = -1;
2004                disp = (int32_t)x86_ldl_code(env, s);
2005                if (CODE64(s) && !havesib) {
2006                    base = -2;
2007                    disp += s->pc + s->rip_offset;
2008                }
2009            }
2010            break;
2011        case 1:
2012            disp = (int8_t)x86_ldub_code(env, s);
2013            break;
2014        default:
2015        case 2:
2016            disp = (int32_t)x86_ldl_code(env, s);
2017            break;
2018        }
2019
2020        /* For correct popl handling with esp.  */
2021        if (base == R_ESP && s->popl_esp_hack) {
2022            disp += s->popl_esp_hack;
2023        }
2024        if (base == R_EBP || base == R_ESP) {
2025            def_seg = R_SS;
2026        }
2027        break;
2028
2029    case MO_16:
2030        if (mod == 0) {
2031            if (rm == 6) {
2032                base = -1;
2033                disp = x86_lduw_code(env, s);
2034                break;
2035            }
2036        } else if (mod == 1) {
2037            disp = (int8_t)x86_ldub_code(env, s);
2038        } else {
2039            disp = (int16_t)x86_lduw_code(env, s);
2040        }
2041
2042        switch (rm) {
2043        case 0:
2044            base = R_EBX;
2045            index = R_ESI;
2046            break;
2047        case 1:
2048            base = R_EBX;
2049            index = R_EDI;
2050            break;
2051        case 2:
2052            base = R_EBP;
2053            index = R_ESI;
2054            def_seg = R_SS;
2055            break;
2056        case 3:
2057            base = R_EBP;
2058            index = R_EDI;
2059            def_seg = R_SS;
2060            break;
2061        case 4:
2062            base = R_ESI;
2063            break;
2064        case 5:
2065            base = R_EDI;
2066            break;
2067        case 6:
2068            base = R_EBP;
2069            def_seg = R_SS;
2070            break;
2071        default:
2072        case 7:
2073            base = R_EBX;
2074            break;
2075        }
2076        break;
2077
2078    default:
2079        tcg_abort();
2080    }
2081
2082 done:
2083    return (AddressParts){ def_seg, base, index, scale, disp };
2084}
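
/* Worked example (illustrative): for the 32-bit instruction
   "mov eax, [esi + ebx*4 + 0x10]", encoded 8B 44 9E 10, modrm = 0x44
   gives mod = 1 and rm = 4 (a SIB byte follows); sib = 0x9e gives
   scale = 2, index = 3 (EBX) and base = 6 (ESI); the disp8 is 0x10.
   The function above thus returns { .def_seg = R_DS, .base = R_ESI,
   .index = R_EBX, .scale = 2, .disp = 0x10 }.  */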
2085
2086/* Compute the address, with a minimum number of TCG ops.  */
2087static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2088{
2089    TCGv ea = NULL;
2090
2091    if (a.index >= 0) {
2092        if (a.scale == 0) {
2093            ea = cpu_regs[a.index];
2094        } else {
2095            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2096            ea = s->A0;
2097        }
2098        if (a.base >= 0) {
2099            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2100            ea = s->A0;
2101        }
2102    } else if (a.base >= 0) {
2103        ea = cpu_regs[a.base];
2104    }
2105    if (!ea) {
2106        tcg_gen_movi_tl(s->A0, a.disp);
2107        ea = s->A0;
2108    } else if (a.disp != 0) {
2109        tcg_gen_addi_tl(s->A0, ea, a.disp);
2110        ea = s->A0;
2111    }
2112
2113    return ea;
2114}
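
/* For the AddressParts in the example above this emits three ops:
   shli A0, EBX, 2; add A0, A0, ESI; addi A0, A0, 0x10.  Degenerate
   forms emit fewer: a bare base register with zero displacement is
   returned directly as cpu_regs[base] with no ops at all.  */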
2115
2116static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2117{
2118    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2119    TCGv ea = gen_lea_modrm_1(s, a);
2120    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2121}
2122
2123static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2124{
2125    (void)gen_lea_modrm_0(env, s, modrm);
2126}
2127
2128/* Used for BNDCL, BNDCU, BNDCN.  */
2129static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2130                      TCGCond cond, TCGv_i64 bndv)
2131{
2132    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2133
2134    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2135    if (!CODE64(s)) {
2136        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2137    }
2138    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2139    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2140    gen_helper_bndck(cpu_env, s->tmp2_i32);
2141}
2142
2143/* used for LEA and MOV AX, mem */
2144static void gen_add_A0_ds_seg(DisasContext *s)
2145{
2146    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2147}
2148
2149/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2150   OR_TMP0 */
2151static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2152                           TCGMemOp ot, int reg, int is_store)
2153{
2154    int mod, rm;
2155
2156    mod = (modrm >> 6) & 3;
2157    rm = (modrm & 7) | REX_B(s);
2158    if (mod == 3) {
2159        if (is_store) {
2160            if (reg != OR_TMP0)
2161                gen_op_mov_v_reg(s, ot, s->T0, reg);
2162            gen_op_mov_reg_v(s, ot, rm, s->T0);
2163        } else {
2164            gen_op_mov_v_reg(s, ot, s->T0, rm);
2165            if (reg != OR_TMP0)
2166                gen_op_mov_reg_v(s, ot, reg, s->T0);
2167        }
2168    } else {
2169        gen_lea_modrm(env, s, modrm);
2170        if (is_store) {
2171            if (reg != OR_TMP0)
2172                gen_op_mov_v_reg(s, ot, s->T0, reg);
2173            gen_op_st_v(s, ot, s->T0, s->A0);
2174        } else {
2175            gen_op_ld_v(s, ot, s->T0, s->A0);
2176            if (reg != OR_TMP0)
2177                gen_op_mov_reg_v(s, ot, reg, s->T0);
2178        }
2179    }
2180}
2181
2182static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2183{
2184    uint32_t ret;
2185
2186    switch (ot) {
2187    case MO_8:
2188        ret = x86_ldub_code(env, s);
2189        break;
2190    case MO_16:
2191        ret = x86_lduw_code(env, s);
2192        break;
2193    case MO_32:
2194#ifdef TARGET_X86_64
2195    case MO_64:
2196#endif
2197        ret = x86_ldl_code(env, s);
2198        break;
2199    default:
2200        tcg_abort();
2201    }
2202    return ret;
2203}
2204
2205static inline int insn_const_size(TCGMemOp ot)
2206{
2207    if (ot <= MO_32) {
2208        return 1 << ot;
2209    } else {
2210        return 4;
2211    }
2212}
2213
2214static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2215{
2216#ifndef CONFIG_USER_ONLY
2217    return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2218           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2219#else
2220    return true;
2221#endif
2222}
2223
2224static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2225{
2226    target_ulong pc = s->cs_base + eip;
2227
2228    if (use_goto_tb(s, pc))  {
2229        /* jump to same page: we can use a direct jump */
2230        tcg_gen_goto_tb(tb_num);
2231        gen_jmp_im(s, eip);
2232        tcg_gen_exit_tb(s->base.tb, tb_num);
2233        s->base.is_jmp = DISAS_NORETURN;
2234    } else {
2235        /* jump to another page */
2236        gen_jmp_im(s, eip);
2237        gen_jr(s, s->tmp0);
2238    }
2239}
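
/* The same-page restriction exists because a chained (goto_tb) jump
   bypasses the TB lookup: it is only safe when any remapping of the
   target page would also have invalidated this TB.  A cross-page target
   therefore takes gen_jr, which re-looks up the destination TB at run
   time via lookup_and_goto_ptr.  */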
2240
2241static inline void gen_jcc(DisasContext *s, int b,
2242                           target_ulong val, target_ulong next_eip)
2243{
2244    TCGLabel *l1, *l2;
2245
2246    if (s->jmp_opt) {
2247        l1 = gen_new_label();
2248        gen_jcc1(s, b, l1);
2249
2250        gen_goto_tb(s, 0, next_eip);
2251
2252        gen_set_label(l1);
2253        gen_goto_tb(s, 1, val);
2254    } else {
2255        l1 = gen_new_label();
2256        l2 = gen_new_label();
2257        gen_jcc1(s, b, l1);
2258
2259        gen_jmp_im(s, next_eip);
2260        tcg_gen_br(l2);
2261
2262        gen_set_label(l1);
2263        gen_jmp_im(s, val);
2264        gen_set_label(l2);
2265        gen_eob(s);
2266    }
2267}
2268
2269static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2270                        int modrm, int reg)
2271{
2272    CCPrepare cc;
2273
2274    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2275
2276    cc = gen_prepare_cc(s, b, s->T1);
2277    if (cc.mask != -1) {
2278        TCGv t0 = tcg_temp_new();
2279        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2280        cc.reg = t0;
2281    }
2282    if (!cc.use_reg2) {
2283        cc.reg2 = tcg_const_tl(cc.imm);
2284    }
2285
2286    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2287                       s->T0, cpu_regs[reg]);
2288    gen_op_mov_reg_v(s, ot, reg, s->T0);
2289
2290    if (cc.mask != -1) {
2291        tcg_temp_free(cc.reg);
2292    }
2293    if (!cc.use_reg2) {
2294        tcg_temp_free(cc.reg2);
2295    }
2296}
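
/* Note that the r/m operand is loaded unconditionally: CMOV from memory
   architecturally performs the read (and may fault) even when the
   condition is false, so modelling it with movcond on the loaded value,
   rather than branching around the load, is the correct behaviour.  */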
2297
2298static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
2299{
2300    tcg_gen_ld32u_tl(s->T0, cpu_env,
2301                     offsetof(CPUX86State,segs[seg_reg].selector));
2302}
2303
2304static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
2305{
2306    tcg_gen_ext16u_tl(s->T0, s->T0);
2307    tcg_gen_st32_tl(s->T0, cpu_env,
2308                    offsetof(CPUX86State,segs[seg_reg].selector));
2309    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2310}
2311
2312/* move T0 to seg_reg and compute if the CPU state may change. Never
2313   call this function with seg_reg == R_CS */
2314static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2315{
2316    if (s->pe && !s->vm86) {
2317        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2318        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2319        /* abort translation because the addseg value may change or
2320           because ss32 may change. For R_SS, translation must always
2321           stop as a special handling must be done to disable hardware
2322           interrupts for the next instruction */
2323        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2324            s->base.is_jmp = DISAS_TOO_MANY;
2325        }
2326    } else {
2327        gen_op_movl_seg_T0_vm(s, seg_reg);
2328        if (seg_reg == R_SS) {
2329            s->base.is_jmp = DISAS_TOO_MANY;
2330        }
2331    }
2332}
2333
2334static inline int svm_is_rep(int prefixes)
2335{
2336    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2337}
2338
2339static inline void
2340gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2341                              uint32_t type, uint64_t param)
2342{
2343    /* no SVM activated; fast case */
2344    if (likely(!(s->flags & HF_GUEST_MASK)))
2345        return;
2346    gen_update_cc_op(s);
2347    gen_jmp_im(s, pc_start - s->cs_base);
2348    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2349                                         tcg_const_i64(param));
2350}
2351
2352static inline void
2353gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2354{
2355    gen_svm_check_intercept_param(s, pc_start, type, 0);
2356}
2357
2358static inline void gen_stack_update(DisasContext *s, int addend)
2359{
2360    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2361}
2362
2363/* Generate a push. It depends on ss32, addseg and dflag.  */
2364static void gen_push_v(DisasContext *s, TCGv val)
2365{
2366    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2367    TCGMemOp a_ot = mo_stacksize(s);
2368    int size = 1 << d_ot;
2369    TCGv new_esp = s->A0;
2370
2371    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2372
2373    if (!CODE64(s)) {
2374        if (s->addseg) {
2375            new_esp = s->tmp4;
2376            tcg_gen_mov_tl(new_esp, s->A0);
2377        }
2378        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2379    }
2380
2381    gen_op_st_v(s, d_ot, val, s->A0);
2382    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2383}
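
/* Example: with dflag = aflag = MO_32 and addseg clear this reduces to
   "A0 = ESP - 4; store val at SS:A0; ESP = A0".  The new_esp detour via
   tmp4 only matters when addseg is set, because gen_lea_v_seg then
   rewrites A0 into a segment-adjusted address while ESP must still be
   loaded with the unadjusted value.  */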
2384
2385/* two step pop is necessary for precise exceptions */
2386static TCGMemOp gen_pop_T0(DisasContext *s)
2387{
2388    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2389
2390    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2391    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2392
2393    return d_ot;
2394}
2395
2396static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2397{
2398    gen_stack_update(s, 1 << ot);
2399}
2400
2401static inline void gen_stack_A0(DisasContext *s)
2402{
2403    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2404}
2405
2406static void gen_pusha(DisasContext *s)
2407{
2408    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2409    TCGMemOp d_ot = s->dflag;
2410    int size = 1 << d_ot;
2411    int i;
2412
2413    for (i = 0; i < 8; i++) {
2414        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2415        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2416        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2417    }
2418
2419    gen_stack_update(s, -8 * size);
2420}
2421
2422static void gen_popa(DisasContext *s)
2423{
2424    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2425    TCGMemOp d_ot = s->dflag;
2426    int size = 1 << d_ot;
2427    int i;
2428
2429    for (i = 0; i < 8; i++) {
2430        /* ESP is not reloaded */
2431        if (7 - i == R_ESP) {
2432            continue;
2433        }
2434        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2435        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2436        gen_op_ld_v(s, d_ot, s->T0, s->A0);
2437        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2438    }
2439
2440    gen_stack_update(s, 8 * size);
2441}
2442
2443static void gen_enter(DisasContext *s, int esp_addend, int level)
2444{
2445    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2446    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2447    int size = 1 << d_ot;
2448
2449    /* Push BP; compute FrameTemp into T1.  */
2450    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2451    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2452    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2453
2454    level &= 31;
2455    if (level != 0) {
2456        int i;
2457
2458        /* Copy level-1 pointers from the previous frame.  */
2459        for (i = 1; i < level; ++i) {
2460            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2461            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2462            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2463
2464            tcg_gen_subi_tl(s->A0, s->T1, size * i);
2465            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2466            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2467        }
2468
2469        /* Push the current FrameTemp as the last level.  */
2470        tcg_gen_subi_tl(s->A0, s->T1, size * level);
2471        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2472        gen_op_st_v(s, d_ot, s->T1, s->A0);
2473    }
2474
2475    /* Copy the FrameTemp value to EBP.  */
2476    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2477
2478    /* Compute the final value of ESP.  */
2479    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2480    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2481}
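
/* Worked example (32-bit stack and data assumed): "enter 16, 2" pushes
   EBP, giving FrameTemp = ESP - 4; copies one saved frame pointer from
   [EBP - 4] to [FrameTemp - 4]; stores FrameTemp itself at
   [FrameTemp - 8]; then sets EBP = FrameTemp and
   ESP = FrameTemp - (16 + 2 * 4).  */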
2482
2483static void gen_leave(DisasContext *s)
2484{
2485    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2486    TCGMemOp a_ot = mo_stacksize(s);
2487
2488    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2489    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2490
2491    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2492
2493    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2494    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2495}
2496
2497/* Similarly, except that the assumption here is that we don't decode
2498   the instruction at all -- either a missing opcode, an unimplemented
2499   feature, or just a bogus instruction stream.  */
2500static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2501{
2502    gen_illegal_opcode(s);
2503
2504    if (qemu_loglevel_mask(LOG_UNIMP)) {
2505        target_ulong pc = s->pc_start, end = s->pc;
2506        qemu_log_lock();
2507        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2508        for (; pc < end; ++pc) {
2509            qemu_log(" %02x", cpu_ldub_code(env, pc));
2510        }
2511        qemu_log("\n");
2512        qemu_log_unlock();
2513    }
2514}
2515
2516/* an interrupt is different from an exception because of the
2517   privilege checks */
2518static void gen_interrupt(DisasContext *s, int intno,
2519                          target_ulong cur_eip, target_ulong next_eip)
2520{
2521    gen_update_cc_op(s);
2522    gen_jmp_im(s, cur_eip);
2523    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2524                               tcg_const_i32(next_eip - cur_eip));
2525    s->base.is_jmp = DISAS_NORETURN;
2526}
2527
2528static void gen_debug(DisasContext *s, target_ulong cur_eip)
2529{
2530    gen_update_cc_op(s);
2531    gen_jmp_im(s, cur_eip);
2532    gen_helper_debug(cpu_env);
2533    s->base.is_jmp = DISAS_NORETURN;
2534}
2535
2536static void gen_set_hflag(DisasContext *s, uint32_t mask)
2537{
2538    if ((s->flags & mask) == 0) {
2539        TCGv_i32 t = tcg_temp_new_i32();
2540        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2541        tcg_gen_ori_i32(t, t, mask);
2542        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2543        tcg_temp_free_i32(t);
2544        s->flags |= mask;
2545    }
2546}
2547
2548static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2549{
2550    if (s->flags & mask) {
2551        TCGv_i32 t = tcg_temp_new_i32();
2552        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2553        tcg_gen_andi_i32(t, t, ~mask);
2554        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2555        tcg_temp_free_i32(t);
2556        s->flags &= ~mask;
2557    }
2558}
2559
2560/* Clear BND registers during legacy branches.  */
2561static void gen_bnd_jmp(DisasContext *s)
2562{
2563    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2564       and if the BNDREGs are known to be in use (non-zero) already.
2565       The helper itself will check BNDPRESERVE at runtime.  */
2566    if ((s->prefix & PREFIX_REPNZ) == 0
2567        && (s->flags & HF_MPX_EN_MASK) != 0
2568        && (s->flags & HF_MPX_IU_MASK) != 0) {
2569        gen_helper_bnd_jmp(cpu_env);
2570    }
2571}
2572
2573/* Generate an end of block. Trace exception is also generated if needed.
2574   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2575   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2576   S->TF.  This is used by the syscall/sysret insns.  */
2577static void
2578do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2579{
2580    gen_update_cc_op(s);
2581
2582    /* If several instructions disable interrupts, only the first does it.  */
2583    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2584        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2585    } else {
2586        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2587    }
2588
2589    if (s->base.tb->flags & HF_RF_MASK) {
2590        gen_helper_reset_rf(cpu_env);
2591    }
2592    if (s->base.singlestep_enabled) {
2593        gen_helper_debug(cpu_env);
2594    } else if (recheck_tf) {
2595        gen_helper_rechecking_single_step(cpu_env);
2596        tcg_gen_exit_tb(NULL, 0);
2597    } else if (s->tf) {
2598        gen_helper_single_step(cpu_env);
2599    } else if (jr) {
2600        tcg_gen_lookup_and_goto_ptr();
2601    } else {
2602        tcg_gen_exit_tb(NULL, 0);
2603    }
2604    s->base.is_jmp = DISAS_NORETURN;
2605}
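
/* The exit paths above are tried in priority order: the debugger's
   single-step request first, then the #DB re-check and guest TF cases,
   and only when none of those apply does the block end in a goto_ptr
   lookup (the jr case) or a plain exit_tb.  */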
2606
2607static inline void
2608gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2609{
2610    do_gen_eob_worker(s, inhibit, recheck_tf, false);
2611}
2612
2613/* End of block.
2614   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2615static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2616{
2617    gen_eob_worker(s, inhibit, false);
2618}
2619
2620/* End of block, resetting the inhibit irq flag.  */
2621static void gen_eob(DisasContext *s)
2622{
2623    gen_eob_worker(s, false, false);
2624}
2625
2626/* Jump to register */
2627static void gen_jr(DisasContext *s, TCGv dest)
2628{
2629    do_gen_eob_worker(s, false, false, true);
2630}
2631
2632/* generate a jump to eip. No segment change must happen before as a
2633   direct call to the next block may occur */
2634static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2635{
2636    gen_update_cc_op(s);
2637    set_cc_op(s, CC_OP_DYNAMIC);
2638    if (s->jmp_opt) {
2639        gen_goto_tb(s, tb_num, eip);
2640    } else {
2641        gen_jmp_im(s, eip);
2642        gen_eob(s);
2643    }
2644}
2645
2646static void gen_jmp(DisasContext *s, target_ulong eip)
2647{
2648    gen_jmp_tb(s, eip, 0);
2649}
2650
2651static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2652{
2653    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2654    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2655}
2656
2657static inline void gen_stq_env_A0(DisasContext *s, int offset)
2658{
2659    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2660    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2661}
2662
2663static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2664{
2665    int mem_index = s->mem_index;
2666    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2667    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2668    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2669    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2670    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2671}
2672
2673static inline void gen_sto_env_A0(DisasContext *s, int offset)
2674{
2675    int mem_index = s->mem_index;
2676    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2677    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2678    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2679    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2680    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2681}
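
/* The two helpers above split a 128-bit XMM access into two 64-bit
   little-endian memory operations.  This keeps them usable on 32-bit
   hosts, at the cost of the access not being single-copy atomic, which
   is acceptable since SSE does not guarantee atomicity of 16-byte loads
   and stores.  */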
2682
2683static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2684{
2685    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2686    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2687    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2688    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2689}
2690
2691static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2692{
2693    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2694    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2695}
2696
2697static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2698{
2699    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2700    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2701}
2702
2703static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2704{
2705    tcg_gen_movi_i64(s->tmp1_i64, 0);
2706    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2707}
2708
2709typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2710typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2711typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2712typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2713typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2714typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2715                               TCGv_i32 val);
2716typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2717typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2718                               TCGv val);
2719
2720#define SSE_SPECIAL ((void *)1)
2721#define SSE_DUMMY ((void *)2)
2722
2723#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2724#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2725                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2726
2727static const SSEFunc_0_epp sse_op_table1[256][4] = {
2728    /* 3DNow! extensions */
2729    [0x0e] = { SSE_DUMMY }, /* femms */
2730    [0x0f] = { SSE_DUMMY }, /* pf... */
2731    /* pure SSE operations */
2732    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2733    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2734    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2735    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2736    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2737    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2738    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2739    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2740
2741    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2742    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2743    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2744    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2745    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2746    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2747    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2748    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2749    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2750    [0x51] = SSE_FOP(sqrt),
2751    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2752    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2753    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2754    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2755    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2756    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2757    [0x58] = SSE_FOP(add),
2758    [0x59] = SSE_FOP(mul),
2759    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2760               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2761    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2762    [0x5c] = SSE_FOP(sub),
2763    [0x5d] = SSE_FOP(min),
2764    [0x5e] = SSE_FOP(div),
2765    [0x5f] = SSE_FOP(max),
2766
2767    [0xc2] = SSE_FOP(cmpeq),
2768    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2769               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2770
2771    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2772    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2773    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2774
2775    /* MMX ops and their SSE extensions */
2776    [0x60] = MMX_OP2(punpcklbw),
2777    [0x61] = MMX_OP2(punpcklwd),
2778    [0x62] = MMX_OP2(punpckldq),
2779    [0x63] = MMX_OP2(packsswb),
2780    [0x64] = MMX_OP2(pcmpgtb),
2781    [0x65] = MMX_OP2(pcmpgtw),
2782    [0x66] = MMX_OP2(pcmpgtl),
2783    [0x67] = MMX_OP2(packuswb),
2784    [0x68] = MMX_OP2(punpckhbw),
2785    [0x69] = MMX_OP2(punpckhwd),
2786    [0x6a] = MMX_OP2(punpckhdq),
2787    [0x6b] = MMX_OP2(packssdw),
2788    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2789    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2790    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2791    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2792    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2793               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2794               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2795               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2796    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2797    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2798    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2799    [0x74] = MMX_OP2(pcmpeqb),
2800    [0x75] = MMX_OP2(pcmpeqw),
2801    [0x76] = MMX_OP2(pcmpeql),
2802    [0x77] = { SSE_DUMMY }, /* emms */
2803    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2804    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2805    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2806    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2807    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2808    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2809    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2810    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2811    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2812    [0xd1] = MMX_OP2(psrlw),
2813    [0xd2] = MMX_OP2(psrld),
2814    [0xd3] = MMX_OP2(psrlq),
2815    [0xd4] = MMX_OP2(paddq),
2816    [0xd5] = MMX_OP2(pmullw),
2817    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2818    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2819    [0xd8] = MMX_OP2(psubusb),
2820    [0xd9] = MMX_OP2(psubusw),
2821    [0xda] = MMX_OP2(pminub),
2822    [0xdb] = MMX_OP2(pand),
2823    [0xdc] = MMX_OP2(paddusb),
2824    [0xdd] = MMX_OP2(paddusw),
2825    [0xde] = MMX_OP2(pmaxub),
2826    [0xdf] = MMX_OP2(pandn),
2827    [0xe0] = MMX_OP2(pavgb),
2828    [0xe1] = MMX_OP2(psraw),
2829    [0xe2] = MMX_OP2(psrad),
2830    [0xe3] = MMX_OP2(pavgw),
2831    [0xe4] = MMX_OP2(pmulhuw),
2832    [0xe5] = MMX_OP2(pmulhw),
2833    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2834    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2835    [0xe8] = MMX_OP2(psubsb),
2836    [0xe9] = MMX_OP2(psubsw),
2837    [0xea] = MMX_OP2(pminsw),
2838    [0xeb] = MMX_OP2(por),
2839    [0xec] = MMX_OP2(paddsb),
2840    [0xed] = MMX_OP2(paddsw),
2841    [0xee] = MMX_OP2(pmaxsw),
2842    [0xef] = MMX_OP2(pxor),
2843    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2844    [0xf1] = MMX_OP2(psllw),
2845    [0xf2] = MMX_OP2(pslld),
2846    [0xf3] = MMX_OP2(psllq),
2847    [0xf4] = MMX_OP2(pmuludq),
2848    [0xf5] = MMX_OP2(pmaddwd),
2849    [0xf6] = MMX_OP2(psadbw),
2850    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2851               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2852    [0xf8] = MMX_OP2(psubb),
2853    [0xf9] = MMX_OP2(psubw),
2854    [0xfa] = MMX_OP2(psubl),
2855    [0xfb] = MMX_OP2(psubq),
2856    [0xfc] = MMX_OP2(paddb),
2857    [0xfd] = MMX_OP2(paddw),
2858    [0xfe] = MMX_OP2(paddl),
2859};
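
/* Column selection in the table above follows the mandatory-prefix
   encoding: index 0 = no prefix, 1 = 0x66, 2 = 0xF3, 3 = 0xF2.  For
   example, row 0x58 (SSE_FOP(add)) resolves to addps, addpd, addss and
   addsd respectively under those four prefixes.  */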
2860
2861static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2862    [0 + 2] = MMX_OP2(psrlw),
2863    [0 + 4] = MMX_OP2(psraw),
2864    [0 + 6] = MMX_OP2(psllw),
2865    [8 + 2] = MMX_OP2(psrld),
2866    [8 + 4] = MMX_OP2(psrad),
2867    [8 + 6] = MMX_OP2(pslld),
2868    [16 + 2] = MMX_OP2(psrlq),
2869    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2870    [16 + 6] = MMX_OP2(psllq),
2871    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2872};
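
/* This table serves the 0F 71/72/73 immediate shift groups.  The row
   appears to be chosen as ((b - 1) & 3) * 8 plus the modrm reg field
   (2 = logical right, 4 = arithmetic right, 6 = left, with the xmm-only
   dq byte shifts at reg 3 and 7); the column is MMX vs SSE as usual.  */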
2873
2874static const SSEFunc_0_epi sse_op_table3ai[] = {
2875    gen_helper_cvtsi2ss,
2876    gen_helper_cvtsi2sd
2877};
2878
2879#ifdef TARGET_X86_64
2880static const SSEFunc_0_epl sse_op_table3aq[] = {
2881    gen_helper_cvtsq2ss,
2882    gen_helper_cvtsq2sd
2883};
2884#endif
2885
2886static const SSEFunc_i_ep sse_op_table3bi[] = {
2887    gen_helper_cvttss2si,
2888    gen_helper_cvtss2si,
2889    gen_helper_cvttsd2si,
2890    gen_helper_cvtsd2si
2891};
2892
2893#ifdef TARGET_X86_64
2894static const SSEFunc_l_ep sse_op_table3bq[] = {
2895    gen_helper_cvttss2sq,
2896    gen_helper_cvtss2sq,
2897    gen_helper_cvttsd2sq,
2898    gen_helper_cvtsd2sq
2899};
2900#endif
2901
2902static const SSEFunc_0_epp sse_op_table4[8][4] = {
2903    SSE_FOP(cmpeq),
2904    SSE_FOP(cmplt),
2905    SSE_FOP(cmple),
2906    SSE_FOP(cmpunord),
2907    SSE_FOP(cmpneq),
2908    SSE_FOP(cmpnlt),
2909    SSE_FOP(cmpnle),
2910    SSE_FOP(cmpord),
2911};
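
/* sse_op_table4 is indexed by the low bits of the imm8 comparison
   predicate of CMPPS/CMPPD/CMPSS/CMPSD, so e.g. "cmpps xmm, xmm, 2"
   selects SSE_FOP(cmple), i.e. cmpleps.  */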
2912
2913static const SSEFunc_0_epp sse_op_table5[256] = {
2914    [0x0c] = gen_helper_pi2fw,
2915    [0x0d] = gen_helper_pi2fd,
2916    [0x1c] = gen_helper_pf2iw,
2917    [0x1d] = gen_helper_pf2id,
2918    [0x8a] = gen_helper_pfnacc,
2919    [0x8e] = gen_helper_pfpnacc,
2920    [0x90] = gen_helper_pfcmpge,
2921    [0x94] = gen_helper_pfmin,
2922    [0x96] = gen_helper_pfrcp,
2923    [0x97] = gen_helper_pfrsqrt,
2924    [0x9a] = gen_helper_pfsub,
2925    [0x9e] = gen_helper_pfadd,
2926    [0xa0] = gen_helper_pfcmpgt,
2927    [0xa4] = gen_helper_pfmax,
2928    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2929    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2930    [0xaa] = gen_helper_pfsubr,
2931    [0xae] = gen_helper_pfacc,
2932    [0xb0] = gen_helper_pfcmpeq,
2933    [0xb4] = gen_helper_pfmul,
2934    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2935    [0xb7] = gen_helper_pmulhrw_mmx,
2936    [0xbb] = gen_helper_pswapd,
2937    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2938};
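
/* sse_op_table5 dispatches 3DNow! instructions, which encode the actual
   operation in a suffix byte that follows the modrm/operand bytes; the
   table is indexed directly by that suffix.  */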
2939
2940struct SSEOpHelper_epp {
2941    SSEFunc_0_epp op[2];
2942    uint32_t ext_mask;
2943};
2944
2945struct SSEOpHelper_eppi {
2946    SSEFunc_0_eppi op[2];
2947    uint32_t ext_mask;
2948};
2949
2950#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2951#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2952#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2953#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2954#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2955        CPUID_EXT_PCLMULQDQ }
2956#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2957
2958static const struct SSEOpHelper_epp sse_op_table6[256] = {
2959    [0x00] = SSSE3_OP(pshufb),
2960    [0x01] = SSSE3_OP(phaddw),
2961    [0x02] = SSSE3_OP(phaddd),
2962    [0x03] = SSSE3_OP(phaddsw),
2963    [0x04] = SSSE3_OP(pmaddubsw),
2964    [0x05] = SSSE3_OP(phsubw),
2965    [0x06] = SSSE3_OP(phsubd),
2966    [0x07] = SSSE3_OP(phsubsw),
2967    [0x08] = SSSE3_OP(psignb),
2968    [0x09] = SSSE3_OP(psignw),
2969    [0x0a] = SSSE3_OP(psignd),
2970    [0x0b] = SSSE3_OP(pmulhrsw),
2971    [0x10] = SSE41_OP(pblendvb),
2972    [0x14] = SSE41_OP(blendvps),
2973    [0x15] = SSE41_OP(blendvpd),
2974    [0x17] = SSE41_OP(ptest),
2975    [0x1c] = SSSE3_OP(pabsb),
2976    [0x1d] = SSSE3_OP(pabsw),
2977    [0x1e] = SSSE3_OP(pabsd),
2978    [0x20] = SSE41_OP(pmovsxbw),
2979    [0x21] = SSE41_OP(pmovsxbd),
2980    [0x22] = SSE41_OP(pmovsxbq),
2981    [0x23] = SSE41_OP(pmovsxwd),
2982    [0x24] = SSE41_OP(pmovsxwq),
2983    [0x25] = SSE41_OP(pmovsxdq),
2984    [0x28] = SSE41_OP(pmuldq),
2985    [0x29] = SSE41_OP(pcmpeqq),
2986    [0x2a] = SSE41_SPECIAL, /* movntqda */
2987    [0x2b] = SSE41_OP(packusdw),
2988    [0x30] = SSE41_OP(pmovzxbw),
2989    [0x31] = SSE41_OP(pmovzxbd),
2990    [0x32] = SSE41_OP(pmovzxbq),
2991    [0x33] = SSE41_OP(pmovzxwd),
2992    [0x34] = SSE41_OP(pmovzxwq),
2993    [0x35] = SSE41_OP(pmovzxdq),
2994    [0x37] = SSE42_OP(pcmpgtq),
2995    [0x38] = SSE41_OP(pminsb),
2996    [0x39] = SSE41_OP(pminsd),
2997    [0x3a] = SSE41_OP(pminuw),
2998    [0x3b] = SSE41_OP(pminud),
2999    [0x3c] = SSE41_OP(pmaxsb),
3000    [0x3d] = SSE41_OP(pmaxsd),
3001    [0x3e] = SSE41_OP(pmaxuw),
3002    [0x3f] = SSE41_OP(pmaxud),
3003    [0x40] = SSE41_OP(pmulld),
3004    [0x41] = SSE41_OP(phminposuw),
3005    [0xdb] = AESNI_OP(aesimc),
3006    [0xdc] = AESNI_OP(aesenc),
3007    [0xdd] = AESNI_OP(aesenclast),
3008    [0xde] = AESNI_OP(aesdec),
3009    [0xdf] = AESNI_OP(aesdeclast),
3010};
3011
3012static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3013    [0x08] = SSE41_OP(roundps),
3014    [0x09] = SSE41_OP(roundpd),
3015    [0x0a] = SSE41_OP(roundss),
3016    [0x0b] = SSE41_OP(roundsd),
3017    [0x0c] = SSE41_OP(blendps),
3018    [0x0d] = SSE41_OP(blendpd),
3019    [0x0e] = SSE41_OP(pblendw),
3020    [0x0f] = SSSE3_OP(palignr),
3021    [0x14] = SSE41_SPECIAL, /* pextrb */
3022    [0x15] = SSE41_SPECIAL, /* pextrw */
3023    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3024    [0x17] = SSE41_SPECIAL, /* extractps */
3025    [0x20] = SSE41_SPECIAL, /* pinsrb */
3026    [0x21] = SSE41_SPECIAL, /* insertps */
3027    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3028    [0x40] = SSE41_OP(dpps),
3029    [0x41] = SSE41_OP(dppd),
3030    [0x42] = SSE41_OP(mpsadbw),
3031    [0x44] = PCLMULQDQ_OP(pclmulqdq),
3032    [0x60] = SSE42_OP(pcmpestrm),
3033    [0x61] = SSE42_OP(pcmpestri),
3034    [0x62] = SSE42_OP(pcmpistrm),
3035    [0x63] = SSE42_OP(pcmpistri),
3036    [0xdf] = AESNI_OP(aeskeygenassist),
3037};
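
/* Tables 6 and 7 cover the three-byte 0F 38 and 0F 3A opcode maps.
   Each entry carries an ext_mask so the decoder can reject an
   instruction whose CPUID feature bit (SSSE3, SSE4.1, SSE4.2,
   PCLMULQDQ, AES) is not exposed to the guest.  */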
3038
3039static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3040                    target_ulong pc_start, int rex_r)
3041{
3042    int b1, op1_offset, op2_offset, is_xmm, val;
3043    int modrm, mod, rm, reg;
3044    SSEFunc_0_epp sse_fn_epp;
3045    SSEFunc_0_eppi sse_fn_eppi;
3046    SSEFunc_0_ppi sse_fn_ppi;
3047    SSEFunc_0_eppt sse_fn_eppt;
3048    TCGMemOp ot;
3049
3050    b &= 0xff;
3051    if (s->prefix & PREFIX_DATA)
3052        b1 = 1;
3053    else if (s->prefix & PREFIX_REPZ)
3054        b1 = 2;
3055    else if (s->prefix & PREFIX_REPNZ)
3056        b1 = 3;
3057    else
3058        b1 = 0;
3059    sse_fn_epp = sse_op_table1[b][b1];
3060    if (!sse_fn_epp) {
3061        goto unknown_op;
3062    }
3063    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3064        is_xmm = 1;
3065    } else {
3066        if (b1 == 0) {
3067            /* MMX case */
3068            is_xmm = 0;
3069        } else {
3070            is_xmm = 1;
3071        }
3072    }
3073    /* simple MMX/SSE operation */
3074    if (s->flags & HF_TS_MASK) {
3075        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3076        return;
3077    }
3078    if (s->flags & HF_EM_MASK) {
3079    illegal_op:
3080        gen_illegal_opcode(s);
3081        return;
3082    }
3083    if (is_xmm
3084        && !(s->flags & HF_OSFXSR_MASK)
3085        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3086        goto unknown_op;
3087    }
3088    if (b == 0x0e) {
3089        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3090            /* If we were fully decoding this we might use illegal_op.  */
3091            goto unknown_op;
3092        }
3093        /* femms */
3094        gen_helper_emms(cpu_env);
3095        return;
3096    }
3097    if (b == 0x77) {
3098        /* emms */
3099        gen_helper_emms(cpu_env);
3100        return;
3101    }
3102    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3103       the static cpu state) */
3104    if (!is_xmm) {
3105        gen_helper_enter_mmx(cpu_env);
3106    }
3107
3108    modrm = x86_ldub_code(env, s);
3109    reg = ((modrm >> 3) & 7);
3110    if (is_xmm)
3111        reg |= rex_r;
3112    mod = (modrm >> 6) & 3;
3113    if (sse_fn_epp == SSE_SPECIAL) {
3114        b |= (b1 << 8);
3115        switch(b) {
3116        case 0x0e7: /* movntq */
3117            if (mod == 3) {
3118                goto illegal_op;
3119            }
3120            gen_lea_modrm(env, s, modrm);
3121            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3122            break;
3123        case 0x1e7: /* movntdq */
3124        case 0x02b: /* movntps */
3125        case 0x12b: /* movntpd */
3126            if (mod == 3)
3127                goto illegal_op;
3128            gen_lea_modrm(env, s, modrm);
3129            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3130            break;
3131        case 0x3f0: /* lddqu */
3132            if (mod == 3)
3133                goto illegal_op;
3134            gen_lea_modrm(env, s, modrm);
3135            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3136            break;
3137        case 0x22b: /* movntss */
3138        case 0x32b: /* movntsd */
3139            if (mod == 3)
3140                goto illegal_op;
3141            gen_lea_modrm(env, s, modrm);
3142            if (b1 & 1) {
3143                gen_stq_env_A0(s, offsetof(CPUX86State,
3144                                           xmm_regs[reg].ZMM_Q(0)));
3145            } else {
3146                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3147                    xmm_regs[reg].ZMM_L(0)));
3148                gen_op_st_v(s, MO_32, s->T0, s->A0);
3149            }
3150            break;
3151        case 0x6e: /* movd mm, ea */
3152#ifdef TARGET_X86_64
3153            if (s->dflag == MO_64) {
3154                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3155                tcg_gen_st_tl(s->T0, cpu_env,
3156                              offsetof(CPUX86State, fpregs[reg].mmx));
3157            } else
3158#endif
3159            {
3160                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3161                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3162                                 offsetof(CPUX86State,fpregs[reg].mmx));
3163                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3164                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3165            }
3166            break;
3167        case 0x16e: /* movd xmm, ea */
3168#ifdef TARGET_X86_64
3169            if (s->dflag == MO_64) {
3170                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3171                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3172                                 offsetof(CPUX86State,xmm_regs[reg]));
3173                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3174            } else
3175#endif
3176            {
3177                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3178                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3179                                 offsetof(CPUX86State,xmm_regs[reg]));
3180                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3181                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3182            }
3183            break;
3184        case 0x6f: /* movq mm, ea */
3185            if (mod != 3) {
3186                gen_lea_modrm(env, s, modrm);
3187                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3188            } else {
3189                rm = (modrm & 7);
3190                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3191                               offsetof(CPUX86State,fpregs[rm].mmx));
3192                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3193                               offsetof(CPUX86State,fpregs[reg].mmx));
3194            }
3195            break;
3196        case 0x010: /* movups */
3197        case 0x110: /* movupd */
3198        case 0x028: /* movaps */
3199        case 0x128: /* movapd */
3200        case 0x16f: /* movdqa xmm, ea */
3201        case 0x26f: /* movdqu xmm, ea */
3202            if (mod != 3) {
3203                gen_lea_modrm(env, s, modrm);
3204                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3205            } else {
3206                rm = (modrm & 7) | REX_B(s);
3207                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3208                            offsetof(CPUX86State,xmm_regs[rm]));
3209            }
3210            break;
3211        case 0x210: /* movss xmm, ea */
3212            if (mod != 3) {
3213                gen_lea_modrm(env, s, modrm);
3214                gen_op_ld_v(s, MO_32, s->T0, s->A0);
3215                tcg_gen_st32_tl(s->T0, cpu_env,
3216                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3217                tcg_gen_movi_tl(s->T0, 0);
3218                tcg_gen_st32_tl(s->T0, cpu_env,
3219                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3220                tcg_gen_st32_tl(s->T0, cpu_env,
3221                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3222                tcg_gen_st32_tl(s->T0, cpu_env,
3223                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3224            } else {
3225                rm = (modrm & 7) | REX_B(s);
3226                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3227                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3228            }
3229            break;
3230        case 0x310: /* movsd xmm, ea */
3231            if (mod != 3) {
3232                gen_lea_modrm(env, s, modrm);
3233                gen_ldq_env_A0(s, offsetof(CPUX86State,
3234                                           xmm_regs[reg].ZMM_Q(0)));
3235                tcg_gen_movi_tl(s->T0, 0);
3236                tcg_gen_st32_tl(s->T0, cpu_env,
3237                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3238                tcg_gen_st32_tl(s->T0, cpu_env,
3239                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3240            } else {
3241                rm = (modrm & 7) | REX_B(s);
3242                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3243                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3244            }
3245            break;
3246        case 0x012: /* movlps */
3247        case 0x112: /* movlpd */
3248            if (mod != 3) {
3249                gen_lea_modrm(env, s, modrm);
3250                gen_ldq_env_A0(s, offsetof(CPUX86State,
3251                                           xmm_regs[reg].ZMM_Q(0)));
3252            } else {
3253                /* movhlps */
3254                rm = (modrm & 7) | REX_B(s);
3255                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3256                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3257            }
3258            break;
3259        case 0x212: /* movsldup */
3260            if (mod != 3) {
3261                gen_lea_modrm(env, s, modrm);
3262                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3263            } else {
3264                rm = (modrm & 7) | REX_B(s);
3265                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3266                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3267                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3268                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3269            }
3270            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3271                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3272            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3273                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3274            break;
3275        case 0x312: /* movddup */
3276            if (mod != 3) {
3277                gen_lea_modrm(env, s, modrm);
3278                gen_ldq_env_A0(s, offsetof(CPUX86State,
3279                                           xmm_regs[reg].ZMM_Q(0)));
3280            } else {
3281                rm = (modrm & 7) | REX_B(s);
3282                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3283                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3284            }
3285            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3286                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3287            break;
3288        case 0x016: /* movhps */
3289        case 0x116: /* movhpd */
3290            if (mod != 3) {
3291                gen_lea_modrm(env, s, modrm);
3292                gen_ldq_env_A0(s, offsetof(CPUX86State,
3293                                           xmm_regs[reg].ZMM_Q(1)));
3294            } else {
3295                /* movlhps */
3296                rm = (modrm & 7) | REX_B(s);
3297                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3298                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3299            }
3300            break;
3301        case 0x216: /* movshdup */
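                /* MOVSHDUP duplicates the odd-numbered source dwords:
                   dst = { src[1], src[1], src[3], src[3] }.  */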
3302            if (mod != 3) {
3303                gen_lea_modrm(env, s, modrm);
3304                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3305            } else {
3306                rm = (modrm & 7) | REX_B(s);
3307                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3308                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3309                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3310                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3311            }
3312            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3313                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3314            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3315                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3316            break;
3317        case 0x178:
3318        case 0x378:
3319            {
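                    /* SSE4a EXTRQ/INSERTQ, immediate forms: two imm8
                       bytes supply the field length and the bit index.
                       For the 66-prefixed EXTRQ (b1 == 1) the ModRM reg
                       field is an opcode extension and must be 0.  */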
3320                int bit_index, field_length;
3321
3322                if (b1 == 1 && reg != 0) {
3323                    goto illegal_op;
                    }
3324                field_length = x86_ldub_code(env, s) & 0x3F;
3325                bit_index = x86_ldub_code(env, s) & 0x3F;
3326                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3327                    offsetof(CPUX86State,xmm_regs[reg]));
3328                if (b1 == 1) {
3329                    gen_helper_extrq_i(cpu_env, s->ptr0,
3330                                       tcg_const_i32(bit_index),
3331                                       tcg_const_i32(field_length));
3332                } else {
3333                    gen_helper_insertq_i(cpu_env, s->ptr0,
3334                                         tcg_const_i32(bit_index),
3335                                         tcg_const_i32(field_length));
                    }
3336            }
3337            break;
3338        case 0x7e: /* movd ea, mm */
3339#ifdef TARGET_X86_64
3340            if (s->dflag == MO_64) {
3341                tcg_gen_ld_i64(s->T0, cpu_env,
3342                               offsetof(CPUX86State,fpregs[reg].mmx));
3343                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3344            } else
3345#endif
3346            {
3347                tcg_gen_ld32u_tl(s->T0, cpu_env,
3348                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3349                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3350            }
3351            break;
3352        case 0x17e: /* movd ea, xmm */
3353#ifdef TARGET_X86_64
3354            if (s->dflag == MO_64) {
3355                tcg_gen_ld_i64(s->T0, cpu_env,
3356                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3357                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3358            } else
3359#endif
3360            {
3361                tcg_gen_ld32u_tl(s->T0, cpu_env,
3362                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3363                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3364            }
3365            break;
3366        case 0x27e: /* movq xmm, ea */
3367            if (mod != 3) {
3368                gen_lea_modrm(env, s, modrm);
3369                gen_ldq_env_A0(s, offsetof(CPUX86State,
3370                                           xmm_regs[reg].ZMM_Q(0)));
3371            } else {
3372                rm = (modrm & 7) | REX_B(s);
3373                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3374                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3375            }
3376            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3377            break;
3378        case 0x7f: /* movq ea, mm */
3379            if (mod != 3) {
3380                gen_lea_modrm(env, s, modrm);
3381                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3382            } else {
3383                rm = (modrm & 7);
3384                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3385                            offsetof(CPUX86State,fpregs[reg].mmx));
3386            }
3387            break;
3388        case 0x011: /* movups */
3389        case 0x111: /* movupd */
3390        case 0x029: /* movaps */
3391        case 0x129: /* movapd */
3392        case 0x17f: /* movdqa ea, xmm */
3393        case 0x27f: /* movdqu ea, xmm */
3394            if (mod != 3) {
3395                gen_lea_modrm(env, s, modrm);
3396                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3397            } else {
3398                rm = (modrm & 7) | REX_B(s);
3399                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3400                            offsetof(CPUX86State,xmm_regs[reg]));
3401            }
3402            break;
3403        case 0x211: /* movss ea, xmm */
3404            if (mod != 3) {
3405                gen_lea_modrm(env, s, modrm);
3406                tcg_gen_ld32u_tl(s->T0, cpu_env,
3407                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3408                gen_op_st_v(s, MO_32, s->T0, s->A0);
3409            } else {
3410                rm = (modrm & 7) | REX_B(s);
3411                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3412                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3413            }
3414            break;
3415        case 0x311: /* movsd ea, xmm */
3416            if (mod != 3) {
3417                gen_lea_modrm(env, s, modrm);
3418                gen_stq_env_A0(s, offsetof(CPUX86State,
3419                                           xmm_regs[reg].ZMM_Q(0)));
3420            } else {
3421                rm = (modrm & 7) | REX_B(s);
3422                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3423                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3424            }
3425            break;
3426        case 0x013: /* movlps */
3427        case 0x113: /* movlpd */
3428            if (mod != 3) {
3429                gen_lea_modrm(env, s, modrm);
3430                gen_stq_env_A0(s, offsetof(CPUX86State,
3431                                           xmm_regs[reg].ZMM_Q(0)));
3432            } else {
3433                goto illegal_op;
3434            }
3435            break;
3436        case 0x017: /* movhps */
3437        case 0x117: /* movhpd */
3438            if (mod != 3) {
3439                gen_lea_modrm(env, s, modrm);
3440                gen_stq_env_A0(s, offsetof(CPUX86State,
3441                                           xmm_regs[reg].ZMM_Q(1)));
3442            } else {
3443                goto illegal_op;
3444            }
3445            break;
3446        case 0x71: /* shift mm, im */
3447        case 0x72:
3448        case 0x73:
3449        case 0x171: /* shift xmm, im */
3450        case 0x172:
3451        case 0x173:
3452            if (b1 >= 2) {
3453                goto unknown_op;
3454            }
3455            val = x86_ldub_code(env, s);
3456            if (is_xmm) {
3457                tcg_gen_movi_tl(s->T0, val);
3458                tcg_gen_st32_tl(s->T0, cpu_env,
3459                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3460                tcg_gen_movi_tl(s->T0, 0);
3461                tcg_gen_st32_tl(s->T0, cpu_env,
3462                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3463                op1_offset = offsetof(CPUX86State,xmm_t0);
3464            } else {
3465                tcg_gen_movi_tl(s->T0, val);
3466                tcg_gen_st32_tl(s->T0, cpu_env,
3467                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3468                tcg_gen_movi_tl(s->T0, 0);
3469                tcg_gen_st32_tl(s->T0, cpu_env,
3470                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3471                op1_offset = offsetof(CPUX86State,mmx_t0);
3472            }
3473            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3474                                       ((modrm >> 3) & 7)][b1];
3475            if (!sse_fn_epp) {
3476                goto unknown_op;
3477            }
3478            if (is_xmm) {
3479                rm = (modrm & 7) | REX_B(s);
3480                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3481            } else {
3482                rm = (modrm & 7);
3483                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3484            }
3485            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3486            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3487            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3488            break;
3489        case 0x050: /* movmskps */
3490            rm = (modrm & 7) | REX_B(s);
3491            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3492                             offsetof(CPUX86State,xmm_regs[rm]));
3493            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3494            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3495            break;
3496        case 0x150: /* movmskpd */
3497            rm = (modrm & 7) | REX_B(s);
3498            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3499                             offsetof(CPUX86State,xmm_regs[rm]));
3500            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3501            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3502            break;
3503        case 0x02a: /* cvtpi2ps */
3504        case 0x12a: /* cvtpi2pd */
3505            gen_helper_enter_mmx(cpu_env);
3506            if (mod != 3) {
3507                gen_lea_modrm(env, s, modrm);
3508                op2_offset = offsetof(CPUX86State,mmx_t0);
3509                gen_ldq_env_A0(s, op2_offset);
3510            } else {
3511                rm = (modrm & 7);
3512                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3513            }
3514            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3515            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3516            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3517            switch (b >> 8) {
3518            case 0x0:
3519                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3520                break;
3521            default:
3522            case 0x1:
3523                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3524                break;
3525            }
3526            break;
3527        case 0x22a: /* cvtsi2ss */
3528        case 0x32a: /* cvtsi2sd */
3529            ot = mo_64_32(s->dflag);
3530            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3531            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3532            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3533            if (ot == MO_32) {
3534                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3535                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3536                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3537            } else {
3538#ifdef TARGET_X86_64
3539                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3540                sse_fn_epl(cpu_env, s->ptr0, s->T0);
3541#else
3542                goto illegal_op;
3543#endif
3544            }
3545            break;
3546        case 0x02c: /* cvttps2pi */
3547        case 0x12c: /* cvttpd2pi */
3548        case 0x02d: /* cvtps2pi */
3549        case 0x12d: /* cvtpd2pi */
3550            gen_helper_enter_mmx(cpu_env);
3551            if (mod != 3) {
3552                gen_lea_modrm(env, s, modrm);
3553                op2_offset = offsetof(CPUX86State,xmm_t0);
3554                gen_ldo_env_A0(s, op2_offset);
3555            } else {
3556                rm = (modrm & 7) | REX_B(s);
3557                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3558            }
3559            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3560            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3561            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3562            switch (b) {
3563            case 0x02c:
3564                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3565                break;
3566            case 0x12c:
3567                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3568                break;
3569            case 0x02d:
3570                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3571                break;
3572            case 0x12d:
3573                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3574                break;
3575            }
3576            break;
3577        case 0x22c: /* cvttss2si */
3578        case 0x32c: /* cvttsd2si */
3579        case 0x22d: /* cvtss2si */
3580        case 0x32d: /* cvtsd2si */
3581            ot = mo_64_32(s->dflag);
3582            if (mod != 3) {
3583                gen_lea_modrm(env, s, modrm);
3584                if ((b >> 8) & 1) {
3585                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3586                } else {
3587                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
3588                    tcg_gen_st32_tl(s->T0, cpu_env,
3589                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3590                }
3591                op2_offset = offsetof(CPUX86State,xmm_t0);
3592            } else {
3593                rm = (modrm & 7) | REX_B(s);
3594                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3595            }
3596            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3597            if (ot == MO_32) {
3598                SSEFunc_i_ep sse_fn_i_ep =
3599                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3600                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3601                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3602            } else {
3603#ifdef TARGET_X86_64
3604                SSEFunc_l_ep sse_fn_l_ep =
3605                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3606                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3607#else
3608                goto illegal_op;
3609#endif
3610            }
3611            gen_op_mov_reg_v(s, ot, reg, s->T0);
3612            break;
3613        case 0xc4: /* pinsrw */
3614        case 0x1c4:
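            /* The immediate word index is reduced modulo the register
               size: 8 words in an xmm register, 4 in an mm register.  */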
3615            s->rip_offset = 1;
3616            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3617            val = x86_ldub_code(env, s);
3618            if (b1) {
3619                val &= 7;
3620                tcg_gen_st16_tl(s->T0, cpu_env,
3621                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3622            } else {
3623                val &= 3;
3624                tcg_gen_st16_tl(s->T0, cpu_env,
3625                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3626            }
3627            break;
3628        case 0xc5: /* pextrw */
3629        case 0x1c5:
3630            if (mod != 3) {
3631                goto illegal_op;
            }
3632            ot = mo_64_32(s->dflag);
3633            val = x86_ldub_code(env, s);
3634            if (b1) {
3635                val &= 7;
3636                rm = (modrm & 7) | REX_B(s);
3637                tcg_gen_ld16u_tl(s->T0, cpu_env,
3638                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3639            } else {
3640                val &= 3;
3641                rm = (modrm & 7);
3642                tcg_gen_ld16u_tl(s->T0, cpu_env,
3643                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3644            }
3645            reg = ((modrm >> 3) & 7) | rex_r;
3646            gen_op_mov_reg_v(s, ot, reg, s->T0);
3647            break;
3648        case 0x1d6: /* movq ea, xmm */
3649            if (mod != 3) {
3650                gen_lea_modrm(env, s, modrm);
3651                gen_stq_env_A0(s, offsetof(CPUX86State,
3652                                           xmm_regs[reg].ZMM_Q(0)));
3653            } else {
3654                rm = (modrm & 7) | REX_B(s);
3655                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3656                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3657                gen_op_movq_env_0(s,
3658                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3659            }
3660            break;
3661        case 0x2d6: /* movq2dq */
3662            gen_helper_enter_mmx(cpu_env);
3663            rm = (modrm & 7);
3664            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3665                        offsetof(CPUX86State,fpregs[rm].mmx));
3666            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3667            break;
3668        case 0x3d6: /* movdq2q */
3669            gen_helper_enter_mmx(cpu_env);
3670            rm = (modrm & 7) | REX_B(s);
3671            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3672                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3673            break;
3674        case 0xd7: /* pmovmskb */
3675        case 0x1d7:
3676            if (mod != 3) {
3677                goto illegal_op;
            }
3678            if (b1) {
3679                rm = (modrm & 7) | REX_B(s);
3680                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3681                                 offsetof(CPUX86State, xmm_regs[rm]));
3682                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3683            } else {
3684                rm = (modrm & 7);
3685                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3686                                 offsetof(CPUX86State, fpregs[rm].mmx));
3687                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3688            }
3689            reg = ((modrm >> 3) & 7) | rex_r;
3690            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3691            break;
3692
3693        case 0x138:
3694        case 0x038:
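            /* Here 'modrm' already holds the third opcode byte; keep it
               in b and fetch the real ModRM byte below.  */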
3695            b = modrm;
3696            if ((b & 0xf0) == 0xf0) {
3697                goto do_0f_38_fx;
3698            }
3699            modrm = x86_ldub_code(env, s);
3700            rm = modrm & 7;
3701            reg = ((modrm >> 3) & 7) | rex_r;
3702            mod = (modrm >> 6) & 3;
3703            if (b1 >= 2) {
3704                goto unknown_op;
3705            }
3706
3707            sse_fn_epp = sse_op_table6[b].op[b1];
3708            if (!sse_fn_epp) {
3709                goto unknown_op;
3710            }
3711            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) {
3712                goto illegal_op;
            }
3713
3714            if (b1) {
3715                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3716                if (mod == 3) {
3717                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3718                } else {
3719                    op2_offset = offsetof(CPUX86State,xmm_t0);
3720                    gen_lea_modrm(env, s, modrm);
3721                    switch (b) {
3722                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3723                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3724                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3725                        gen_ldq_env_A0(s, op2_offset +
3726                                        offsetof(ZMMReg, ZMM_Q(0)));
3727                        break;
3728                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3729                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3730                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3731                                            s->mem_index, MO_LEUL);
3732                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3733                                        offsetof(ZMMReg, ZMM_L(0)));
3734                        break;
3735                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3736                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3737                                           s->mem_index, MO_LEUW);
3738                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3739                                        offsetof(ZMMReg, ZMM_W(0)));
3740                        break;
3741                    case 0x2a:            /* movntdqa */
3742                        gen_ldo_env_A0(s, op1_offset);
3743                        return;
3744                    default:
3745                        gen_ldo_env_A0(s, op2_offset);
3746                    }
3747                }
3748            } else {
3749                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3750                if (mod == 3) {
3751                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3752                } else {
3753                    op2_offset = offsetof(CPUX86State,mmx_t0);
3754                    gen_lea_modrm(env, s, modrm);
3755                    gen_ldq_env_A0(s, op2_offset);
3756                }
3757            }
3758            if (sse_fn_epp == SSE_SPECIAL) {
3759                goto unknown_op;
3760            }
3761
3762            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3763            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3764            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3765
3766            if (b == 0x17) {
3767                set_cc_op(s, CC_OP_EFLAGS);
3768            }
3769            break;
3770
3771        case 0x238:
3772        case 0x338:
3773        do_0f_38_fx:
3774            /* Various integer extensions at 0f 38 f[0-f].  */
3775            b = modrm | (b1 << 8);
3776            modrm = x86_ldub_code(env, s);
3777            reg = ((modrm >> 3) & 7) | rex_r;
3778
3779            switch (b) {
3780            case 0x3f0: /* crc32 Gd,Eb */
3781            case 0x3f1: /* crc32 Gd,Ey */
3782            do_crc32:
3783                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3784                    goto illegal_op;
3785                }
3786                if ((b & 0xff) == 0xf0) {
3787                    ot = MO_8;
3788                } else if (s->dflag != MO_64) {
3789                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3790                } else {
3791                    ot = MO_64;
3792                }
3793
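                /* The SSE4.2 CRC32 instruction accumulates with the
                   Castagnoli polynomial (CRC-32C) over the low 8 << ot
                   bits of the source operand.  */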
3794                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3795                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3796                gen_helper_crc32(s->T0, s->tmp2_i32,
3797                                 s->T0, tcg_const_i32(8 << ot));
3798
3799                ot = mo_64_32(s->dflag);
3800                gen_op_mov_reg_v(s, ot, reg, s->T0);
3801                break;
3802
3803            case 0x1f0: /* crc32 or movbe */
3804            case 0x1f1:
3805                /* For these insns the f3 prefix is supposed to take
3806                   priority over the 66 prefix, but that is not how we
3807                   computed b1 above.  */
3808                if (s->prefix & PREFIX_REPNZ) {
3809                    goto do_crc32;
3810                }
3811                /* FALLTHRU */
3812            case 0x0f0: /* movbe Gy,My */
3813            case 0x0f1: /* movbe My,Gy */
3814                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3815                    goto illegal_op;
3816                }
3817                if (s->dflag != MO_64) {
3818                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3819                } else {
3820                    ot = MO_64;
3821                }
3822
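                /* MOVBE is just a byte-swapping load or store; or-ing
                   MO_BE into the memop makes TCG do the swap for us.  */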
3823                gen_lea_modrm(env, s, modrm);
3824                if ((b & 1) == 0) {
3825                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
3826                                       s->mem_index, ot | MO_BE);
3827                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3828                } else {
3829                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3830                                       s->mem_index, ot | MO_BE);
3831                }
3832                break;
3833
3834            case 0x0f2: /* andn Gy, By, Ey */
3835                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3836                    || !(s->prefix & PREFIX_VEX)
3837                    || s->vex_l != 0) {
3838                    goto illegal_op;
3839                }
3840                ot = mo_64_32(s->dflag);
3841                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3842                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3843                gen_op_mov_reg_v(s, ot, reg, s->T0);
3844                gen_op_update1_cc(s);
3845                set_cc_op(s, CC_OP_LOGICB + ot);
3846                break;
3847
3848            case 0x0f7: /* bextr Gy, Ey, By */
3849                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3850                    || !(s->prefix & PREFIX_VEX)
3851                    || s->vex_l != 0) {
3852                    goto illegal_op;
3853                }
3854                ot = mo_64_32(s->dflag);
3855                {
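                    /* BEXTR: start = By[7:0], len = By[15:8];
                       result = (Ey >> start) & ((1 << len) - 1),
                       with out-of-range start/len handled branch-free
                       via movcond.  */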
3856                    TCGv bound, zero;
3857
3858                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3859                    /* Extract START, and shift the operand.
3860                       Shifts larger than operand size get zeros.  */
3861                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3862                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3863
3864                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3865                    zero = tcg_const_tl(0);
3866                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3867                                       s->T0, zero);
3868                    tcg_temp_free(zero);
3869
3870                    /* Extract the LEN into a mask.  Lengths larger than
3871                       operand size get all ones.  */
3872                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3873                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3874                                       s->A0, bound);
3875                    tcg_temp_free(bound);
3876                    tcg_gen_movi_tl(s->T1, 1);
3877                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3878                    tcg_gen_subi_tl(s->T1, s->T1, 1);
3879                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
3880
3881                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3882                    gen_op_update1_cc(s);
3883                    set_cc_op(s, CC_OP_LOGICB + ot);
3884                }
3885                break;
3886
3887            case 0x0f5: /* bzhi Gy, Ey, By */
3888                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3889                    || !(s->prefix & PREFIX_VEX)
3890                    || s->vex_l != 0) {
3891                    goto illegal_op;
3892                }
3893                ot = mo_64_32(s->dflag);
3894                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
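                /* BZHI clears the bits of Ey from position n upward,
                   n = By[7:0]: compute Ey & ~(-1 << n), with n clamped
                   so that the TCG shift stays well defined.  */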
3895                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3896                {
3897                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3898                    /* Note that since we're using BMILG (in order to get O
3899                       cleared) we need to store the inverse into C.  */
3900                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3901                                       s->T1, bound);
3902                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3903                                       bound, bound, s->T1);
3904                    tcg_temp_free(bound);
3905                }
3906                tcg_gen_movi_tl(s->A0, -1);
3907                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3908                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3909                gen_op_mov_reg_v(s, ot, reg, s->T0);
3910                gen_op_update1_cc(s);
3911                set_cc_op(s, CC_OP_BMILGB + ot);
3912                break;
3913
3914            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3915                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3916                    || !(s->prefix & PREFIX_VEX)
3917                    || s->vex_l != 0) {
3918                    goto illegal_op;
3919                }
3920                ot = mo_64_32(s->dflag);
3921                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
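                /* MULX: unsigned rDX * Ey; the low half goes to By
                   (vex_v) and the high half to Gy.  EFLAGS are not
                   modified, hence no cc_op update.  */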
3922                switch (ot) {
3923                default:
3924                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3925                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3926                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3927                                      s->tmp2_i32, s->tmp3_i32);
3928                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3929                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3930                    break;
3931#ifdef TARGET_X86_64
3932                case MO_64:
3933                    tcg_gen_mulu2_i64(s->T0, s->T1,
3934                                      s->T0, cpu_regs[R_EDX]);
3935                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
3936                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
3937                    break;
3938#endif
3939                }
3940                break;
3941
3942            case 0x3f5: /* pdep Gy, By, Ey */
3943                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3944                    || !(s->prefix & PREFIX_VEX)
3945                    || s->vex_l != 0) {
3946                    goto illegal_op;
3947                }
3948                ot = mo_64_32(s->dflag);
3949                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
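                /* PDEP scatters contiguous low-order source bits into
                   the bit positions set in the mask; PEXT below is the
                   inverse gather.  */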
3950                /* Note that by zero-extending the mask operand, we
3951                   automatically handle zero-extending the result.  */
3952                if (ot == MO_64) {
3953                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3954                } else {
3955                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3956                }
3957                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
3958                break;
3959
3960            case 0x2f5: /* pext Gy, By, Ey */
3961                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3962                    || !(s->prefix & PREFIX_VEX)
3963                    || s->vex_l != 0) {
3964                    goto illegal_op;
3965                }
3966                ot = mo_64_32(s->dflag);
3967                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3968                /* Note that by zero-extending the mask operand, we
3969                   automatically handle zero-extending the result.  */
3970                if (ot == MO_64) {
3971                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3972                } else {
3973                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3974                }
3975                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
3976                break;
3977
3978            case 0x1f6: /* adcx Gy, Ey */
3979            case 0x2f6: /* adox Gy, Ey */
3980                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3981                    goto illegal_op;
3982                } else {
3983                    TCGv carry_in, carry_out, zero;
3984                    int end_op;
3985
3986                    ot = mo_64_32(s->dflag);
3987                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3988
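                    /* ADCX updates only CF and ADOX only OF; cc_op
                       tracks which carry (or both, CC_OP_ADCOX) is
                       live so that chained rounds can reuse carry_out
                       without materializing all of EFLAGS.  */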
3989                    /* Re-use the carry-out from a previous round.  */
3990                    carry_in = NULL;
3991                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3992                    switch (s->cc_op) {
3993                    case CC_OP_ADCX:
3994                        if (b == 0x1f6) {
3995                            carry_in = cpu_cc_dst;
3996                            end_op = CC_OP_ADCX;
3997                        } else {
3998                            end_op = CC_OP_ADCOX;
3999                        }
4000                        break;
4001                    case CC_OP_ADOX:
4002                        if (b == 0x1f6) {
4003                            end_op = CC_OP_ADCOX;
4004                        } else {
4005                            carry_in = cpu_cc_src2;
4006                            end_op = CC_OP_ADOX;
4007                        }
4008                        break;
4009                    case CC_OP_ADCOX:
4010                        end_op = CC_OP_ADCOX;
4011                        carry_in = carry_out;
4012                        break;
4013                    default:
4014                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4015                        break;
4016                    }
4017                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
4018                    if (!carry_in) {
4019                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4020                            gen_compute_eflags(s);
4021                        }
4022                        carry_in = s->tmp0;
4023                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
4024                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4025                    }
4026
4027                    switch (ot) {
4028#ifdef TARGET_X86_64
4029                    case MO_32:
4030                        /* If we know TL is 64-bit, and we want a 32-bit
4031                           result, just do everything in 64-bit arithmetic.  */
4032                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4033                        tcg_gen_ext32u_i64(s->T0, s->T0);
4034                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4035                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
4036                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4037                        tcg_gen_shri_i64(carry_out, s->T0, 32);
4038                        break;
4039#endif
4040                    default:
4041                        /* Otherwise compute the carry-out in two steps.  */
4042                        zero = tcg_const_tl(0);
4043                        tcg_gen_add2_tl(s->T0, carry_out,
4044                                        s->T0, zero,
4045                                        carry_in, zero);
4046                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4047                                        cpu_regs[reg], carry_out,
4048                                        s->T0, zero);
4049                        tcg_temp_free(zero);
4050                        break;
4051                    }
4052                    set_cc_op(s, end_op);
4053                }
4054                break;
4055
4056            case 0x1f7: /* shlx Gy, Ey, By */
4057            case 0x2f7: /* sarx Gy, Ey, By */
4058            case 0x3f7: /* shrx Gy, Ey, By */
4059                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4060                    || !(s->prefix & PREFIX_VEX)
4061                    || s->vex_l != 0) {
4062                    goto illegal_op;
4063                }
4064                ot = mo_64_32(s->dflag);
4065                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
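                /* The count comes from By, masked to the operand width;
                   EFLAGS are untouched, so there is no cc_op update.  */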
4066                if (ot == MO_64) {
4067                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4068                } else {
4069                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4070                }
4071                if (b == 0x1f7) {
4072                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4073                } else if (b == 0x2f7) {
4074                    if (ot != MO_64) {
4075                        tcg_gen_ext32s_tl(s->T0, s->T0);
4076                    }
4077                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4078                } else {
4079                    if (ot != MO_64) {
4080                        tcg_gen_ext32u_tl(s->T0, s->T0);
4081                    }
4082                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4083                }
4084                gen_op_mov_reg_v(s, ot, reg, s->T0);
4085                break;
4086
4087            case 0x0f3:
4088            case 0x1f3:
4089            case 0x2f3:
4090            case 0x3f3: /* Group 17 */
4091                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4092                    || !(s->prefix & PREFIX_VEX)
4093                    || s->vex_l != 0) {
4094                    goto illegal_op;
4095                }
4096                ot = mo_64_32(s->dflag);
4097                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4098
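                /* Classic bit tricks: x & (x - 1) clears the lowest set
                   bit (BLSR), x ^ (x - 1) masks through it (BLSMSK),
                   and x & -x isolates it (BLSI).  */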
4099                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4100                switch (reg & 7) {
4101                case 1: /* blsr By,Ey */
4102                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4103                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4104                    break;
4105                case 2: /* blsmsk By,Ey */
4106                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4107                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4108                    break;
4109                case 3: /* blsi By, Ey */
4110                    tcg_gen_neg_tl(s->T1, s->T0);
4111                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4112                    break;
4113                default:
4114                    goto unknown_op;
4115                }
4116                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4117                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4118                set_cc_op(s, CC_OP_BMILGB + ot);
4119                break;
4120
4121            default:
4122                goto unknown_op;
4123            }
4124            break;
4125
4126        case 0x03a:
4127        case 0x13a:
4128            b = modrm;
4129            modrm = x86_ldub_code(env, s);
4130            rm = modrm & 7;
4131            reg = ((modrm >> 3) & 7) | rex_r;
4132            mod = (modrm >> 6) & 3;
4133            if (b1 >= 2) {
4134                goto unknown_op;
4135            }
4136
4137            sse_fn_eppi = sse_op_table7[b].op[b1];
4138            if (!sse_fn_eppi) {
4139                goto unknown_op;
4140            }
4141            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) {
4142                goto illegal_op;
            }
4143
4144            s->rip_offset = 1;
4145
4146            if (sse_fn_eppi == SSE_SPECIAL) {
4147                ot = mo_64_32(s->dflag);
4148                rm = (modrm & 7) | REX_B(s);
4149                if (mod != 3) {
4150                    gen_lea_modrm(env, s, modrm);
                }
4151                reg = ((modrm >> 3) & 7) | rex_r;
4152                val = x86_ldub_code(env, s);
4153                switch (b) {
4154                case 0x14: /* pextrb */
4155                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4156                                            xmm_regs[reg].ZMM_B(val & 15)));
4157                    if (mod == 3) {
4158                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4159                    } else {
4160                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4161                                           s->mem_index, MO_UB);
4162                    }
4163                    break;
4164                case 0x15: /* pextrw */
4165                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4166                                            xmm_regs[reg].ZMM_W(val & 7)));
4167                    if (mod == 3) {
4168                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4169                    } else {
4170                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4171                                           s->mem_index, MO_LEUW);
4172                    }
4173                    break;
4174                case 0x16:
4175                    if (ot == MO_32) { /* pextrd */
4176                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4177                                        offsetof(CPUX86State,
4178                                                xmm_regs[reg].ZMM_L(val & 3)));
4179                        if (mod == 3) {
4180                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4181                        } else {
4182                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4183                                                s->mem_index, MO_LEUL);
4184                        }
4185                    } else { /* pextrq */
4186#ifdef TARGET_X86_64
4187                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4188                                        offsetof(CPUX86State,
4189                                                xmm_regs[reg].ZMM_Q(val & 1)));
4190                        if (mod == 3) {
4191                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4192                        } else {
4193                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4194                                                s->mem_index, MO_LEQ);
4195                        }
4196#else
4197                        goto illegal_op;
4198#endif
4199                    }
4200                    break;
4201                case 0x17: /* extractps */
4202                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4203                                            xmm_regs[reg].ZMM_L(val & 3)));
4204                    if (mod == 3) {
4205                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4206                    } else {
4207                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4208                                           s->mem_index, MO_LEUL);
4209                    }
4210                    break;
4211                case 0x20: /* pinsrb */
4212                    if (mod == 3) {
4213                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4214                    } else {
4215                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
4216                                           s->mem_index, MO_UB);
4217                    }
4218                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4219                                            xmm_regs[reg].ZMM_B(val & 15)));
4220                    break;
4221                case 0x21: /* insertps */
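                    /* INSERTPS imm8: bits 7:6 select the source dword
                       (register form only), bits 5:4 the destination
                       dword, and bits 3:0 form a zero mask.  */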
4222                    if (mod == 3) {
4223                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4224                                        offsetof(CPUX86State,xmm_regs[rm]
4225                                                .ZMM_L((val >> 6) & 3)));
4226                    } else {
4227                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4228                                            s->mem_index, MO_LEUL);
4229                    }
4230                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4231                                    offsetof(CPUX86State,xmm_regs[reg]
4232                                            .ZMM_L((val >> 4) & 3)));
4233                    if ((val >> 0) & 1) {
4234                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4235                                        cpu_env, offsetof(CPUX86State,
4236                                                xmm_regs[reg].ZMM_L(0)));
                        }
4237                    if ((val >> 1) & 1) {
4238                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4239                                        cpu_env, offsetof(CPUX86State,
4240                                                xmm_regs[reg].ZMM_L(1)));
                        }
4241                    if ((val >> 2) & 1) {
4242                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4243                                        cpu_env, offsetof(CPUX86State,
4244                                                xmm_regs[reg].ZMM_L(2)));
                        }
4245                    if ((val >> 3) & 1) {
4246                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4247                                        cpu_env, offsetof(CPUX86State,
4248                                                xmm_regs[reg].ZMM_L(3)));
                        }
4249                    break;
4250                case 0x22:
4251                    if (ot == MO_32) { /* pinsrd */
4252                        if (mod == 3) {
4253                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4254                        } else {
4255                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4256                                                s->mem_index, MO_LEUL);
4257                        }
4258                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4259                                        offsetof(CPUX86State,
4260                                                xmm_regs[reg].ZMM_L(val & 3)));
4261                    } else { /* pinsrq */
4262#ifdef TARGET_X86_64
4263                        if (mod == 3) {
4264                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4265                        } else {
4266                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4267                                                s->mem_index, MO_LEQ);
4268                        }
4269                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4270                                        offsetof(CPUX86State,
4271                                                xmm_regs[reg].ZMM_Q(val & 1)));
4272#else
4273                        goto illegal_op;
4274#endif
4275                    }
4276                    break;
4277                }
4278                return;
4279            }
4280
4281            if (b1) {
4282                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4283                if (mod == 3) {
4284                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4285                } else {
4286                    op2_offset = offsetof(CPUX86State,xmm_t0);
4287                    gen_lea_modrm(env, s, modrm);
4288                    gen_ldo_env_A0(s, op2_offset);
4289                }
4290            } else {
4291                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4292                if (mod == 3) {
4293                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4294                } else {
4295                    op2_offset = offsetof(CPUX86State,mmx_t0);
4296                    gen_lea_modrm(env, s, modrm);
4297                    gen_ldq_env_A0(s, op2_offset);
4298                }
4299            }
4300            val = x86_ldub_code(env, s);
4301
4302            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4303                set_cc_op(s, CC_OP_EFLAGS);
4304
4305                if (s->dflag == MO_64) {
4306                    /* The helper must use entire 64-bit gp registers */
4307                    val |= 1 << 8;
4308                }
4309            }
4310
4311            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4312            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4313            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4314            break;
4315
4316        case 0x33a:
4317            /* Various integer extensions at 0f 3a f[0-f].  */
4318            b = modrm | (b1 << 8);
4319            modrm = x86_ldub_code(env, s);
4320            reg = ((modrm >> 3) & 7) | rex_r;
4321
4322            switch (b) {
4323            case 0x3f0: /* rorx Gy,Ey, Ib */
4324                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4325                    || !(s->prefix & PREFIX_VEX)
4326                    || s->vex_l != 0) {
4327                    goto illegal_op;
4328                }
4329                ot = mo_64_32(s->dflag);
4330                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4331                b = x86_ldub_code(env, s);
4332                if (ot == MO_64) {
4333                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4334                } else {
4335                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4336                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4337                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4338                }
4339                gen_op_mov_reg_v(s, ot, reg, s->T0);
4340                break;
4341
4342            default:
4343                goto unknown_op;
4344            }
4345            break;
4346
4347        default:
4348        unknown_op:
4349            gen_unknown_opcode(env, s);
4350            return;
4351        }
4352    } else {
4353        /* generic MMX or SSE operation */
4354        switch (b) {
4355        case 0x70: /* pshufx insn */
4356        case 0xc6: /* pshufx insn */
4357        case 0xc2: /* compare insns */
4358            s->rip_offset = 1;
4359            break;
4360        default:
4361            break;
4362        }
4363        if (is_xmm) {
4364            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4365            if (mod != 3) {
4366                int sz = 4;
4367
4368                gen_lea_modrm(env, s, modrm);
4369                op2_offset = offsetof(CPUX86State,xmm_t0);
4370
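                /* sz is log2 of the memory operand size: 2 = dword,
                   3 = qword, and the default 4 = the full 16-byte
                   vector.  Scalar ops load only the bytes they use, so
                   an operand at the end of a page does not fault on
                   bytes the instruction never touches.  */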
4371                switch (b) {
4372                case 0x50 ... 0x5a:
4373                case 0x5c ... 0x5f:
4374                case 0xc2:
4375                    /* Most sse scalar operations.  */
4376                    if (b1 == 2) {
4377                        sz = 2;
4378                    } else if (b1 == 3) {
4379                        sz = 3;
4380                    }
4381                    break;
4382
4383                case 0x2e:  /* ucomis[sd] */
4384                case 0x2f:  /* comis[sd] */
4385                    if (b1 == 0) {
4386                        sz = 2;
4387                    } else {
4388                        sz = 3;
4389                    }
4390                    break;
4391                }
4392
4393                switch (sz) {
4394                case 2:
4395                    /* 32 bit access */
4396                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
4397                    tcg_gen_st32_tl(s->T0, cpu_env,
4398                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4399                    break;
4400                case 3:
4401                    /* 64 bit access */
4402                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4403                    break;
4404                default:
4405                    /* 128 bit access */
4406                    gen_ldo_env_A0(s, op2_offset);
4407                    break;
4408                }
4409            } else {
4410                rm = (modrm & 7) | REX_B(s);
4411                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4412            }
4413        } else {
4414            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4415            if (mod != 3) {
4416                gen_lea_modrm(env, s, modrm);
4417                op2_offset = offsetof(CPUX86State,mmx_t0);
4418                gen_ldq_env_A0(s, op2_offset);
4419            } else {
4420                rm = (modrm & 7);
4421                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4422            }
4423        }
4424        switch (b) {
4425        case 0x0f: /* 3DNow! data insns */
4426            val = x86_ldub_code(env, s);
4427            sse_fn_epp = sse_op_table5[val];
4428            if (!sse_fn_epp) {
4429                goto unknown_op;
4430            }
4431            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4432                goto illegal_op;
4433            }
4434            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4435            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4436            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4437            break;
4438        case 0x70: /* pshufx insn */
4439        case 0xc6: /* pshufx insn */
4440            val = x86_ldub_code(env, s);
4441            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4442            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4443            /* XXX: introduce a new table? */
4444            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4445            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4446            break;
4447        case 0xc2:
4448            /* compare insns */
4449            val = x86_ldub_code(env, s);
4450            if (val >= 8) {
4451                goto unknown_op;
            }
4452            sse_fn_epp = sse_op_table4[val][b1];
4453
4454            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4455            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4456            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4457            break;
4458        case 0xf7:
4459            /* maskmov: the store goes through DS:[rDI], so we
                   must prepare A0 by hand */
4460            if (mod != 3) {
4461                goto illegal_op;
            }
4462            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4463            gen_extu(s->aflag, s->A0);
4464            gen_add_A0_ds_seg(s);
4465
4466            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4467            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4468            /* XXX: introduce a new table? */
4469            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4470            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4471            break;
4472        default:
4473            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4474            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4475            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4476            break;
4477        }
4478        if (b == 0x2e || b == 0x2f) {
4479            set_cc_op(s, CC_OP_EFLAGS);
4480        }
4481    }
4482}
4483
4484/* Convert one instruction.  s->base.is_jmp is set if the translation must
4485   be stopped.  Return the next pc value.  */
4486static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4487{
4488    CPUX86State *env = cpu->env_ptr;
4489    int b, prefixes;
4490    int shift;
4491    TCGMemOp ot, aflag, dflag;
4492    int modrm, reg, rm, mod, op, opreg, val;
4493    target_ulong next_eip, tval;
4494    int rex_w, rex_r;
4495    target_ulong pc_start = s->base.pc_next;
4496
4497    s->pc_start = s->pc = pc_start;
4498    s->override = -1;
4499#ifdef TARGET_X86_64
4500    s->rex_x = 0;
4501    s->rex_b = 0;
4502    s->x86_64_hregs = false;
4503#endif
4504    s->rip_offset = 0; /* for relative ip address */
4505    s->vex_l = 0;
4506    s->vex_v = 0;
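    /* Instruction fetch siglongjmps back here once decode exceeds the
       maximum instruction length; translate that into #GP(0).  */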
4507    if (sigsetjmp(s->jmpbuf, 0) != 0) {
4508        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4509        return s->pc;
4510    }
4511
4512    prefixes = 0;
4513    rex_w = -1;
4514    rex_r = 0;
4515
4516 next_byte:
4517    b = x86_ldub_code(env, s);
4518    /* Collect prefixes.  */
4519    switch (b) {
4520    case 0xf3:
4521        prefixes |= PREFIX_REPZ;
4522        goto next_byte;
4523    case 0xf2:
4524        prefixes |= PREFIX_REPNZ;
4525        goto next_byte;
4526    case 0xf0:
4527        prefixes |= PREFIX_LOCK;
4528        goto next_byte;
4529    case 0x2e:
4530        s->override = R_CS;
4531        goto next_byte;
4532    case 0x36:
4533        s->override = R_SS;
4534        goto next_byte;
4535    case 0x3e:
4536        s->override = R_DS;
4537        goto next_byte;
4538    case 0x26:
4539        s->override = R_ES;
4540        goto next_byte;
4541    case 0x64:
4542        s->override = R_FS;
4543        goto next_byte;
4544    case 0x65:
4545        s->override = R_GS;
4546        goto next_byte;
4547    case 0x66:
4548        prefixes |= PREFIX_DATA;
4549        goto next_byte;
4550    case 0x67:
4551        prefixes |= PREFIX_ADR;
4552        goto next_byte;
4553#ifdef TARGET_X86_64
4554    case 0x40 ... 0x4f:
4555        if (CODE64(s)) {
4556            /* REX prefix */
4557            rex_w = (b >> 3) & 1;
4558            rex_r = (b & 0x4) << 1;
4559            s->rex_x = (b & 0x2) << 2;
4560            REX_B(s) = (b & 0x1) << 3;
4561            /* select uniform byte register addressing */
4562            s->x86_64_hregs = true;
4563            goto next_byte;
4564        }
4565        break;
4566#endif
4567    case 0xc5: /* 2-byte VEX */
4568    case 0xc4: /* 3-byte VEX */
4569        /* VEX prefixes are only valid in 32-bit mode (code32 is also
4570           set for 64-bit code); otherwise c4/c5 decode as LES or LDS.  */
4571        if (s->code32 && !s->vm86) {
4572            static const int pp_prefix[4] = {
4573                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4574            };
4575            int vex3, vex2 = x86_ldub_code(env, s);
4576
4577            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4578                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4579                   otherwise the instruction is LES or LDS.  */
4580                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4581                break;
4582            }
4583
4584            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4585            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4586                            | PREFIX_LOCK | PREFIX_DATA)) {
4587                goto illegal_op;
4588            }
4589#ifdef TARGET_X86_64
4590            if (s->x86_64_hregs) {
4591                goto illegal_op;
4592            }
4593#endif
4594            rex_r = (~vex2 >> 4) & 8;
4595            if (b == 0xc5) {
4596                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4597                vex3 = vex2;
4598                b = x86_ldub_code(env, s) | 0x100;
4599            } else {
4600                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4601#ifdef TARGET_X86_64
4602                s->rex_x = (~vex2 >> 3) & 8;
4603                s->rex_b = (~vex2 >> 2) & 8;
4604#endif
4605                vex3 = x86_ldub_code(env, s);
4606                rex_w = (vex3 >> 7) & 1;
4607                switch (vex2 & 0x1f) {
4608                case 0x01: /* Implied 0f leading opcode bytes.  */
4609                    b = x86_ldub_code(env, s) | 0x100;
4610                    break;
4611                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4612                    b = 0x138;
4613                    break;
4614                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4615                    b = 0x13a;
4616                    break;
4617                default:   /* Reserved for future use.  */
4618                    goto unknown_op;
4619                }
4620            }
4621            s->vex_v = (~vex3 >> 3) & 0xf;
4622            s->vex_l = (vex3 >> 2) & 1;
4623            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
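                /* Worked example: c5 f8 77 is a 2-byte VEX with
                   vvvv=1111b (vex_v = 0), L=0, pp=00b, yielding
                   opcode 0x177 (VZEROUPPER).  */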
4624        }
4625        break;
4626    }
4627
4628    /* Post-process prefixes.  */
4629    if (CODE64(s)) {
4630        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4631           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4632           over 0x66 if both are present.  */
4633        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4634        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4635        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4636    } else {
4637        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4638        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4639            dflag = MO_32;
4640        } else {
4641            dflag = MO_16;
4642        }
4643        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4644        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4645            aflag = MO_32;
4646        } else {
4647            aflag = MO_16;
4648        }
4649    }
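    /* Example: with a 32-bit code segment, prefixes 66 67 yield
       dflag = MO_16 and aflag = MO_16; in a 16-bit code segment the
       same prefixes yield MO_32 for both.  */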
4650
4651    s->prefix = prefixes;
4652    s->aflag = aflag;
4653    s->dflag = dflag;
4654
4655    /* now check op code */
4656 reswitch:
4657    switch(b) {
4658    case 0x0f:
4659        /**************************/
4660        /* extended op code */
4661        b = x86_ldub_code(env, s) | 0x100;
4662        goto reswitch;
4663
4664        /**************************/
4665        /* arith & logic */
4666    case 0x00 ... 0x05:
4667    case 0x08 ... 0x0d:
4668    case 0x10 ... 0x15:
4669    case 0x18 ... 0x1d:
4670    case 0x20 ... 0x25:
4671    case 0x28 ... 0x2d:
4672    case 0x30 ... 0x35:
4673    case 0x38 ... 0x3d:
4674        {
4675            int op, f, val;
4676            op = (b >> 3) & 7;
4677            f = (b >> 1) & 3;
4678
4679            ot = mo_b_d(b, dflag);
4680
4681            switch(f) {
4682            case 0: /* OP Ev, Gv */
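                /* ModRM layout: mod in bits [7:6], reg in [5:3], r/m in
                   [2:0]; REX.R and REX.B (rex_r / REX_B) extend reg and
                   r/m to four bits in 64-bit mode.  */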
4683                modrm = x86_ldub_code(env, s);
4684                reg = ((modrm >> 3) & 7) | rex_r;
4685                mod = (modrm >> 6) & 3;
4686                rm = (modrm & 7) | REX_B(s);
4687                if (mod != 3) {
4688                    gen_lea_modrm(env, s, modrm);
4689                    opreg = OR_TMP0;
4690                } else if (op == OP_XORL && rm == reg) {
4691                xor_zero:
4692                    /* xor reg, reg optimisation */
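                    /* The result is architecturally zero: ZF and PF set,
                       SF/CF/OF clear; CC_OP_CLR encodes exactly those
                       flags without computing anything.  */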
4693                    set_cc_op(s, CC_OP_CLR);
4694                    tcg_gen_movi_tl(s->T0, 0);
4695                    gen_op_mov_reg_v(s, ot, reg, s->T0);
4696                    break;
4697                } else {
4698                    opreg = rm;
4699                }
4700                gen_op_mov_v_reg(s, ot, s->T1, reg);
4701                gen_op(s, op, ot, opreg);
4702                break;
4703            case 1: /* OP Gv, Ev */
4704                modrm = x86_ldub_code(env, s);
4705                mod = (modrm >> 6) & 3;
4706                reg = ((modrm >> 3) & 7) | rex_r;
4707                rm = (modrm & 7) | REX_B(s);
4708                if (mod != 3) {
4709                    gen_lea_modrm(env, s, modrm);
4710                    gen_op_ld_v(s, ot, s->T1, s->A0);
4711                } else if (op == OP_XORL && rm == reg) {
4712                    goto xor_zero;
4713                } else {
4714                    gen_op_mov_v_reg(s, ot, s->T1, rm);
4715                }
4716                gen_op(s, op, ot, reg);
4717                break;
4718            case 2: /* OP A, Iv */
4719                val = insn_get(env, s, ot);
4720                tcg_gen_movi_tl(s->T1, val);
4721                gen_op(s, op, ot, OR_EAX);
4722                break;
4723            }
4724        }
4725        break;
4726
4727    case 0x82:
4728        if (CODE64(s))
4729            goto illegal_op;
4730        /* fall through */
4731    case 0x80: /* GRP1 */
4732    case 0x81:
4733    case 0x83:
4734        {
4735            int val;
4736
4737            ot = mo_b_d(b, dflag);
4738
4739            modrm = x86_ldub_code(env, s);
4740            mod = (modrm >> 6) & 3;
4741            rm = (modrm & 7) | REX_B(s);
4742            op = (modrm >> 3) & 7;
4743
4744            if (mod != 3) {
4745                if (b == 0x83)
4746                    s->rip_offset = 1;
4747                else
4748                    s->rip_offset = insn_const_size(ot);
4749                gen_lea_modrm(env, s, modrm);
4750                opreg = OR_TMP0;
4751            } else {
4752                opreg = rm;
4753            }
4754
4755            switch(b) {
4756            default:
4757            case 0x80:
4758            case 0x81:
4759            case 0x82:
4760                val = insn_get(env, s, ot);
4761                break;
4762            case 0x83:
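                /* 0x83 takes an 8-bit immediate, sign-extended to the
                   operand size.  */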
4763                val = (int8_t)insn_get(env, s, MO_8);
4764                break;
4765            }
4766            tcg_gen_movi_tl(s->T1, val);
4767            gen_op(s, op, ot, opreg);
4768        }
4769        break;
4770
4771        /**************************/
4772        /* inc, dec, and other misc arith */
4773    case 0x40 ... 0x47: /* inc Gv */
4774        ot = dflag;
4775        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4776        break;
4777    case 0x48 ... 0x4f: /* dec Gv */
4778        ot = dflag;
4779        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4780        break;
4781    case 0xf6: /* GRP3 */
4782    case 0xf7:
4783        ot = mo_b_d(b, dflag);
4784
4785        modrm = x86_ldub_code(env, s);
4786        mod = (modrm >> 6) & 3;
4787        rm = (modrm & 7) | REX_B(s);
4788        op = (modrm >> 3) & 7;
4789        if (mod != 3) {
4790            if (op == 0) {
4791                s->rip_offset = insn_const_size(ot);
4792            }
4793            gen_lea_modrm(env, s, modrm);
4794            /* For those below that handle locked memory, don't load here.  */
4795            if (!(s->prefix & PREFIX_LOCK)
4796                || op != 2) {
4797                gen_op_ld_v(s, ot, s->T0, s->A0);
4798            }
4799        } else {
4800            gen_op_mov_v_reg(s, ot, s->T0, rm);
4801        }
4802
4803        switch(op) {
4804        case 0: /* test */
4805            val = insn_get(env, s, ot);
4806            tcg_gen_movi_tl(s->T1, val);
4807            gen_op_testl_T0_T1_cc(s);
4808            set_cc_op(s, CC_OP_LOGICB + ot);
4809            break;
4810        case 2: /* not */
4811            if (s->prefix & PREFIX_LOCK) {
4812                if (mod == 3) {
4813                    goto illegal_op;
4814                }
4815                tcg_gen_movi_tl(s->T0, ~0);
4816                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4817                                            s->mem_index, ot | MO_LE);
4818            } else {
4819                tcg_gen_not_tl(s->T0, s->T0);
4820                if (mod != 3) {
4821                    gen_op_st_v(s, ot, s->T0, s->A0);
4822                } else {
4823                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4824                }
4825            }
4826            break;
4827        case 3: /* neg */
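            /* There is no atomic-negate primitive, so locked NEG is
               emulated with a compare-and-swap loop: on failure, retry
               with the value the cmpxchg just read back.  */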
4828            if (s->prefix & PREFIX_LOCK) {
4829                TCGLabel *label1;
4830                TCGv a0, t0, t1, t2;
4831
4832                if (mod == 3) {
4833                    goto illegal_op;
4834                }
4835                a0 = tcg_temp_local_new();
4836                t0 = tcg_temp_local_new();
4837                label1 = gen_new_label();
4838
4839                tcg_gen_mov_tl(a0, s->A0);
4840                tcg_gen_mov_tl(t0, s->T0);
4841
4842                gen_set_label(label1);
4843                t1 = tcg_temp_new();
4844                t2 = tcg_temp_new();
4845                tcg_gen_mov_tl(t2, t0);
4846                tcg_gen_neg_tl(t1, t0);
4847                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4848                                          s->mem_index, ot | MO_LE);
4849                tcg_temp_free(t1);
4850                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4851
4852                tcg_temp_free(t2);
4853                tcg_temp_free(a0);
4854                tcg_gen_mov_tl(s->T0, t0);
4855                tcg_temp_free(t0);
4856            } else {
4857                tcg_gen_neg_tl(s->T0, s->T0);
4858                if (mod != 3) {
4859                    gen_op_st_v(s, ot, s->T0, s->A0);
4860                } else {
4861                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4862                }
4863            }
4864            gen_op_update_neg_cc(s);
4865            set_cc_op(s, CC_OP_SUBB + ot);
4866            break;
4867        case 4: /* mul */
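            /* MUL: unsigned multiply into (r/e)DX:(r/e)AX (AX for the
               8-bit form).  CF and OF are set iff the high half is
               non-zero, so cpu_cc_src receives the high half.  */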
4868            switch(ot) {
4869            case MO_8:
4870                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4871                tcg_gen_ext8u_tl(s->T0, s->T0);
4872                tcg_gen_ext8u_tl(s->T1, s->T1);
4873                /* XXX: use 32 bit mul which could be faster */
4874                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4875                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4876                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4877                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4878                set_cc_op(s, CC_OP_MULB);
4879                break;
4880            case MO_16:
4881                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4882                tcg_gen_ext16u_tl(s->T0, s->T0);
4883                tcg_gen_ext16u_tl(s->T1, s->T1);
4884                /* XXX: use 32 bit mul which could be faster */
4885                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4886                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4887                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4888                tcg_gen_shri_tl(s->T0, s->T0, 16);
4889                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4890                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4891                set_cc_op(s, CC_OP_MULW);
4892                break;
4893            default:
4894            case MO_32:
4895                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4896                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4897                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4898                                  s->tmp2_i32, s->tmp3_i32);
4899                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4900                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4901                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4902                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4903                set_cc_op(s, CC_OP_MULL);
4904                break;
4905#ifdef TARGET_X86_64
4906            case MO_64:
4907                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4908                                  s->T0, cpu_regs[R_EAX]);
4909                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4910                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4911                set_cc_op(s, CC_OP_MULQ);
4912                break;
4913#endif
4914            }
4915            break;
4916        case 5: /* imul */
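            /* One-operand IMUL: signed multiply.  CF and OF are set iff
               the product does not fit in the low half, so cpu_cc_src
               is loaded with a value that is zero exactly when the
               sign-extended low half equals the full result.  */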
4917            switch(ot) {
4918            case MO_8:
4919                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4920                tcg_gen_ext8s_tl(s->T0, s->T0);
4921                tcg_gen_ext8s_tl(s->T1, s->T1);
4922                /* XXX: use 32 bit mul which could be faster */
4923                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4924                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4925                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4926                tcg_gen_ext8s_tl(s->tmp0, s->T0);
4927                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4928                set_cc_op(s, CC_OP_MULB);
4929                break;
4930            case MO_16:
4931                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4932                tcg_gen_ext16s_tl(s->T0, s->T0);
4933                tcg_gen_ext16s_tl(s->T1, s->T1);
4934                /* XXX: use 32 bit mul which could be faster */
4935                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4936                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4937                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4938                tcg_gen_ext16s_tl(s->tmp0, s->T0);
4939                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4940                tcg_gen_shri_tl(s->T0, s->T0, 16);
4941                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4942                set_cc_op(s, CC_OP_MULW);
4943                break;
4944            default:
4945            case MO_32:
4946                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4947                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4948                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4949                                  s->tmp2_i32, s->tmp3_i32);
4950                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4951                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4952                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4953                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4954                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4955                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4956                set_cc_op(s, CC_OP_MULL);
4957                break;
4958#ifdef TARGET_X86_64
4959            case MO_64:
4960                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4961                                  s->T0, cpu_regs[R_EAX]);
4962                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4963                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4964                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4965                set_cc_op(s, CC_OP_MULQ);
4966                break;
4967#endif
4968            }
4969            break;
4970        case 6: /* div */
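            /* DIV and IDIV run entirely in helpers, which also raise
               #DE on division by zero or quotient overflow.  */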
4971            switch(ot) {
4972            case MO_8:
4973                gen_helper_divb_AL(cpu_env, s->T0);
4974                break;
4975            case MO_16:
4976                gen_helper_divw_AX(cpu_env, s->T0);
4977                break;
4978            default:
4979            case MO_32:
4980                gen_helper_divl_EAX(cpu_env, s->T0);
4981                break;
4982#ifdef TARGET_X86_64
4983            case MO_64:
4984                gen_helper_divq_EAX(cpu_env, s->T0);
4985                break;
4986#endif
4987            }
4988            break;
4989        case 7: /* idiv */
4990            switch(ot) {
4991            case MO_8:
4992                gen_helper_idivb_AL(cpu_env, s->T0);
4993                break;
4994            case MO_16:
4995                gen_helper_idivw_AX(cpu_env, s->T0);
4996                break;
4997            default:
4998            case MO_32:
4999                gen_helper_idivl_EAX(cpu_env, s->T0);
5000                break;
5001#ifdef TARGET_X86_64
5002            case MO_64:
5003                gen_helper_idivq_EAX(cpu_env, s->T0);
5004                break;
5005#endif
5006            }
5007            break;
5008        default:
5009            goto unknown_op;
5010        }
5011        break;
5012
5013    case 0xfe: /* GRP4 */
5014    case 0xff: /* GRP5 */
5015        ot = mo_b_d(b, dflag);
5016
5017        modrm = x86_ldub_code(env, s);
5018        mod = (modrm >> 6) & 3;
5019        rm = (modrm & 7) | REX_B(s);
5020        op = (modrm >> 3) & 7;
5021        if (op >= 2 && b == 0xfe) {
5022            goto unknown_op;
5023        }
5024        if (CODE64(s)) {
5025            if (op == 2 || op == 4) {
5026                /* operand size for jumps is 64 bit */
5027                ot = MO_64;
5028            } else if (op == 3 || op == 5) {
5029                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
5030            } else if (op == 6) {
5031                /* default push size is 64 bit */
5032                ot = mo_pushpop(s, dflag);
5033            }
5034        }
5035        if (mod != 3) {
5036            gen_lea_modrm(env, s, modrm);
5037            if (op >= 2 && op != 3 && op != 5)
5038                gen_op_ld_v(s, ot, s->T0, s->A0);
5039        } else {
5040            gen_op_mov_v_reg(s, ot, s->T0, rm);
5041        }
5042
5043        switch(op) {
5044        case 0: /* inc Ev */
5045            if (mod != 3)
5046                opreg = OR_TMP0;
5047            else
5048                opreg = rm;
5049            gen_inc(s, ot, opreg, 1);
5050            break;
5051        case 1: /* dec Ev */
5052            if (mod != 3)
5053                opreg = OR_TMP0;
5054            else
5055                opreg = rm;
5056            gen_inc(s, ot, opreg, -1);
5057            break;
5058        case 2: /* call Ev */
5059            /* XXX: optimize if memory (no 'and' is necessary) */
5060            if (dflag == MO_16) {
5061                tcg_gen_ext16u_tl(s->T0, s->T0);
5062            }
5063            next_eip = s->pc - s->cs_base;
5064            tcg_gen_movi_tl(s->T1, next_eip);
5065            gen_push_v(s, s->T1);
5066            gen_op_jmp_v(s->T0);
5067            gen_bnd_jmp(s);
5068            gen_jr(s, s->T0);
5069            break;
5070        case 3: /* lcall Ev */
5071            gen_op_ld_v(s, ot, s->T1, s->A0);
5072            gen_add_A0_im(s, 1 << ot);
5073            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5074        do_lcall:
5075            if (s->pe && !s->vm86) {
5076                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5077                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5078                                           tcg_const_i32(dflag - 1),
5079                                           tcg_const_tl(s->pc - s->cs_base));
5080            } else {
5081                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5082                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5083                                      tcg_const_i32(dflag - 1),
5084                                      tcg_const_i32(s->pc - s->cs_base));
5085            }
5086            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5087            gen_jr(s, s->tmp4);
5088            break;
5089        case 4: /* jmp Ev */
5090            if (dflag == MO_16) {
5091                tcg_gen_ext16u_tl(s->T0, s->T0);
5092            }
5093            gen_op_jmp_v(s->T0);
5094            gen_bnd_jmp(s);
5095            gen_jr(s, s->T0);
5096            break;
5097        case 5: /* ljmp Ev */
5098            gen_op_ld_v(s, ot, s->T1, s->A0);
5099            gen_add_A0_im(s, 1 << ot);
5100            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5101        do_ljmp:
5102            if (s->pe && !s->vm86) {
5103                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5104                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5105                                          tcg_const_tl(s->pc - s->cs_base));
5106            } else {
5107                gen_op_movl_seg_T0_vm(s, R_CS);
5108                gen_op_jmp_v(s->T1);
5109            }
5110            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5111            gen_jr(s, s->tmp4);
5112            break;
5113        case 6: /* push Ev */
5114            gen_push_v(s, s->T0);
5115            break;
5116        default:
5117            goto unknown_op;
5118        }
5119        break;
5120
5121    case 0x84: /* test Ev, Gv */
5122    case 0x85:
5123        ot = mo_b_d(b, dflag);
5124
5125        modrm = x86_ldub_code(env, s);
5126        reg = ((modrm >> 3) & 7) | rex_r;
5127
5128        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5129        gen_op_mov_v_reg(s, ot, s->T1, reg);
5130        gen_op_testl_T0_T1_cc(s);
5131        set_cc_op(s, CC_OP_LOGICB + ot);
5132        break;
5133
5134    case 0xa8: /* test eAX, Iv */
5135    case 0xa9:
5136        ot = mo_b_d(b, dflag);
5137        val = insn_get(env, s, ot);
5138
5139        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5140        tcg_gen_movi_tl(s->T1, val);
5141        gen_op_testl_T0_T1_cc(s);
5142        set_cc_op(s, CC_OP_LOGICB + ot);
5143        break;
5144
5145    case 0x98: /* CWDE/CBW */
5146        switch (dflag) {
5147#ifdef TARGET_X86_64
5148        case MO_64:
5149            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5150            tcg_gen_ext32s_tl(s->T0, s->T0);
5151            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5152            break;
5153#endif
5154        case MO_32:
5155            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5156            tcg_gen_ext16s_tl(s->T0, s->T0);
5157            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5158            break;
5159        case MO_16:
5160            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5161            tcg_gen_ext8s_tl(s->T0, s->T0);
5162            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5163            break;
5164        default:
5165            tcg_abort();
5166        }
5167        break;
5168    case 0x99: /* CDQ/CWD */
5169        switch (dflag) {
5170#ifdef TARGET_X86_64
5171        case MO_64:
5172            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5173            tcg_gen_sari_tl(s->T0, s->T0, 63);
5174            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5175            break;
5176#endif
5177        case MO_32:
5178            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5179            tcg_gen_ext32s_tl(s->T0, s->T0);
5180            tcg_gen_sari_tl(s->T0, s->T0, 31);
5181            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5182            break;
5183        case MO_16:
5184            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5185            tcg_gen_ext16s_tl(s->T0, s->T0);
5186            tcg_gen_sari_tl(s->T0, s->T0, 15);
5187            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5188            break;
5189        default:
5190            tcg_abort();
5191        }
5192        break;
5193    case 0x1af: /* imul Gv, Ev */
5194    case 0x69: /* imul Gv, Ev, I */
5195    case 0x6b:
5196        ot = dflag;
5197        modrm = x86_ldub_code(env, s);
5198        reg = ((modrm >> 3) & 7) | rex_r;
5199        if (b == 0x69)
5200            s->rip_offset = insn_const_size(ot);
5201        else if (b == 0x6b)
5202            s->rip_offset = 1;
5203        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5204        if (b == 0x69) {
5205            val = insn_get(env, s, ot);
5206            tcg_gen_movi_tl(s->T1, val);
5207        } else if (b == 0x6b) {
5208            val = (int8_t)insn_get(env, s, MO_8);
5209            tcg_gen_movi_tl(s->T1, val);
5210        } else {
5211            gen_op_mov_v_reg(s, ot, s->T1, reg);
5212        }
5213        switch (ot) {
5214#ifdef TARGET_X86_64
5215        case MO_64:
5216            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5217            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5218            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5219            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5220            break;
5221#endif
5222        case MO_32:
5223            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5224            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5225            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5226                              s->tmp2_i32, s->tmp3_i32);
5227            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5228            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5229            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5230            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5231            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5232            break;
5233        default:
5234            tcg_gen_ext16s_tl(s->T0, s->T0);
5235            tcg_gen_ext16s_tl(s->T1, s->T1);
5236            /* XXX: use 32 bit mul which could be faster */
5237            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5238            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5239            tcg_gen_ext16s_tl(s->tmp0, s->T0);
5240            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5241            gen_op_mov_reg_v(s, ot, reg, s->T0);
5242            break;
5243        }
5244        set_cc_op(s, CC_OP_MULB + ot);
5245        break;
5246    case 0x1c0:
5247    case 0x1c1: /* xadd Ev, Gv */
5248        ot = mo_b_d(b, dflag);
5249        modrm = x86_ldub_code(env, s);
5250        reg = ((modrm >> 3) & 7) | rex_r;
5251        mod = (modrm >> 6) & 3;
5252        gen_op_mov_v_reg(s, ot, s->T0, reg);
5253        if (mod == 3) {
5254            rm = (modrm & 7) | REX_B(s);
5255            gen_op_mov_v_reg(s, ot, s->T1, rm);
5256            tcg_gen_add_tl(s->T0, s->T0, s->T1);
5257            gen_op_mov_reg_v(s, ot, reg, s->T1);
5258            gen_op_mov_reg_v(s, ot, rm, s->T0);
5259        } else {
5260            gen_lea_modrm(env, s, modrm);
5261            if (s->prefix & PREFIX_LOCK) {
5262                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5263                                            s->mem_index, ot | MO_LE);
5264                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5265            } else {
5266                gen_op_ld_v(s, ot, s->T1, s->A0);
5267                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5268                gen_op_st_v(s, ot, s->T0, s->A0);
5269            }
5270            gen_op_mov_reg_v(s, ot, reg, s->T1);
5271        }
5272        gen_op_update2_cc(s);
5273        set_cc_op(s, CC_OP_ADDB + ot);
5274        break;
5275    case 0x1b0:
5276    case 0x1b1: /* cmpxchg Ev, Gv */
5277        {
5278            TCGv oldv, newv, cmpv;
5279
5280            ot = mo_b_d(b, dflag);
5281            modrm = x86_ldub_code(env, s);
5282            reg = ((modrm >> 3) & 7) | rex_r;
5283            mod = (modrm >> 6) & 3;
5284            oldv = tcg_temp_new();
5285            newv = tcg_temp_new();
5286            cmpv = tcg_temp_new();
5287            gen_op_mov_v_reg(s, ot, newv, reg);
5288            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5289
5290            if (s->prefix & PREFIX_LOCK) {
5291                if (mod == 3) {
5292                    goto illegal_op;
5293                }
5294                gen_lea_modrm(env, s, modrm);
5295                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5296                                          s->mem_index, ot | MO_LE);
5297                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5298            } else {
5299                if (mod == 3) {
5300                    rm = (modrm & 7) | REX_B(s);
5301                    gen_op_mov_v_reg(s, ot, oldv, rm);
5302                } else {
5303                    gen_lea_modrm(env, s, modrm);
5304                    gen_op_ld_v(s, ot, oldv, s->A0);
5305                    rm = 0; /* avoid warning */
5306                }
5307                gen_extu(ot, oldv);
5308                gen_extu(ot, cmpv);
5309                /* store value = (old == cmp ? new : old);  */
5310                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5311                if (mod == 3) {
5312                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5313                    gen_op_mov_reg_v(s, ot, rm, newv);
5314                } else {
5315                    /* Perform an unconditional store cycle like a physical
5316                       CPU; it must happen before changing the accumulator
5317                       to ensure idempotency if the store faults and the
5318                       instruction is restarted */
5319                    gen_op_st_v(s, ot, newv, s->A0);
5320                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5321                }
5322            }
5323            tcg_gen_mov_tl(cpu_cc_src, oldv);
5324            tcg_gen_mov_tl(s->cc_srcT, cmpv);
5325            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5326            set_cc_op(s, CC_OP_SUBB + ot);
5327            tcg_temp_free(oldv);
5328            tcg_temp_free(newv);
5329            tcg_temp_free(cmpv);
5330        }
5331        break;
5332    case 0x1c7: /* cmpxchg8b */
5333        modrm = x86_ldub_code(env, s);
5334        mod = (modrm >> 6) & 3;
5335        switch ((modrm >> 3) & 7) {
5336        case 1: /* CMPXCHG8B, CMPXCHG16B */
5337            if (mod == 3) {
5338                goto illegal_op;
5339            }
5340#ifdef TARGET_X86_64
5341            if (dflag == MO_64) {
5342                if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5343                    goto illegal_op;
5344                }
5345                gen_lea_modrm(env, s, modrm);
5346                if ((s->prefix & PREFIX_LOCK) &&
5347                    (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5348                    gen_helper_cmpxchg16b(cpu_env, s->A0);
5349                } else {
5350                    gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5351                }
5352                set_cc_op(s, CC_OP_EFLAGS);
5353                break;
5354            }
5355#endif
5356            if (!(s->cpuid_features & CPUID_CX8)) {
5357                goto illegal_op;
5358            }
5359            gen_lea_modrm(env, s, modrm);
5360            if ((s->prefix & PREFIX_LOCK) &&
5361                (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5362                gen_helper_cmpxchg8b(cpu_env, s->A0);
5363            } else {
5364                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5365            }
5366            set_cc_op(s, CC_OP_EFLAGS);
5367            break;
5368
5369        case 7: /* RDSEED */
5370        case 6: /* RDRAND */
5371            if (mod != 3 ||
5372                (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5373                !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5374                goto illegal_op;
5375            }
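            /* RDRAND draws on an external entropy source, so under
               icount it is bracketed with gen_io_start()/gen_io_end()
               and the TB is ended, like other I/O instructions.  */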
5376            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5377                gen_io_start();
5378            }
5379            gen_helper_rdrand(s->T0, cpu_env);
5380            rm = (modrm & 7) | REX_B(s);
5381            gen_op_mov_reg_v(s, dflag, rm, s->T0);
5382            set_cc_op(s, CC_OP_EFLAGS);
5383            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5384                gen_io_end();
5385                gen_jmp(s, s->pc - s->cs_base);
5386            }
5387            break;
5388
5389        default:
5390            goto illegal_op;
5391        }
5392        break;
5393
5394        /**************************/
5395        /* push/pop */
5396    case 0x50 ... 0x57: /* push */
5397        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5398        gen_push_v(s, s->T0);
5399        break;
5400    case 0x58 ... 0x5f: /* pop */
5401        ot = gen_pop_T0(s);
5402        /* NOTE: order is important for pop %sp */
5403        gen_pop_update(s, ot);
5404        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5405        break;
5406    case 0x60: /* pusha */
5407        if (CODE64(s))
5408            goto illegal_op;
5409        gen_pusha(s);
5410        break;
5411    case 0x61: /* popa */
5412        if (CODE64(s))
5413            goto illegal_op;
5414        gen_popa(s);
5415        break;
5416    case 0x68: /* push Iv */
5417    case 0x6a:
5418        ot = mo_pushpop(s, dflag);
5419        if (b == 0x68)
5420            val = insn_get(env, s, ot);
5421        else
5422            val = (int8_t)insn_get(env, s, MO_8);
5423        tcg_gen_movi_tl(s->T0, val);
5424        gen_push_v(s, s->T0);
5425        break;
5426    case 0x8f: /* pop Ev */
5427        modrm = x86_ldub_code(env, s);
5428        mod = (modrm >> 6) & 3;
5429        ot = gen_pop_T0(s);
5430        if (mod == 3) {
5431            /* NOTE: order is important for pop %sp */
5432            gen_pop_update(s, ot);
5433            rm = (modrm & 7) | REX_B(s);
5434            gen_op_mov_reg_v(s, ot, rm, s->T0);
5435        } else {
5436            /* NOTE: order is important too for MMU exceptions */
5437            s->popl_esp_hack = 1 << ot;
5438            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5439            s->popl_esp_hack = 0;
5440            gen_pop_update(s, ot);
5441        }
5442        break;
5443    case 0xc8: /* enter */
5444        {
5445            int level;
5446            val = x86_lduw_code(env, s);
5447            level = x86_ldub_code(env, s);
5448            gen_enter(s, val, level);
5449        }
5450        break;
5451    case 0xc9: /* leave */
5452        gen_leave(s);
5453        break;
5454    case 0x06: /* push es */
5455    case 0x0e: /* push cs */
5456    case 0x16: /* push ss */
5457    case 0x1e: /* push ds */
5458        if (CODE64(s))
5459            goto illegal_op;
5460        gen_op_movl_T0_seg(s, b >> 3);
5461        gen_push_v(s, s->T0);
5462        break;
5463    case 0x1a0: /* push fs */
5464    case 0x1a8: /* push gs */
5465        gen_op_movl_T0_seg(s, (b >> 3) & 7);
5466        gen_push_v(s, s->T0);
5467        break;
5468    case 0x07: /* pop es */
5469    case 0x17: /* pop ss */
5470    case 0x1f: /* pop ds */
5471        if (CODE64(s))
5472            goto illegal_op;
5473        reg = b >> 3;
5474        ot = gen_pop_T0(s);
5475        gen_movl_seg_T0(s, reg);
5476        gen_pop_update(s, ot);
5477        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
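        /* Loading SS inhibits interrupts and traps until the end of the
           next instruction, hence gen_eob_inhibit_irq() and the
           suppression of TF for this TB.  */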
5478        if (s->base.is_jmp) {
5479            gen_jmp_im(s, s->pc - s->cs_base);
5480            if (reg == R_SS) {
5481                s->tf = 0;
5482                gen_eob_inhibit_irq(s, true);
5483            } else {
5484                gen_eob(s);
5485            }
5486        }
5487        break;
5488    case 0x1a1: /* pop fs */
5489    case 0x1a9: /* pop gs */
5490        ot = gen_pop_T0(s);
5491        gen_movl_seg_T0(s, (b >> 3) & 7);
5492        gen_pop_update(s, ot);
5493        if (s->base.is_jmp) {
5494            gen_jmp_im(s, s->pc - s->cs_base);
5495            gen_eob(s);
5496        }
5497        break;
5498
5499        /**************************/
5500        /* mov */
5501    case 0x88:
5502    case 0x89: /* mov Gv, Ev */
5503        ot = mo_b_d(b, dflag);
5504        modrm = x86_ldub_code(env, s);
5505        reg = ((modrm >> 3) & 7) | rex_r;
5506
5507        /* generate a generic store */
5508        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5509        break;
5510    case 0xc6:
5511    case 0xc7: /* mov Ev, Iv */
5512        ot = mo_b_d(b, dflag);
5513        modrm = x86_ldub_code(env, s);
5514        mod = (modrm >> 6) & 3;
5515        if (mod != 3) {
5516            s->rip_offset = insn_const_size(ot);
5517            gen_lea_modrm(env, s, modrm);
5518        }
5519        val = insn_get(env, s, ot);
5520        tcg_gen_movi_tl(s->T0, val);
5521        if (mod != 3) {
5522            gen_op_st_v(s, ot, s->T0, s->A0);
5523        } else {
5524            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5525        }
5526        break;
5527    case 0x8a:
5528    case 0x8b: /* mov Ev, Gv */
5529        ot = mo_b_d(b, dflag);
5530        modrm = x86_ldub_code(env, s);
5531        reg = ((modrm >> 3) & 7) | rex_r;
5532
5533        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5534        gen_op_mov_reg_v(s, ot, reg, s->T0);
5535        break;
5536    case 0x8e: /* mov seg, Gv */
5537        modrm = x86_ldub_code(env, s);
5538        reg = (modrm >> 3) & 7;
5539        if (reg >= 6 || reg == R_CS)
5540            goto illegal_op;
5541        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5542        gen_movl_seg_T0(s, reg);
5543        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5544        if (s->base.is_jmp) {
5545            gen_jmp_im(s, s->pc - s->cs_base);
5546            if (reg == R_SS) {
5547                s->tf = 0;
5548                gen_eob_inhibit_irq(s, true);
5549            } else {
5550                gen_eob(s);
5551            }
5552        }
5553        break;
5554    case 0x8c: /* mov Gv, seg */
5555        modrm = x86_ldub_code(env, s);
5556        reg = (modrm >> 3) & 7;
5557        mod = (modrm >> 6) & 3;
5558        if (reg >= 6)
5559            goto illegal_op;
5560        gen_op_movl_T0_seg(s, reg);
5561        ot = mod == 3 ? dflag : MO_16;
5562        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5563        break;
5564
5565    case 0x1b6: /* movzbS Gv, Eb */
5566    case 0x1b7: /* movzwS Gv, Ew */
5567    case 0x1be: /* movsbS Gv, Eb */
5568    case 0x1bf: /* movswS Gv, Ew */
5569        {
5570            TCGMemOp d_ot;
5571            TCGMemOp s_ot;
5572
5573            /* d_ot is the size of destination */
5574            d_ot = dflag;
5575            /* ot is the size of source */
5576            ot = (b & 1) + MO_8;
5577            /* s_ot is the sign+size of source */
5578            s_ot = b & 8 ? MO_SIGN | ot : ot;
5579
5580            modrm = x86_ldub_code(env, s);
5581            reg = ((modrm >> 3) & 7) | rex_r;
5582            mod = (modrm >> 6) & 3;
5583            rm = (modrm & 7) | REX_B(s);
5584
5585            if (mod == 3) {
5586                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5587                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5588                } else {
5589                    gen_op_mov_v_reg(s, ot, s->T0, rm);
5590                    switch (s_ot) {
5591                    case MO_UB:
5592                        tcg_gen_ext8u_tl(s->T0, s->T0);
5593                        break;
5594                    case MO_SB:
5595                        tcg_gen_ext8s_tl(s->T0, s->T0);
5596                        break;
5597                    case MO_UW:
5598                        tcg_gen_ext16u_tl(s->T0, s->T0);
5599                        break;
5600                    default:
5601                    case MO_SW:
5602                        tcg_gen_ext16s_tl(s->T0, s->T0);
5603                        break;
5604                    }
5605                }
5606                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5607            } else {
5608                gen_lea_modrm(env, s, modrm);
5609                gen_op_ld_v(s, s_ot, s->T0, s->A0);
5610                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5611            }
5612        }
5613        break;
5614
5615    case 0x8d: /* lea */
5616        modrm = x86_ldub_code(env, s);
5617        mod = (modrm >> 6) & 3;
5618        if (mod == 3)
5619            goto illegal_op;
5620        reg = ((modrm >> 3) & 7) | rex_r;
5621        {
5622            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5623            TCGv ea = gen_lea_modrm_1(s, a);
5624            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5625            gen_op_mov_reg_v(s, dflag, reg, s->A0);
5626        }
5627        break;
5628
5629    case 0xa0: /* mov EAX, Ov */
5630    case 0xa1:
5631    case 0xa2: /* mov Ov, EAX */
5632    case 0xa3:
5633        {
5634            target_ulong offset_addr;
5635
5636            ot = mo_b_d(b, dflag);
5637            switch (s->aflag) {
5638#ifdef TARGET_X86_64
5639            case MO_64:
5640                offset_addr = x86_ldq_code(env, s);
5641                break;
5642#endif
5643            default:
5644                offset_addr = insn_get(env, s, s->aflag);
5645                break;
5646            }
5647            tcg_gen_movi_tl(s->A0, offset_addr);
5648            gen_add_A0_ds_seg(s);
5649            if ((b & 2) == 0) {
5650                gen_op_ld_v(s, ot, s->T0, s->A0);
5651                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5652            } else {
5653                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5654                gen_op_st_v(s, ot, s->T0, s->A0);
5655            }
5656        }
5657        break;
5658    case 0xd7: /* xlat */
5659        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5660        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5661        tcg_gen_add_tl(s->A0, s->A0, s->T0);
5662        gen_extu(s->aflag, s->A0);
5663        gen_add_A0_ds_seg(s);
5664        gen_op_ld_v(s, MO_8, s->T0, s->A0);
5665        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5666        break;
5667    case 0xb0 ... 0xb7: /* mov R, Ib */
5668        val = insn_get(env, s, MO_8);
5669        tcg_gen_movi_tl(s->T0, val);
5670        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5671        break;
5672    case 0xb8 ... 0xbf: /* mov R, Iv */
5673#ifdef TARGET_X86_64
5674        if (dflag == MO_64) {
5675            uint64_t tmp;
5676            /* 64 bit case */
5677            tmp = x86_ldq_code(env, s);
5678            reg = (b & 7) | REX_B(s);
5679            tcg_gen_movi_tl(s->T0, tmp);
5680            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5681        } else
5682#endif
5683        {
5684            ot = dflag;
5685            val = insn_get(env, s, ot);
5686            reg = (b & 7) | REX_B(s);
5687            tcg_gen_movi_tl(s->T0, val);
5688            gen_op_mov_reg_v(s, ot, reg, s->T0);
5689        }
5690        break;
5691
5692    case 0x91 ... 0x97: /* xchg R, EAX */
5693    do_xchg_reg_eax:
5694        ot = dflag;
5695        reg = (b & 7) | REX_B(s);
5696        rm = R_EAX;
5697        goto do_xchg_reg;
5698    case 0x86:
5699    case 0x87: /* xchg Ev, Gv */
5700        ot = mo_b_d(b, dflag);
5701        modrm = x86_ldub_code(env, s);
5702        reg = ((modrm >> 3) & 7) | rex_r;
5703        mod = (modrm >> 6) & 3;
5704        if (mod == 3) {
5705            rm = (modrm & 7) | REX_B(s);
5706        do_xchg_reg:
5707            gen_op_mov_v_reg(s, ot, s->T0, reg);
5708            gen_op_mov_v_reg(s, ot, s->T1, rm);
5709            gen_op_mov_reg_v(s, ot, rm, s->T0);
5710            gen_op_mov_reg_v(s, ot, reg, s->T1);
5711        } else {
5712            gen_lea_modrm(env, s, modrm);
5713            gen_op_mov_v_reg(s, ot, s->T0, reg);
5714            /* for xchg, lock is implicit */
5715            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5716                                   s->mem_index, ot | MO_LE);
5717            gen_op_mov_reg_v(s, ot, reg, s->T1);
5718        }
5719        break;
5720    case 0xc4: /* les Gv */
5721        /* In CODE64 this is VEX3; see above.  */
5722        op = R_ES;
5723        goto do_lxx;
5724    case 0xc5: /* lds Gv */
5725        /* In CODE64 this is VEX2; see above.  */
5726        op = R_DS;
5727        goto do_lxx;
5728    case 0x1b2: /* lss Gv */
5729        op = R_SS;
5730        goto do_lxx;
5731    case 0x1b4: /* lfs Gv */
5732        op = R_FS;
5733        goto do_lxx;
5734    case 0x1b5: /* lgs Gv */
5735        op = R_GS;
5736    do_lxx:
5737        ot = dflag != MO_16 ? MO_32 : MO_16;
5738        modrm = x86_ldub_code(env, s);
5739        reg = ((modrm >> 3) & 7) | rex_r;
5740        mod = (modrm >> 6) & 3;
5741        if (mod == 3)
5742            goto illegal_op;
5743        gen_lea_modrm(env, s, modrm);
5744        gen_op_ld_v(s, ot, s->T1, s->A0);
5745        gen_add_A0_im(s, 1 << ot);
5746        /* load the segment first to handle exceptions properly */
5747        gen_op_ld_v(s, MO_16, s->T0, s->A0);
5748        gen_movl_seg_T0(s, op);
5749        /* then put the data */
5750        gen_op_mov_reg_v(s, ot, reg, s->T1);
5751        if (s->base.is_jmp) {
5752            gen_jmp_im(s, s->pc - s->cs_base);
5753            gen_eob(s);
5754        }
5755        break;
5756
5757        /************************/
5758        /* shifts */
5759    case 0xc0:
5760    case 0xc1:
5761        /* shift Ev,Ib */
5762        shift = 2;
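        /* 'shift' selects the count operand: 0 = CL, 1 = the constant 1,
           2 = an immediate byte fetched below.  */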
5763    grp2:
5764        {
5765            ot = mo_b_d(b, dflag);
5766            modrm = x86_ldub_code(env, s);
5767            mod = (modrm >> 6) & 3;
5768            op = (modrm >> 3) & 7;
5769
5770            if (mod != 3) {
5771                if (shift == 2) {
5772                    s->rip_offset = 1;
5773                }
5774                gen_lea_modrm(env, s, modrm);
5775                opreg = OR_TMP0;
5776            } else {
5777                opreg = (modrm & 7) | REX_B(s);
5778            }
5779
5780            /* simpler op */
5781            if (shift == 0) {
5782                gen_shift(s, op, ot, opreg, OR_ECX);
5783            } else {
5784                if (shift == 2) {
5785                    shift = x86_ldub_code(env, s);
5786                }
5787                gen_shifti(s, op, ot, opreg, shift);
5788            }
5789        }
5790        break;
5791    case 0xd0:
5792    case 0xd1:
5793        /* shift Ev,1 */
5794        shift = 1;
5795        goto grp2;
5796    case 0xd2:
5797    case 0xd3:
5798        /* shift Ev,cl */
5799        shift = 0;
5800        goto grp2;
5801
5802    case 0x1a4: /* shld imm */
5803        op = 0;
5804        shift = 1;
5805        goto do_shiftd;
5806    case 0x1a5: /* shld cl */
5807        op = 0;
5808        shift = 0;
5809        goto do_shiftd;
5810    case 0x1ac: /* shrd imm */
5811        op = 1;
5812        shift = 1;
5813        goto do_shiftd;
5814    case 0x1ad: /* shrd cl */
5815        op = 1;
5816        shift = 0;
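        /* op selects the direction (0 = SHLD, 1 = SHRD); shift selects
           the count operand (1 = immediate byte, 0 = CL).  */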
5817    do_shiftd:
5818        ot = dflag;
5819        modrm = x86_ldub_code(env, s);
5820        mod = (modrm >> 6) & 3;
5821        rm = (modrm & 7) | REX_B(s);
5822        reg = ((modrm >> 3) & 7) | rex_r;
5823        if (mod != 3) {
5824            gen_lea_modrm(env, s, modrm);
5825            opreg = OR_TMP0;
5826        } else {
5827            opreg = rm;
5828        }
5829        gen_op_mov_v_reg(s, ot, s->T1, reg);
5830
5831        if (shift) {
5832            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5833            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5834            tcg_temp_free(imm);
5835        } else {
5836            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5837        }
5838        break;
5839
5840        /************************/
5841        /* floats */
5842    case 0xd8 ... 0xdf:
5843        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5844            /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5845            /* XXX: what should happen on an illegal op? */
5846            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5847            break;
5848        }
5849        modrm = x86_ldub_code(env, s);
5850        mod = (modrm >> 6) & 3;
5851        rm = modrm & 7;
5852        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
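        /* op packs the low three opcode bits (0xd8..0xdf) with the
           ModRM reg field into a 6-bit index that selects the FPU
           operation in the switches below.  */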
5853        if (mod != 3) {
5854            /* memory op */
5855            gen_lea_modrm(env, s, modrm);
5856            switch(op) {
5857            case 0x00 ... 0x07: /* fxxxs */
5858            case 0x10 ... 0x17: /* fixxxl */
5859            case 0x20 ... 0x27: /* fxxxl */
5860            case 0x30 ... 0x37: /* fixxx */
5861                {
5862                    int op1;
5863                    op1 = op & 7;
5864
5865                    switch(op >> 4) {
5866                    case 0:
5867                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5868                                            s->mem_index, MO_LEUL);
5869                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5870                        break;
5871                    case 1:
5872                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5873                                            s->mem_index, MO_LEUL);
5874                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5875                        break;
5876                    case 2:
5877                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5878                                            s->mem_index, MO_LEQ);
5879                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5880                        break;
5881                    case 3:
5882                    default:
5883                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5884                                            s->mem_index, MO_LESW);
5885                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5886                        break;
5887                    }
5888
5889                    gen_helper_fp_arith_ST0_FT0(op1);
5890                    if (op1 == 3) {
5891                        /* fcomp needs pop */
5892                        gen_helper_fpop(cpu_env);
5893                    }
5894                }
5895                break;
5896            case 0x08: /* flds */
5897            case 0x0a: /* fsts */
5898            case 0x0b: /* fstps */
5899            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5900            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5901            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5902                switch(op & 7) {
5903                case 0:
5904                    switch(op >> 4) {
5905                    case 0:
5906                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5907                                            s->mem_index, MO_LEUL);
5908                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5909                        break;
5910                    case 1:
5911                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5912                                            s->mem_index, MO_LEUL);
5913                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5914                        break;
5915                    case 2:
5916                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5917                                            s->mem_index, MO_LEQ);
5918                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5919                        break;
5920                    case 3:
5921                    default:
5922                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5923                                            s->mem_index, MO_LESW);
5924                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5925                        break;
5926                    }
5927                    break;
5928                case 1:
5929                    /* XXX: the SSE3 CPUID bit must be tested (fisttp)! */
5930                    switch(op >> 4) {
5931                    case 1:
5932                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5933                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5934                                            s->mem_index, MO_LEUL);
5935                        break;
5936                    case 2:
5937                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5938                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5939                                            s->mem_index, MO_LEQ);
5940                        break;
5941                    case 3:
5942                    default:
5943                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5944                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5945                                            s->mem_index, MO_LEUW);
5946                        break;
5947                    }
5948                    gen_helper_fpop(cpu_env);
5949                    break;
5950                default:
5951                    switch(op >> 4) {
5952                    case 0:
5953                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5954                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5955                                            s->mem_index, MO_LEUL);
5956                        break;
5957                    case 1:
5958                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5959                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5960                                            s->mem_index, MO_LEUL);
5961                        break;
5962                    case 2:
5963                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
5964                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5965                                            s->mem_index, MO_LEQ);
5966                        break;
5967                    case 3:
5968                    default:
5969                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
5970                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5971                                            s->mem_index, MO_LEUW);
5972                        break;
5973                    }
5974                    if ((op & 7) == 3)
5975                        gen_helper_fpop(cpu_env);
5976                    break;
5977                }
5978                break;
5979            case 0x0c: /* fldenv mem */
5980                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5981                break;
5982            case 0x0d: /* fldcw mem */
5983                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5984                                    s->mem_index, MO_LEUW);
5985                gen_helper_fldcw(cpu_env, s->tmp2_i32);
5986                break;
5987            case 0x0e: /* fnstenv mem */
5988                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5989                break;
5990            case 0x0f: /* fnstcw mem */
5991                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
5992                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5993                                    s->mem_index, MO_LEUW);
5994                break;
5995            case 0x1d: /* fldt mem */
5996                gen_helper_fldt_ST0(cpu_env, s->A0);
5997                break;
5998            case 0x1f: /* fstpt mem */
5999                gen_helper_fstt_ST0(cpu_env, s->A0);
6000                gen_helper_fpop(cpu_env);
6001                break;
6002            case 0x2c: /* frstor mem */
6003                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6004                break;
6005            case 0x2e: /* fnsave mem */
6006                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6007                break;
6008            case 0x2f: /* fnstsw mem */
6009                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6010                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6011                                    s->mem_index, MO_LEUW);
6012                break;
6013            case 0x3c: /* fbld */
6014                gen_helper_fbld_ST0(cpu_env, s->A0);
6015                break;
6016            case 0x3e: /* fbstp */
6017                gen_helper_fbst_ST0(cpu_env, s->A0);
6018                gen_helper_fpop(cpu_env);
6019                break;
6020            case 0x3d: /* fildll */
6021                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6022                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6023                break;
6024            case 0x3f: /* fistpll */
6025                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6026                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6027                gen_helper_fpop(cpu_env);
6028                break;
6029            default:
6030                goto unknown_op;
6031            }
6032        } else {
6033            /* register float ops */
6034            opreg = rm;
6035
6036            switch(op) {
6037            case 0x08: /* fld sti */
6038                gen_helper_fpush(cpu_env);
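                    /* fpush has already moved TOP down one slot, so the
                       register that was ST(i) before the push is now
                       ST(i + 1); hence (opreg + 1) & 7 below.  */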
6039                gen_helper_fmov_ST0_STN(cpu_env,
6040                                        tcg_const_i32((opreg + 1) & 7));
6041                break;
6042            case 0x09: /* fxchg sti */
6043            case 0x29: /* fxchg4 sti, undocumented op */
6044            case 0x39: /* fxchg7 sti, undocumented op */
6045                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6046                break;
6047            case 0x0a: /* grp d9/2 */
6048                switch(rm) {
6049                case 0: /* fnop */
6050                    /* check exceptions (FreeBSD FPU probe) */
6051                    gen_helper_fwait(cpu_env);
6052                    break;
6053                default:
6054                    goto unknown_op;
6055                }
6056                break;
6057            case 0x0c: /* grp d9/4 */
6058                switch(rm) {
6059                case 0: /* fchs */
6060                    gen_helper_fchs_ST0(cpu_env);
6061                    break;
6062                case 1: /* fabs */
6063                    gen_helper_fabs_ST0(cpu_env);
6064                    break;
6065                case 4: /* ftst */
6066                    gen_helper_fldz_FT0(cpu_env);
6067                    gen_helper_fcom_ST0_FT0(cpu_env);
6068                    break;
6069                case 5: /* fxam */
6070                    gen_helper_fxam_ST0(cpu_env);
6071                    break;
6072                default:
6073                    goto unknown_op;
6074                }
6075                break;
6076            case 0x0d: /* grp d9/5 */
6077                {
6078                    switch(rm) {
6079                    case 0:
6080                        gen_helper_fpush(cpu_env);
6081                        gen_helper_fld1_ST0(cpu_env);
6082                        break;
6083                    case 1:
6084                        gen_helper_fpush(cpu_env);
6085                        gen_helper_fldl2t_ST0(cpu_env);
6086                        break;
6087                    case 2:
6088                        gen_helper_fpush(cpu_env);
6089                        gen_helper_fldl2e_ST0(cpu_env);
6090                        break;
6091                    case 3:
6092                        gen_helper_fpush(cpu_env);
6093                        gen_helper_fldpi_ST0(cpu_env);
6094                        break;
6095                    case 4:
6096                        gen_helper_fpush(cpu_env);
6097                        gen_helper_fldlg2_ST0(cpu_env);
6098                        break;
6099                    case 5:
6100                        gen_helper_fpush(cpu_env);
6101                        gen_helper_fldln2_ST0(cpu_env);
6102                        break;
6103                    case 6:
6104                        gen_helper_fpush(cpu_env);
6105                        gen_helper_fldz_ST0(cpu_env);
6106                        break;
6107                    default:
6108                        goto unknown_op;
6109                    }
6110                }
6111                break;
6112            case 0x0e: /* grp d9/6 */
6113                switch(rm) {
6114                case 0: /* f2xm1 */
6115                    gen_helper_f2xm1(cpu_env);
6116                    break;
6117                case 1: /* fyl2x */
6118                    gen_helper_fyl2x(cpu_env);
6119                    break;
6120                case 2: /* fptan */
6121                    gen_helper_fptan(cpu_env);
6122                    break;
6123                case 3: /* fpatan */
6124                    gen_helper_fpatan(cpu_env);
6125                    break;
6126                case 4: /* fxtract */
6127                    gen_helper_fxtract(cpu_env);
6128                    break;
6129                case 5: /* fprem1 */
6130                    gen_helper_fprem1(cpu_env);
6131                    break;
6132                case 6: /* fdecstp */
6133                    gen_helper_fdecstp(cpu_env);
6134                    break;
6135                default:
6136                case 7: /* fincstp */
6137                    gen_helper_fincstp(cpu_env);
6138                    break;
6139                }
6140                break;
6141            case 0x0f: /* grp d9/7 */
6142                switch(rm) {
6143                case 0: /* fprem */
6144                    gen_helper_fprem(cpu_env);
6145                    break;
6146                case 1: /* fyl2xp1 */
6147                    gen_helper_fyl2xp1(cpu_env);
6148                    break;
6149                case 2: /* fsqrt */
6150                    gen_helper_fsqrt(cpu_env);
6151                    break;
6152                case 3: /* fsincos */
6153                    gen_helper_fsincos(cpu_env);
6154                    break;
6155                case 5: /* fscale */
6156                    gen_helper_fscale(cpu_env);
6157                    break;
6158                case 4: /* frndint */
6159                    gen_helper_frndint(cpu_env);
6160                    break;
6161                case 6: /* fsin */
6162                    gen_helper_fsin(cpu_env);
6163                    break;
6164                default:
6165                case 7: /* fcos */
6166                    gen_helper_fcos(cpu_env);
6167                    break;
6168                }
6169                break;
6170            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6171            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6172            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6173                {
6174                    int op1;
6175
6176                    op1 = op & 7;
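                        /* op1 follows the D8 /r encoding: 0=add, 1=mul,
                           2=com, 3=comp, 4=sub, 5=subr, 6=div, 7=divr.
                           For the ST(i)-destination forms the sub/div
                           senses are reversed, which the
                           gen_helper_fp_arith_STN_ST0 dispatcher is
                           expected to account for.  */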
6177                    if (op >= 0x20) {
6178                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6179                        if (op >= 0x30)
6180                            gen_helper_fpop(cpu_env);
6181                    } else {
6182                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6183                        gen_helper_fp_arith_ST0_FT0(op1);
6184                    }
6185                }
6186                break;
6187            case 0x02: /* fcom */
6188            case 0x22: /* fcom2, undocumented op */
6189                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6190                gen_helper_fcom_ST0_FT0(cpu_env);
6191                break;
6192            case 0x03: /* fcomp */
6193            case 0x23: /* fcomp3, undocumented op */
6194            case 0x32: /* fcomp5, undocumented op */
6195                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6196                gen_helper_fcom_ST0_FT0(cpu_env);
6197                gen_helper_fpop(cpu_env);
6198                break;
6199            case 0x15: /* da/5 */
6200                switch(rm) {
6201                case 1: /* fucompp */
6202                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6203                    gen_helper_fucom_ST0_FT0(cpu_env);
6204                    gen_helper_fpop(cpu_env);
6205                    gen_helper_fpop(cpu_env);
6206                    break;
6207                default:
6208                    goto unknown_op;
6209                }
6210                break;
6211            case 0x1c:
6212                switch(rm) {
6213                case 0: /* feni (287 only, just do nop here) */
6214                    break;
6215                case 1: /* fdisi (287 only, just do nop here) */
6216                    break;
6217                case 2: /* fclex */
6218                    gen_helper_fclex(cpu_env);
6219                    break;
6220                case 3: /* fninit */
6221                    gen_helper_fninit(cpu_env);
6222                    break;
6223                case 4: /* fsetpm (287 only, just do nop here) */
6224                    break;
6225                default:
6226                    goto unknown_op;
6227                }
6228                break;
6229            case 0x1d: /* fucomi */
6230                if (!(s->cpuid_features & CPUID_CMOV)) {
6231                    goto illegal_op;
6232                }
6233                gen_update_cc_op(s);
6234                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6235                gen_helper_fucomi_ST0_FT0(cpu_env);
6236                set_cc_op(s, CC_OP_EFLAGS);
6237                break;
6238            case 0x1e: /* fcomi */
6239                if (!(s->cpuid_features & CPUID_CMOV)) {
6240                    goto illegal_op;
6241                }
6242                gen_update_cc_op(s);
6243                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6244                gen_helper_fcomi_ST0_FT0(cpu_env);
6245                set_cc_op(s, CC_OP_EFLAGS);
6246                break;
6247            case 0x28: /* ffree sti */
6248                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6249                break;
6250            case 0x2a: /* fst sti */
6251                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6252                break;
6253            case 0x2b: /* fstp sti */
6254            case 0x0b: /* fstp1 sti, undocumented op */
6255            case 0x3a: /* fstp8 sti, undocumented op */
6256            case 0x3b: /* fstp9 sti, undocumented op */
6257                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6258                gen_helper_fpop(cpu_env);
6259                break;
6260            case 0x2c: /* fucom st(i) */
6261                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6262                gen_helper_fucom_ST0_FT0(cpu_env);
6263                break;
6264            case 0x2d: /* fucomp st(i) */
6265                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6266                gen_helper_fucom_ST0_FT0(cpu_env);
6267                gen_helper_fpop(cpu_env);
6268                break;
6269            case 0x33: /* de/3 */
6270                switch(rm) {
6271                case 1: /* fcompp */
6272                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6273                    gen_helper_fcom_ST0_FT0(cpu_env);
6274                    gen_helper_fpop(cpu_env);
6275                    gen_helper_fpop(cpu_env);
6276                    break;
6277                default:
6278                    goto unknown_op;
6279                }
6280                break;
6281            case 0x38: /* ffreep sti, undocumented op */
6282                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6283                gen_helper_fpop(cpu_env);
6284                break;
6285            case 0x3c: /* df/4 */
6286                switch(rm) {
6287                case 0:
6288                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6289                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6290                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6291                    break;
6292                default:
6293                    goto unknown_op;
6294                }
6295                break;
6296            case 0x3d: /* fucomip */
6297                if (!(s->cpuid_features & CPUID_CMOV)) {
6298                    goto illegal_op;
6299                }
6300                gen_update_cc_op(s);
6301                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6302                gen_helper_fucomi_ST0_FT0(cpu_env);
6303                gen_helper_fpop(cpu_env);
6304                set_cc_op(s, CC_OP_EFLAGS);
6305                break;
6306            case 0x3e: /* fcomip */
6307                if (!(s->cpuid_features & CPUID_CMOV)) {
6308                    goto illegal_op;
6309                }
6310                gen_update_cc_op(s);
6311                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6312                gen_helper_fcomi_ST0_FT0(cpu_env);
6313                gen_helper_fpop(cpu_env);
6314                set_cc_op(s, CC_OP_EFLAGS);
6315                break;
6316            case 0x10 ... 0x13: /* fcmovxx */
6317            case 0x18 ... 0x1b:
6318                {
6319                    int op1;
6320                    TCGLabel *l1;
6321                    static const uint8_t fcmov_cc[8] = {
6322                        (JCC_B << 1),
6323                        (JCC_Z << 1),
6324                        (JCC_BE << 1),
6325                        (JCC_P << 1),
6326                    };
6327
6328                    if (!(s->cpuid_features & CPUID_CMOV)) {
6329                        goto illegal_op;
6330                    }
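                    /* da/0..3 encode fcmovb/e/be/u and db/0..3 the negated
                       forms.  The branch below skips the fmov, so the
                       condition sense is inverted: bit 3 of op picks the
                       db row and is flipped into the low (negation) bit
                       of the jcc code.  */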
6331                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6332                    l1 = gen_new_label();
6333                    gen_jcc1_noeob(s, op1, l1);
6334                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6335                    gen_set_label(l1);
6336                }
6337                break;
6338            default:
6339                goto unknown_op;
6340            }
6341        }
6342        break;
6343        /************************/
6344        /* string ops */
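        /* With a REP prefix, the gen_repz_* helpers emit the ECX test
           and loop around a single iteration; only scas/cmps distinguish
           the ZF-terminating REPZ/REPNZ variants.  */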
6345
6346    case 0xa4: /* movsS */
6347    case 0xa5:
6348        ot = mo_b_d(b, dflag);
6349        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6350            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6351        } else {
6352            gen_movs(s, ot);
6353        }
6354        break;
6355
6356    case 0xaa: /* stosS */
6357    case 0xab:
6358        ot = mo_b_d(b, dflag);
6359        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6360            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6361        } else {
6362            gen_stos(s, ot);
6363        }
6364        break;
6365    case 0xac: /* lodsS */
6366    case 0xad:
6367        ot = mo_b_d(b, dflag);
6368        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6369            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6370        } else {
6371            gen_lods(s, ot);
6372        }
6373        break;
6374    case 0xae: /* scasS */
6375    case 0xaf:
6376        ot = mo_b_d(b, dflag);
6377        if (prefixes & PREFIX_REPNZ) {
6378            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6379        } else if (prefixes & PREFIX_REPZ) {
6380            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6381        } else {
6382            gen_scas(s, ot);
6383        }
6384        break;
6385
6386    case 0xa6: /* cmpsS */
6387    case 0xa7:
6388        ot = mo_b_d(b, dflag);
6389        if (prefixes & PREFIX_REPNZ) {
6390            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6391        } else if (prefixes & PREFIX_REPZ) {
6392            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6393        } else {
6394            gen_cmps(s, ot);
6395        }
6396        break;
6397    case 0x6c: /* insS */
6398    case 0x6d:
6399        ot = mo_b_d32(b, dflag);
6400        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6401        gen_check_io(s, ot, pc_start - s->cs_base,
6402                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
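        /* In the SVM intercept flags above, the bare 4 should be the
           IOIO 'string instruction' bit, and SVM_IOIO_TYPE_MASK marks
           the access as an IN.  */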
6403        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6404            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6405        } else {
6406            gen_ins(s, ot);
6407            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6408                gen_jmp(s, s->pc - s->cs_base);
6409            }
6410        }
6411        break;
6412    case 0x6e: /* outsS */
6413    case 0x6f:
6414        ot = mo_b_d32(b, dflag);
6415        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6416        gen_check_io(s, ot, pc_start - s->cs_base,
6417                     svm_is_rep(prefixes) | 4);
6418        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6419            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6420        } else {
6421            gen_outs(s, ot);
6422            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6423                gen_jmp(s, s->pc - s->cs_base);
6424            }
6425        }
6426        break;
6427
6428        /************************/
6429        /* port I/O */
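        /* Under icount (CF_USE_ICOUNT), an I/O access must be the last
           thing executed in the TB, so the helpers below are bracketed
           with gen_io_start()/gen_io_end() and the TB ends with a jump
           to the next insn.  */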
6430
6431    case 0xe4:
6432    case 0xe5:
6433        ot = mo_b_d32(b, dflag);
6434        val = x86_ldub_code(env, s);
6435        tcg_gen_movi_tl(s->T0, val);
6436        gen_check_io(s, ot, pc_start - s->cs_base,
6437                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6438        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6439            gen_io_start();
6440        }
6441        tcg_gen_movi_i32(s->tmp2_i32, val);
6442        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6443        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6444        gen_bpt_io(s, s->tmp2_i32, ot);
6445        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6446            gen_io_end();
6447            gen_jmp(s, s->pc - s->cs_base);
6448        }
6449        break;
6450    case 0xe6:
6451    case 0xe7:
6452        ot = mo_b_d32(b, dflag);
6453        val = x86_ldub_code(env, s);
6454        tcg_gen_movi_tl(s->T0, val);
6455        gen_check_io(s, ot, pc_start - s->cs_base,
6456                     svm_is_rep(prefixes));
6457        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6458
6459        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6460            gen_io_start();
6461        }
6462        tcg_gen_movi_i32(s->tmp2_i32, val);
6463        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6464        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6465        gen_bpt_io(s, s->tmp2_i32, ot);
6466        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6467            gen_io_end();
6468            gen_jmp(s, s->pc - s->cs_base);
6469        }
6470        break;
6471    case 0xec:
6472    case 0xed:
6473        ot = mo_b_d32(b, dflag);
6474        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6475        gen_check_io(s, ot, pc_start - s->cs_base,
6476                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6477        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6478            gen_io_start();
6479        }
6480        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6481        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6482        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6483        gen_bpt_io(s, s->tmp2_i32, ot);
6484        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6485            gen_io_end();
6486            gen_jmp(s, s->pc - s->cs_base);
6487        }
6488        break;
6489    case 0xee:
6490    case 0xef:
6491        ot = mo_b_d32(b, dflag);
6492        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6493        gen_check_io(s, ot, pc_start - s->cs_base,
6494                     svm_is_rep(prefixes));
6495        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6496
6497        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6498            gen_io_start();
6499        }
6500        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6501        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6502        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6503        gen_bpt_io(s, s->tmp2_i32, ot);
6504        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6505            gen_io_end();
6506            gen_jmp(s, s->pc - s->cs_base);
6507        }
6508        break;
6509
6510        /************************/
6511        /* control */
6512    case 0xc2: /* ret im */
6513        val = x86_ldsw_code(env, s);
6514        ot = gen_pop_T0(s);
6515        gen_stack_update(s, val + (1 << ot));
6516        /* Note that gen_pop_T0 uses a zero-extending load.  */
6517        gen_op_jmp_v(s->T0);
6518        gen_bnd_jmp(s);
6519        gen_jr(s, s->T0);
6520        break;
6521    case 0xc3: /* ret */
6522        ot = gen_pop_T0(s);
6523        gen_pop_update(s, ot);
6524        /* Note that gen_pop_T0 uses a zero-extending load.  */
6525        gen_op_jmp_v(s->T0);
6526        gen_bnd_jmp(s);
6527        gen_jr(s, s->T0);
6528        break;
6529    case 0xca: /* lret im */
6530        val = x86_ldsw_code(env, s);
6531    do_lret:
6532        if (s->pe && !s->vm86) {
6533            gen_update_cc_op(s);
6534            gen_jmp_im(s, pc_start - s->cs_base);
6535            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6536                                      tcg_const_i32(val));
6537        } else {
6538            gen_stack_A0(s);
6539            /* pop offset */
6540            gen_op_ld_v(s, dflag, s->T0, s->A0);
6541            /* NOTE: keeping EIP updated is not a problem in case of
6542               exception */
6543            gen_op_jmp_v(s->T0);
6544            /* pop selector */
6545            gen_add_A0_im(s, 1 << dflag);
6546            gen_op_ld_v(s, dflag, s->T0, s->A0);
6547            gen_op_movl_seg_T0_vm(s, R_CS);
6548            /* add stack offset */
6549            gen_stack_update(s, val + (2 << dflag));
6550        }
6551        gen_eob(s);
6552        break;
6553    case 0xcb: /* lret */
6554        val = 0;
6555        goto do_lret;
6556    case 0xcf: /* iret */
6557        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6558        if (!s->pe) {
6559            /* real mode */
6560            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6561            set_cc_op(s, CC_OP_EFLAGS);
6562        } else if (s->vm86) {
6563            if (s->iopl != 3) {
6564                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6565            } else {
6566                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6567                set_cc_op(s, CC_OP_EFLAGS);
6568            }
6569        } else {
6570            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6571                                      tcg_const_i32(s->pc - s->cs_base));
6572            set_cc_op(s, CC_OP_EFLAGS);
6573        }
6574        gen_eob(s);
6575        break;
6576    case 0xe8: /* call im */
6577        {
6578            if (dflag != MO_16) {
6579                tval = (int32_t)insn_get(env, s, MO_32);
6580            } else {
6581                tval = (int16_t)insn_get(env, s, MO_16);
6582            }
6583            next_eip = s->pc - s->cs_base;
6584            tval += next_eip;
6585            if (dflag == MO_16) {
6586                tval &= 0xffff;
6587            } else if (!CODE64(s)) {
6588                tval &= 0xffffffff;
6589            }
6590            tcg_gen_movi_tl(s->T0, next_eip);
6591            gen_push_v(s, s->T0);
6592            gen_bnd_jmp(s);
6593            gen_jmp(s, tval);
6594        }
6595        break;
6596    case 0x9a: /* lcall im */
6597        {
6598            unsigned int selector, offset;
6599
6600            if (CODE64(s))
6601                goto illegal_op;
6602            ot = dflag;
6603            offset = insn_get(env, s, ot);
6604            selector = insn_get(env, s, MO_16);
6605
6606            tcg_gen_movi_tl(s->T0, selector);
6607            tcg_gen_movi_tl(s->T1, offset);
6608        }
6609        goto do_lcall;
6610    case 0xe9: /* jmp im */
6611        if (dflag != MO_16) {
6612            tval = (int32_t)insn_get(env, s, MO_32);
6613        } else {
6614            tval = (int16_t)insn_get(env, s, MO_16);
6615        }
6616        tval += s->pc - s->cs_base;
6617        if (dflag == MO_16) {
6618            tval &= 0xffff;
6619        } else if (!CODE64(s)) {
6620            tval &= 0xffffffff;
6621        }
6622        gen_bnd_jmp(s);
6623        gen_jmp(s, tval);
6624        break;
6625    case 0xea: /* ljmp im */
6626        {
6627            unsigned int selector, offset;
6628
6629            if (CODE64(s))
6630                goto illegal_op;
6631            ot = dflag;
6632            offset = insn_get(env, s, ot);
6633            selector = insn_get(env, s, MO_16);
6634
6635            tcg_gen_movi_tl(s->T0, selector);
6636            tcg_gen_movi_tl(s->T1, offset);
6637        }
6638        goto do_ljmp;
6639    case 0xeb: /* jmp Jb */
6640        tval = (int8_t)insn_get(env, s, MO_8);
6641        tval += s->pc - s->cs_base;
6642        if (dflag == MO_16) {
6643            tval &= 0xffff;
6644        }
6645        gen_jmp(s, tval);
6646        break;
6647    case 0x70 ... 0x7f: /* jcc Jb */
6648        tval = (int8_t)insn_get(env, s, MO_8);
6649        goto do_jcc;
6650    case 0x180 ... 0x18f: /* jcc Jv */
6651        if (dflag != MO_16) {
6652            tval = (int32_t)insn_get(env, s, MO_32);
6653        } else {
6654            tval = (int16_t)insn_get(env, s, MO_16);
6655        }
6656    do_jcc:
6657        next_eip = s->pc - s->cs_base;
6658        tval += next_eip;
6659        if (dflag == MO_16) {
6660            tval &= 0xffff;
6661        }
6662        gen_bnd_jmp(s);
6663        gen_jcc(s, b, tval, next_eip);
6664        break;
6665
6666    case 0x190 ... 0x19f: /* setcc Gv */
6667        modrm = x86_ldub_code(env, s);
6668        gen_setcc1(s, b, s->T0);
6669        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6670        break;
6671    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6672        if (!(s->cpuid_features & CPUID_CMOV)) {
6673            goto illegal_op;
6674        }
6675        ot = dflag;
6676        modrm = x86_ldub_code(env, s);
6677        reg = ((modrm >> 3) & 7) | rex_r;
6678        gen_cmovcc1(env, s, ot, b, modrm, reg);
6679        break;
6680
6681        /************************/
6682        /* flags */
6683    case 0x9c: /* pushf */
6684        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6685        if (s->vm86 && s->iopl != 3) {
6686            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6687        } else {
6688            gen_update_cc_op(s);
6689            gen_helper_read_eflags(s->T0, cpu_env);
6690            gen_push_v(s, s->T0);
6691        }
6692        break;
6693    case 0x9d: /* popf */
6694        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6695        if (s->vm86 && s->iopl != 3) {
6696            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6697        } else {
6698            ot = gen_pop_T0(s);
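            /* Which bits popf may change depends on privilege: CPL 0 may
               update IOPL and IF; CPL <= IOPL may update IF but not IOPL;
               otherwise neither.  TF/AC/ID/NT are always writable here,
               and with a 16-bit operand the high word is preserved.  */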
6699            if (s->cpl == 0) {
6700                if (dflag != MO_16) {
6701                    gen_helper_write_eflags(cpu_env, s->T0,
6702                                            tcg_const_i32((TF_MASK | AC_MASK |
6703                                                           ID_MASK | NT_MASK |
6704                                                           IF_MASK |
6705                                                           IOPL_MASK)));
6706                } else {
6707                    gen_helper_write_eflags(cpu_env, s->T0,
6708                                            tcg_const_i32((TF_MASK | AC_MASK |
6709                                                           ID_MASK | NT_MASK |
6710                                                           IF_MASK | IOPL_MASK)
6711                                                          & 0xffff));
6712                }
6713            } else {
6714                if (s->cpl <= s->iopl) {
6715                    if (dflag != MO_16) {
6716                        gen_helper_write_eflags(cpu_env, s->T0,
6717                                                tcg_const_i32((TF_MASK |
6718                                                               AC_MASK |
6719                                                               ID_MASK |
6720                                                               NT_MASK |
6721                                                               IF_MASK)));
6722                    } else {
6723                        gen_helper_write_eflags(cpu_env, s->T0,
6724                                                tcg_const_i32((TF_MASK |
6725                                                               AC_MASK |
6726                                                               ID_MASK |
6727                                                               NT_MASK |
6728                                                               IF_MASK)
6729                                                              & 0xffff));
6730                    }
6731                } else {
6732                    if (dflag != MO_16) {
6733                        gen_helper_write_eflags(cpu_env, s->T0,
6734                                           tcg_const_i32((TF_MASK | AC_MASK |
6735                                                          ID_MASK | NT_MASK)));
6736                    } else {
6737                        gen_helper_write_eflags(cpu_env, s->T0,
6738                                           tcg_const_i32((TF_MASK | AC_MASK |
6739                                                          ID_MASK | NT_MASK)
6740                                                         & 0xffff));
6741                    }
6742                }
6743            }
6744            gen_pop_update(s, ot);
6745            set_cc_op(s, CC_OP_EFLAGS);
6746            /* abort translation because the TF/AC flags may change */
6747            gen_jmp_im(s, s->pc - s->cs_base);
6748            gen_eob(s);
6749        }
6750        break;
6751    case 0x9e: /* sahf */
6752        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6753            goto illegal_op;
6754        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6755        gen_compute_eflags(s);
6756        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6757        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6758        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6759        break;
6760    case 0x9f: /* lahf */
6761        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6762            goto illegal_op;
6763        gen_compute_eflags(s);
6764        /* Note: gen_compute_eflags() only gives the condition codes */
6765        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6766        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6767        break;
6768    case 0xf5: /* cmc */
6769        gen_compute_eflags(s);
6770        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6771        break;
6772    case 0xf8: /* clc */
6773        gen_compute_eflags(s);
6774        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6775        break;
6776    case 0xf9: /* stc */
6777        gen_compute_eflags(s);
6778        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6779        break;
6780    case 0xfc: /* cld */
6781        tcg_gen_movi_i32(s->tmp2_i32, 1);
6782        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6783        break;
6784    case 0xfd: /* std */
6785        tcg_gen_movi_i32(s->tmp2_i32, -1);
6786        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6787        break;
6788
6789        /************************/
6790        /* bit operations */
6791    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6792        ot = dflag;
6793        modrm = x86_ldub_code(env, s);
6794        op = (modrm >> 3) & 7;
6795        mod = (modrm >> 6) & 3;
6796        rm = (modrm & 7) | REX_B(s);
6797        if (mod != 3) {
6798            s->rip_offset = 1;
6799            gen_lea_modrm(env, s, modrm);
6800            if (!(s->prefix & PREFIX_LOCK)) {
6801                gen_op_ld_v(s, ot, s->T0, s->A0);
6802            }
6803        } else {
6804            gen_op_mov_v_reg(s, ot, s->T0, rm);
6805        }
6806        /* load shift */
6807        val = x86_ldub_code(env, s);
6808        tcg_gen_movi_tl(s->T1, val);
6809        if (op < 4)
6810            goto unknown_op;
6811        op -= 4;
6812        goto bt_op;
6813    case 0x1a3: /* bt Gv, Ev */
6814        op = 0;
6815        goto do_btx;
6816    case 0x1ab: /* bts */
6817        op = 1;
6818        goto do_btx;
6819    case 0x1b3: /* btr */
6820        op = 2;
6821        goto do_btx;
6822    case 0x1bb: /* btc */
6823        op = 3;
6824    do_btx:
6825        ot = dflag;
6826        modrm = x86_ldub_code(env, s);
6827        reg = ((modrm >> 3) & 7) | rex_r;
6828        mod = (modrm >> 6) & 3;
6829        rm = (modrm & 7) | REX_B(s);
6830        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6831        if (mod != 3) {
6832            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6833            /* Memory operand: the bit offset in T1 may fall outside
                   the addressed unit, so fold the signed unit index
                   (T1 >> (3 + ot)) back into the address as a byte
                   displacement ((T1 >> (3 + ot)) << ot).  */
6834            gen_exts(ot, s->T1);
6835            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6836            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6837            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6838            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6839            if (!(s->prefix & PREFIX_LOCK)) {
6840                gen_op_ld_v(s, ot, s->T0, s->A0);
6841            }
6842        } else {
6843            gen_op_mov_v_reg(s, ot, s->T0, rm);
6844        }
6845    bt_op:
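        /* Reduce the bit offset modulo the operand width (8 << ot bits)
           and build the single-bit mask 1 << offset in tmp0.  */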
6846        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6847        tcg_gen_movi_tl(s->tmp0, 1);
6848        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6849        if (s->prefix & PREFIX_LOCK) {
6850            switch (op) {
6851            case 0: /* bt */
6852                /* Needs no atomic ops; we suppressed the normal
6853                   memory load for LOCK above, so do it now.  */
6854                gen_op_ld_v(s, ot, s->T0, s->A0);
6855                break;
6856            case 1: /* bts */
6857                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6858                                           s->mem_index, ot | MO_LE);
6859                break;
6860            case 2: /* btr */
6861                tcg_gen_not_tl(s->tmp0, s->tmp0);
6862                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6863                                            s->mem_index, ot | MO_LE);
6864                break;
6865            default:
6866            case 3: /* btc */
6867                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6868                                            s->mem_index, ot | MO_LE);
6869                break;
6870            }
6871            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6872        } else {
6873            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6874            switch (op) {
6875            case 0: /* bt */
6876                /* Data already loaded; nothing to do.  */
6877                break;
6878            case 1: /* bts */
6879                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6880                break;
6881            case 2: /* btr */
6882                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6883                break;
6884            default:
6885            case 3: /* btc */
6886                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6887                break;
6888            }
6889            if (op != 0) {
6890                if (mod != 3) {
6891                    gen_op_st_v(s, ot, s->T0, s->A0);
6892                } else {
6893                    gen_op_mov_reg_v(s, ot, rm, s->T0);
6894                }
6895            }
6896        }
6897
6898        /* Delay all CC updates until after the store above.  Note that
6899           C is the result of the test, Z is unchanged, and the others
6900           are all undefined.  */
6901        switch (s->cc_op) {
6902        case CC_OP_MULB ... CC_OP_MULQ:
6903        case CC_OP_ADDB ... CC_OP_ADDQ:
6904        case CC_OP_ADCB ... CC_OP_ADCQ:
6905        case CC_OP_SUBB ... CC_OP_SUBQ:
6906        case CC_OP_SBBB ... CC_OP_SBBQ:
6907        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6908        case CC_OP_INCB ... CC_OP_INCQ:
6909        case CC_OP_DECB ... CC_OP_DECQ:
6910        case CC_OP_SHLB ... CC_OP_SHLQ:
6911        case CC_OP_SARB ... CC_OP_SARQ:
6912        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6913            /* Z was going to be computed from the non-zero status of CC_DST.
6914               We can get that same Z value (and the new C value) by leaving
6915               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6916               same width.  */
6917            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6918            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6919            break;
6920        default:
6921            /* Otherwise, generate EFLAGS and replace the C bit.  */
6922            gen_compute_eflags(s);
6923            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
6924                               ctz32(CC_C), 1);
6925            break;
6926        }
6927        break;
6928    case 0x1bc: /* bsf / tzcnt */
6929    case 0x1bd: /* bsr / lzcnt */
6930        ot = dflag;
6931        modrm = x86_ldub_code(env, s);
6932        reg = ((modrm >> 3) & 7) | rex_r;
6933        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6934        gen_extu(ot, s->T0);
6935
6936        /* Note that lzcnt and tzcnt are in different extensions.  */
6937        if ((prefixes & PREFIX_REPZ)
6938            && (b & 1
6939                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6940                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6941            int size = 8 << ot;
6942            /* For lzcnt/tzcnt, the C bit is defined in terms of the input. */
6943            tcg_gen_mov_tl(cpu_cc_src, s->T0);
6944            if (b & 1) {
6945                /* For lzcnt, reduce the target_ulong result by the
6946                   number of zeros that we expect to find at the top.  */
6947                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
6948                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
6949            } else {
6950                /* For tzcnt, a zero input must return the operand size.  */
6951                tcg_gen_ctzi_tl(s->T0, s->T0, size);
6952            }
6953            /* For lzcnt/tzcnt, the Z bit is defined in terms of the result.  */
6954            gen_op_update1_cc(s);
6955            set_cc_op(s, CC_OP_BMILGB + ot);
6956        } else {
6957            /* For bsr/bsf, only the Z bit is defined, and it reflects
6958               the input rather than the result.  */
6959            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
6960            set_cc_op(s, CC_OP_LOGICB + ot);
6961
6962            /* ??? The manual says that the output is undefined when the
6963               input is zero, but real hardware leaves it unchanged, and
6964               real programs appear to depend on that.  Accomplish this
6965               by passing the output as the value to return upon zero.  */
6966            if (b & 1) {
6967                /* For bsr, return the bit index of the first 1 bit,
6968                   not the count of leading zeros.  */
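            /* Since TARGET_LONG_BITS is a power of two, for nonzero x
               (TARGET_LONG_BITS - 1) - clz(x) equals
               clz(x) ^ (TARGET_LONG_BITS - 1); the fallback value in T1
               is pre-XORed so that a zero input leaves the register
               value unchanged.  */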
6969                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6970                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
6971                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
6972            } else {
6973                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
6974            }
6975        }
6976        gen_op_mov_reg_v(s, ot, reg, s->T0);
6977        break;
6978        /************************/
6979        /* bcd */
6980    case 0x27: /* daa */
6981        if (CODE64(s))
6982            goto illegal_op;
6983        gen_update_cc_op(s);
6984        gen_helper_daa(cpu_env);
6985        set_cc_op(s, CC_OP_EFLAGS);
6986        break;
6987    case 0x2f: /* das */
6988        if (CODE64(s))
6989            goto illegal_op;
6990        gen_update_cc_op(s);
6991        gen_helper_das(cpu_env);
6992        set_cc_op(s, CC_OP_EFLAGS);
6993        break;
6994    case 0x37: /* aaa */
6995        if (CODE64(s))
6996            goto illegal_op;
6997        gen_update_cc_op(s);
6998        gen_helper_aaa(cpu_env);
6999        set_cc_op(s, CC_OP_EFLAGS);
7000        break;
7001    case 0x3f: /* aas */
7002        if (CODE64(s))
7003            goto illegal_op;
7004        gen_update_cc_op(s);
7005        gen_helper_aas(cpu_env);
7006        set_cc_op(s, CC_OP_EFLAGS);
7007        break;
7008    case 0xd4: /* aam */
7009        if (CODE64(s))
7010            goto illegal_op;
7011        val = x86_ldub_code(env, s);
7012        if (val == 0) {
7013            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7014        } else {
7015            gen_helper_aam(cpu_env, tcg_const_i32(val));
7016            set_cc_op(s, CC_OP_LOGICB);
7017        }
7018        break;
7019    case 0xd5: /* aad */
7020        if (CODE64(s))
7021            goto illegal_op;
7022        val = x86_ldub_code(env, s);
7023        gen_helper_aad(cpu_env, tcg_const_i32(val));
7024        set_cc_op(s, CC_OP_LOGICB);
7025        break;
7026        /************************/
7027        /* misc */
7028    case 0x90: /* nop */
7029        /* XXX: apply the correct LOCK test to all insns */
7030        if (prefixes & PREFIX_LOCK) {
7031            goto illegal_op;
7032        }
7033        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7034        if (REX_B(s)) {
7035            goto do_xchg_reg_eax;
7036        }
7037        if (prefixes & PREFIX_REPZ) {
7038            gen_update_cc_op(s);
7039            gen_jmp_im(s, pc_start - s->cs_base);
7040            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7041            s->base.is_jmp = DISAS_NORETURN;
7042        }
7043        break;
7044    case 0x9b: /* fwait */
7045        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7046            (HF_MP_MASK | HF_TS_MASK)) {
7047            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7048        } else {
7049            gen_helper_fwait(cpu_env);
7050        }
7051        break;
7052    case 0xcc: /* int3 */
7053        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7054        break;
7055    case 0xcd: /* int N */
7056        val = x86_ldub_code(env, s);
7057        if (s->vm86 && s->iopl != 3) {
7058            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7059        } else {
7060            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7061        }
7062        break;
7063    case 0xce: /* into */
7064        if (CODE64(s))
7065            goto illegal_op;
7066        gen_update_cc_op(s);
7067        gen_jmp_im(s, pc_start - s->cs_base);
7068        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7069        break;
7070#ifdef WANT_ICEBP
7071    case 0xf1: /* icebp (undocumented, exits to external debugger) */
7072        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7073        gen_debug(s, pc_start - s->cs_base);
7074        break;
7075#endif
7076    case 0xfa: /* cli */
7077        if (!s->vm86) {
7078            if (s->cpl <= s->iopl) {
7079                gen_helper_cli(cpu_env);
7080            } else {
7081                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7082            }
7083        } else {
7084            if (s->iopl == 3) {
7085                gen_helper_cli(cpu_env);
7086            } else {
7087                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7088            }
7089        }
7090        break;
7091    case 0xfb: /* sti */
7092        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7093            gen_helper_sti(cpu_env);
7094            /* interrupts are recognized only after the insn following sti */
7095            gen_jmp_im(s, s->pc - s->cs_base);
7096            gen_eob_inhibit_irq(s, true);
7097        } else {
7098            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7099        }
7100        break;
7101    case 0x62: /* bound */
7102        if (CODE64(s))
7103            goto illegal_op;
7104        ot = dflag;
7105        modrm = x86_ldub_code(env, s);
7106        reg = (modrm >> 3) & 7;
7107        mod = (modrm >> 6) & 3;
7108        if (mod == 3)
7109            goto illegal_op;
7110        gen_op_mov_v_reg(s, ot, s->T0, reg);
7111        gen_lea_modrm(env, s, modrm);
7112        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7113        if (ot == MO_16) {
7114            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7115        } else {
7116            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7117        }
7118        break;
7119    case 0x1c8 ... 0x1cf: /* bswap reg */
7120        reg = (b & 7) | REX_B(s);
7121#ifdef TARGET_X86_64
7122        if (dflag == MO_64) {
7123            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
7124            tcg_gen_bswap64_i64(s->T0, s->T0);
7125            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
7126        } else
7127#endif
7128        {
7129            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
7130            tcg_gen_ext32u_tl(s->T0, s->T0);
7131            tcg_gen_bswap32_tl(s->T0, s->T0);
7132            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
7133        }
7134        break;
7135    case 0xd6: /* salc */
7136        if (CODE64(s))
7137            goto illegal_op;
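        /* AL = CF ? 0xff : 0x00: gen_compute_eflags_c leaves the carry
           as 0/1 in T0, and the negation widens it into a byte mask.  */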
7138        gen_compute_eflags_c(s, s->T0);
7139        tcg_gen_neg_tl(s->T0, s->T0);
7140        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7141        break;
7142    case 0xe0: /* loopnz */
7143    case 0xe1: /* loopz */
7144    case 0xe2: /* loop */
7145    case 0xe3: /* jecxz */
7146        {
7147            TCGLabel *l1, *l2, *l3;
7148
7149            tval = (int8_t)insn_get(env, s, MO_8);
7150            next_eip = s->pc - s->cs_base;
7151            tval += next_eip;
7152            if (dflag == MO_16) {
7153                tval &= 0xffff;
7154            }
7155
7156            l1 = gen_new_label();
7157            l2 = gen_new_label();
7158            l3 = gen_new_label();
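            /* l1: branch taken (jump to tval); l2: common exit;
               l3: loopnz/loopz fall-through once ECX has reached zero
               after the decrement.  */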
7159            b &= 3;
7160            switch(b) {
7161            case 0: /* loopnz */
7162            case 1: /* loopz */
7163                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7164                gen_op_jz_ecx(s, s->aflag, l3);
7165                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7166                break;
7167            case 2: /* loop */
7168                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7169                gen_op_jnz_ecx(s, s->aflag, l1);
7170                break;
7171            default:
7172            case 3: /* jcxz */
7173                gen_op_jz_ecx(s, s->aflag, l1);
7174                break;
7175            }
7176
7177            gen_set_label(l3);
7178            gen_jmp_im(s, next_eip);
7179            tcg_gen_br(l2);
7180
7181            gen_set_label(l1);
7182            gen_jmp_im(s, tval);
7183            gen_set_label(l2);
7184            gen_eob(s);
7185        }
7186        break;
7187    case 0x130: /* wrmsr */
7188    case 0x132: /* rdmsr */
7189        if (s->cpl != 0) {
7190            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7191        } else {
7192            gen_update_cc_op(s);
7193            gen_jmp_im(s, pc_start - s->cs_base);
7194            if (b & 2) {
7195                gen_helper_rdmsr(cpu_env);
7196            } else {
7197                gen_helper_wrmsr(cpu_env);
7198            }
7199        }
7200        break;
7201    case 0x131: /* rdtsc */
7202        gen_update_cc_op(s);
7203        gen_jmp_im(s, pc_start - s->cs_base);
7204        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7205            gen_io_start();
7206        }
7207        gen_helper_rdtsc(cpu_env);
7208        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7209            gen_io_end();
7210            gen_jmp(s, s->pc - s->cs_base);
7211        }
7212        break;
7213    case 0x133: /* rdpmc */
7214        gen_update_cc_op(s);
7215        gen_jmp_im(s, pc_start - s->cs_base);
7216        gen_helper_rdpmc(cpu_env);
7217        break;
7218    case 0x134: /* sysenter */
7219        /* On Intel CPUs, SYSENTER is valid in 64-bit mode as well */
7220        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7221            goto illegal_op;
7222        if (!s->pe) {
7223            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7224        } else {
7225            gen_helper_sysenter(cpu_env);
7226            gen_eob(s);
7227        }
7228        break;
7229    case 0x135: /* sysexit */
7230        /* On Intel CPUs, SYSEXIT is valid in 64-bit mode as well */
7231        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7232            goto illegal_op;
7233        if (!s->pe) {
7234            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7235        } else {
7236            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7237            gen_eob(s);
7238        }
7239        break;
7240#ifdef TARGET_X86_64
7241    case 0x105: /* syscall */
7242        /* XXX: is it usable in real mode? */
7243        gen_update_cc_op(s);
7244        gen_jmp_im(s, pc_start - s->cs_base);
7245        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7246        /* TF handling for the syscall insn is different. The TF bit is checked
7247           after the syscall insn completes. This allows #DB to not be
7248           generated after one has entered CPL0 if TF is set in FMASK.  */
7249        gen_eob_worker(s, false, true);
7250        break;
7251    case 0x107: /* sysret */
7252        if (!s->pe) {
7253            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7254        } else {
7255            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7256            /* condition codes are modified only in long mode */
7257            if (s->lma) {
7258                set_cc_op(s, CC_OP_EFLAGS);
7259            }
7260            /* TF handling for the sysret insn is different. The TF bit is
7261               checked after the sysret insn completes. This allows #DB to be
7262               generated "as if" the syscall insn in userspace has just
7263               completed.  */
7264            gen_eob_worker(s, false, true);
7265        }
7266        break;
7267#endif
7268    case 0x1a2: /* cpuid */
7269        gen_update_cc_op(s);
7270        gen_jmp_im(s, pc_start - s->cs_base);
7271        gen_helper_cpuid(cpu_env);
7272        break;
7273    case 0xf4: /* hlt */
7274        if (s->cpl != 0) {
7275            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7276        } else {
7277            gen_update_cc_op(s);
7278            gen_jmp_im(s, pc_start - s->cs_base);
7279            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7280            s->base.is_jmp = DISAS_NORETURN;
7281        }
7282        break;
7283    case 0x100:
7284        modrm = x86_ldub_code(env, s);
7285        mod = (modrm >> 6) & 3;
7286        op = (modrm >> 3) & 7;
7287        switch(op) {
7288        case 0: /* sldt */
7289            if (!s->pe || s->vm86)
7290                goto illegal_op;
7291            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7292            tcg_gen_ld32u_tl(s->T0, cpu_env,
7293                             offsetof(CPUX86State, ldt.selector));
7294            ot = mod == 3 ? dflag : MO_16;
7295            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7296            break;
7297        case 2: /* lldt */
7298            if (!s->pe || s->vm86)
7299                goto illegal_op;
7300            if (s->cpl != 0) {
7301                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7302            } else {
7303                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7304                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7305                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7306                gen_helper_lldt(cpu_env, s->tmp2_i32);
7307            }
7308            break;
7309        case 1: /* str */
7310            if (!s->pe || s->vm86)
7311                goto illegal_op;
7312            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7313            tcg_gen_ld32u_tl(s->T0, cpu_env,
7314                             offsetof(CPUX86State, tr.selector));
7315            ot = mod == 3 ? dflag : MO_16;
7316            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7317            break;
7318        case 3: /* ltr */
7319            if (!s->pe || s->vm86)
7320                goto illegal_op;
7321            if (s->cpl != 0) {
7322                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7323            } else {
7324                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7325                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7326                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7327                gen_helper_ltr(cpu_env, s->tmp2_i32);
7328            }
7329            break;
7330        case 4: /* verr */
7331        case 5: /* verw */
7332            if (!s->pe || s->vm86)
7333                goto illegal_op;
7334            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7335            gen_update_cc_op(s);
7336            if (op == 4) {
7337                gen_helper_verr(cpu_env, s->T0);
7338            } else {
7339                gen_helper_verw(cpu_env, s->T0);
7340            }
7341            set_cc_op(s, CC_OP_EFLAGS);
7342            break;
7343        default:
7344            goto unknown_op;
7345        }
7346        break;
7347
7348    case 0x101:
7349        modrm = x86_ldub_code(env, s);
7350        switch (modrm) {
7351        CASE_MODRM_MEM_OP(0): /* sgdt */
7352            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7353            gen_lea_modrm(env, s, modrm);
7354            tcg_gen_ld32u_tl(s->T0,
7355                             cpu_env, offsetof(CPUX86State, gdt.limit));
7356            gen_op_st_v(s, MO_16, s->T0, s->A0);
7357            gen_add_A0_im(s, 2);
7358            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
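            /* With a 16-bit operand size only 24 bits of the base are
               stored, matching the documented sgdt behaviour for 16-bit
               operands.  */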
7359            if (dflag == MO_16) {
7360                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7361            }
7362            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7363            break;
7364
7365        case 0xc8: /* monitor */
7366            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7367                goto illegal_op;
7368            }
7369            gen_update_cc_op(s);
7370            gen_jmp_im(s, pc_start - s->cs_base);
7371            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7372            gen_extu(s->aflag, s->A0);
7373            gen_add_A0_ds_seg(s);
7374            gen_helper_monitor(cpu_env, s->A0);
7375            break;
7376
7377        case 0xc9: /* mwait */
7378            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7379                goto illegal_op;
7380            }
7381            gen_update_cc_op(s);
7382            gen_jmp_im(s, pc_start - s->cs_base);
7383            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7384            gen_eob(s);
7385            break;
7386
7387        case 0xca: /* clac */
7388            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7389                || s->cpl != 0) {
7390                goto illegal_op;
7391            }
7392            gen_helper_clac(cpu_env);
7393            gen_jmp_im(s, s->pc - s->cs_base);
7394            gen_eob(s);
7395            break;
7396
7397        case 0xcb: /* stac */
7398            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7399                || s->cpl != 0) {
7400                goto illegal_op;
7401            }
7402            gen_helper_stac(cpu_env);
7403            gen_jmp_im(s, s->pc - s->cs_base);
7404            gen_eob(s);
7405            break;
7406
7407        CASE_MODRM_MEM_OP(1): /* sidt */
7408            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7409            gen_lea_modrm(env, s, modrm);
7410            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7411            gen_op_st_v(s, MO_16, s->T0, s->A0);
7412            gen_add_A0_im(s, 2);
7413            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7414            if (dflag == MO_16) {
7415                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7416            }
7417            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7418            break;
7419
7420        case 0xd0: /* xgetbv */
7421            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7422                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7423                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7424                goto illegal_op;
7425            }
7426            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
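            /* The XCR index is taken from ECX; the 64-bit result is
               split into EDX:EAX.  */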
7427            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7428            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7429            break;
7430
7431        case 0xd1: /* xsetbv */
7432            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7433                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7434                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7435                goto illegal_op;
7436            }
7437            if (s->cpl != 0) {
7438                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7439                break;
7440            }
7441            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7442                                  cpu_regs[R_EDX]);
7443            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7444            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7445            /* End TB because translation flags may change.  */
7446            gen_jmp_im(s, s->pc - s->cs_base);
7447            gen_eob(s);
7448            break;
7449
7450        case 0xd8: /* VMRUN */
7451            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7452                goto illegal_op;
7453            }
7454            if (s->cpl != 0) {
7455                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7456                break;
7457            }
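                /* aflag - 1 encodes the address size, telling the helper
                   how much of rAX forms the VMCB address; the insn
                   length lets it save the guest's next RIP.  */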
7458            gen_update_cc_op(s);
7459            gen_jmp_im(s, pc_start - s->cs_base);
7460            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7461                             tcg_const_i32(s->pc - pc_start));
7462            tcg_gen_exit_tb(NULL, 0);
7463            s->base.is_jmp = DISAS_NORETURN;
7464            break;
7465
7466        case 0xd9: /* VMMCALL */
7467            if (!(s->flags & HF_SVME_MASK)) {
7468                goto illegal_op;
7469            }
7470            gen_update_cc_op(s);
7471            gen_jmp_im(s, pc_start - s->cs_base);
7472            gen_helper_vmmcall(cpu_env);
7473            break;
7474
7475        case 0xda: /* VMLOAD */
7476            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7477                goto illegal_op;
7478            }
7479            if (s->cpl != 0) {
7480                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7481                break;
7482            }
7483            gen_update_cc_op(s);
7484            gen_jmp_im(s, pc_start - s->cs_base);
7485            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7486            break;
7487
7488        case 0xdb: /* VMSAVE */
7489            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7490                goto illegal_op;
7491            }
7492            if (s->cpl != 0) {
7493                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7494                break;
7495            }
7496            gen_update_cc_op(s);
7497            gen_jmp_im(s, pc_start - s->cs_base);
7498            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7499            break;
7500
7501        case 0xdc: /* STGI */
7502            if ((!(s->flags & HF_SVME_MASK)
7503                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7504                || !s->pe) {
7505                goto illegal_op;
7506            }
7507            if (s->cpl != 0) {
7508                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7509                break;
7510            }
7511            gen_update_cc_op(s);
7512            gen_helper_stgi(cpu_env);
7513            gen_jmp_im(s, s->pc - s->cs_base);
7514            gen_eob(s);
7515            break;
7516
7517        case 0xdd: /* CLGI */
7518            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7519                goto illegal_op;
7520            }
7521            if (s->cpl != 0) {
7522                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7523                break;
7524            }
7525            gen_update_cc_op(s);
7526            gen_jmp_im(s, pc_start - s->cs_base);
7527            gen_helper_clgi(cpu_env);
7528            break;
7529
7530        case 0xde: /* SKINIT */
7531            if ((!(s->flags & HF_SVME_MASK)
7532                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7533                || !s->pe) {
7534                goto illegal_op;
7535            }
7536            gen_update_cc_op(s);
7537            gen_jmp_im(s, pc_start - s->cs_base);
7538            gen_helper_skinit(cpu_env);
7539            break;
7540
7541        case 0xdf: /* INVLPGA */
7542            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7543                goto illegal_op;
7544            }
7545            if (s->cpl != 0) {
7546                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7547                break;
7548            }
7549            gen_update_cc_op(s);
7550            gen_jmp_im(s, pc_start - s->cs_base);
7551            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7552            break;
7553
7554        CASE_MODRM_MEM_OP(2): /* lgdt */
7555            if (s->cpl != 0) {
7556                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7557                break;
7558            }
7559            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7560            gen_lea_modrm(env, s, modrm);
7561            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7562            gen_add_A0_im(s, 2);
7563            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7564            if (dflag == MO_16) {
7565                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7566            }
7567            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7568            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7569            break;
7570
7571        CASE_MODRM_MEM_OP(3): /* lidt */
7572            if (s->cpl != 0) {
7573                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7574                break;
7575            }
7576            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7577            gen_lea_modrm(env, s, modrm);
7578            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7579            gen_add_A0_im(s, 2);
7580            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7581            if (dflag == MO_16) {
7582                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7583            }
7584            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7585            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7586            break;
7587
7588        CASE_MODRM_OP(4): /* smsw */
7589            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7590            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
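                /* In 64-bit mode a register destination stores CR0 at
                   the full operand size; memory forms always store
                   16 bits.  */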
7591            if (CODE64(s)) {
7592                mod = (modrm >> 6) & 3;
7593                ot = (mod != 3 ? MO_16 : s->dflag);
7594            } else {
7595                ot = MO_16;
7596            }
7597            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7598            break;
7599        case 0xee: /* rdpkru */
7600            if (prefixes & PREFIX_LOCK) {
7601                goto illegal_op;
7602            }
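                /* ECX must be 0 (the only PKRU register); the helper
                   raises #GP for any other value.  */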
7603            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7604            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7605            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7606            break;
7607        case 0xef: /* wrpkru */
7608            if (prefixes & PREFIX_LOCK) {
7609                goto illegal_op;
7610            }
7611            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7612                                  cpu_regs[R_EDX]);
7613            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7614            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7615            break;
7616        CASE_MODRM_OP(6): /* lmsw */
7617            if (s->cpl != 0) {
7618                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7619                break;
7620            }
7621            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7622            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7623            gen_helper_lmsw(cpu_env, s->T0);
7624            gen_jmp_im(s, s->pc - s->cs_base);
7625            gen_eob(s);
7626            break;
7627
7628        CASE_MODRM_MEM_OP(7): /* invlpg */
7629            if (s->cpl != 0) {
7630                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7631                break;
7632            }
7633            gen_update_cc_op(s);
7634            gen_jmp_im(s, pc_start - s->cs_base);
7635            gen_lea_modrm(env, s, modrm);
7636            gen_helper_invlpg(cpu_env, s->A0);
7637            gen_jmp_im(s, s->pc - s->cs_base);
7638            gen_eob(s);
7639            break;
7640
7641        case 0xf8: /* swapgs */
7642#ifdef TARGET_X86_64
7643            if (CODE64(s)) {
7644                if (s->cpl != 0) {
7645                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7646                } else {
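                        /* Exchange the GS base with the kernelgsbase
                           MSR, using T0 as the temporary.  */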
7647                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7648                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7649                                  offsetof(CPUX86State, kernelgsbase));
7650                    tcg_gen_st_tl(s->T0, cpu_env,
7651                                  offsetof(CPUX86State, kernelgsbase));
7652                }
7653                break;
7654            }
7655#endif
7656            goto illegal_op;
7657
7658        case 0xf9: /* rdtscp */
7659            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7660                goto illegal_op;
7661            }
7662            gen_update_cc_op(s);
7663            gen_jmp_im(s, pc_start - s->cs_base);
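                /* Bracket the TSC read with io_start/io_end so icount
                   mode sees a consistent virtual clock.  */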
7664            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7665                gen_io_start();
7666            }
7667            gen_helper_rdtscp(cpu_env);
7668            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7669                gen_io_end();
7670                gen_jmp(s, s->pc - s->cs_base);
7671            }
7672            break;
7673
7674        default:
7675            goto unknown_op;
7676        }
7677        break;
7678
7679    case 0x108: /* invd */
7680    case 0x109: /* wbinvd */
7681        if (s->cpl != 0) {
7682            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7683        } else {
7684            gen_svm_check_intercept(s, pc_start, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD);
7685            /* nothing to do */
7686        }
7687        break;
7688    case 0x63: /* arpl or movsxd (x86_64) */
7689#ifdef TARGET_X86_64
7690        if (CODE64(s)) {
7691            int d_ot;
7692            /* d_ot is the size of the destination */
7693            d_ot = dflag;
7694
7695            modrm = x86_ldub_code(env, s);
7696            reg = ((modrm >> 3) & 7) | rex_r;
7697            mod = (modrm >> 6) & 3;
7698            rm = (modrm & 7) | REX_B(s);
7699
7700            if (mod == 3) {
7701                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7702                /* sign extend */
7703                if (d_ot == MO_64) {
7704                    tcg_gen_ext32s_tl(s->T0, s->T0);
7705                }
7706                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7707            } else {
7708                gen_lea_modrm(env, s, modrm);
7709                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7710                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7711            }
7712        } else
7713#endif
7714        {
7715            TCGLabel *label1;
7716            TCGv t0, t1, t2, a0;
7717
7718            if (!s->pe || s->vm86)
7719                goto illegal_op;
7720            t0 = tcg_temp_local_new();
7721            t1 = tcg_temp_local_new();
7722            t2 = tcg_temp_local_new();
7723            ot = MO_16;
7724            modrm = x86_ldub_code(env, s);
7725            reg = (modrm >> 3) & 7;
7726            mod = (modrm >> 6) & 3;
7727            rm = modrm & 7;
7728            if (mod != 3) {
7729                gen_lea_modrm(env, s, modrm);
7730                gen_op_ld_v(s, ot, t0, s->A0);
7731                a0 = tcg_temp_local_new();
7732                tcg_gen_mov_tl(a0, s->A0);
7733            } else {
7734                gen_op_mov_v_reg(s, ot, t0, rm);
7735                a0 = NULL;
7736            }
7737            gen_op_mov_v_reg(s, ot, t1, reg);
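            /* ARPL: if the destination selector's RPL is below the
               source's, raise it to match and set ZF (recorded in t2);
               otherwise clear ZF.  */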
7738            tcg_gen_andi_tl(s->tmp0, t0, 3);
7739            tcg_gen_andi_tl(t1, t1, 3);
7740            tcg_gen_movi_tl(t2, 0);
7741            label1 = gen_new_label();
7742            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7743            tcg_gen_andi_tl(t0, t0, ~3);
7744            tcg_gen_or_tl(t0, t0, t1);
7745            tcg_gen_movi_tl(t2, CC_Z);
7746            gen_set_label(label1);
7747            if (mod != 3) {
7748                gen_op_st_v(s, ot, t0, a0);
7749                tcg_temp_free(a0);
7750            } else {
7751                gen_op_mov_reg_v(s, ot, rm, t0);
7752            }
7753            gen_compute_eflags(s);
7754            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7755            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7756            tcg_temp_free(t0);
7757            tcg_temp_free(t1);
7758            tcg_temp_free(t2);
7759        }
7760        break;
7761    case 0x102: /* lar */
7762    case 0x103: /* lsl */
7763        {
7764            TCGLabel *label1;
7765            TCGv t0;
7766            if (!s->pe || s->vm86)
7767                goto illegal_op;
7768            ot = dflag != MO_16 ? MO_32 : MO_16;
7769            modrm = x86_ldub_code(env, s);
7770            reg = ((modrm >> 3) & 7) | rex_r;
7771            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7772            t0 = tcg_temp_local_new();
7773            gen_update_cc_op(s);
7774            if (b == 0x102) {
7775                gen_helper_lar(t0, cpu_env, s->T0);
7776            } else {
7777                gen_helper_lsl(t0, cpu_env, s->T0);
7778            }
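            /* The helpers set ZF in cc_src on success; the result is
               written back only in that case.  */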
7779            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7780            label1 = gen_new_label();
7781            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7782            gen_op_mov_reg_v(s, ot, reg, t0);
7783            gen_set_label(label1);
7784            set_cc_op(s, CC_OP_EFLAGS);
7785            tcg_temp_free(t0);
7786        }
7787        break;
7788    case 0x118:
7789        modrm = x86_ldub_code(env, s);
7790        mod = (modrm >> 6) & 3;
7791        op = (modrm >> 3) & 7;
7792        switch(op) {
7793        case 0: /* prefetchnta */
7794        case 1: /* prefetcht0 */
7795        case 2: /* prefetcht1 */
7796        case 3: /* prefetcht2 */
7797            if (mod == 3)
7798                goto illegal_op;
7799            gen_nop_modrm(env, s, modrm);
7800            /* nothing more to do */
7801            break;
7802        default: /* nop (multi byte) */
7803            gen_nop_modrm(env, s, modrm);
7804            break;
7805        }
7806        break;
7807    case 0x11a:
7808        modrm = x86_ldub_code(env, s);
7809        if (s->flags & HF_MPX_EN_MASK) {
7810            mod = (modrm >> 6) & 3;
7811            reg = ((modrm >> 3) & 7) | rex_r;
7812            if (prefixes & PREFIX_REPZ) {
7813                /* bndcl */
7814                if (reg >= 4
7815                    || (prefixes & PREFIX_LOCK)
7816                    || s->aflag == MO_16) {
7817                    goto illegal_op;
7818                }
7819                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7820            } else if (prefixes & PREFIX_REPNZ) {
7821                /* bndcu */
7822                if (reg >= 4
7823                    || (prefixes & PREFIX_LOCK)
7824                    || s->aflag == MO_16) {
7825                    goto illegal_op;
7826                }
7827                TCGv_i64 notu = tcg_temp_new_i64();
7828                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7829                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7830                tcg_temp_free_i64(notu);
7831            } else if (prefixes & PREFIX_DATA) {
7832                /* bndmov -- from reg/mem */
7833                if (reg >= 4 || s->aflag == MO_16) {
7834                    goto illegal_op;
7835                }
7836                if (mod == 3) {
7837                    int reg2 = (modrm & 7) | REX_B(s);
7838                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7839                        goto illegal_op;
7840                    }
7841                    if (s->flags & HF_MPX_IU_MASK) {
7842                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7843                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7844                    }
7845                } else {
7846                    gen_lea_modrm(env, s, modrm);
7847                    if (CODE64(s)) {
7848                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7849                                            s->mem_index, MO_LEQ);
7850                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7851                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7852                                            s->mem_index, MO_LEQ);
7853                    } else {
7854                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7855                                            s->mem_index, MO_LEUL);
7856                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7857                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7858                                            s->mem_index, MO_LEUL);
7859                    }
7860                    /* bnd registers are now in-use */
7861                    gen_set_hflag(s, HF_MPX_IU_MASK);
7862                }
7863            } else if (mod != 3) {
7864                /* bndldx */
7865                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7866                if (reg >= 4
7867                    || (prefixes & PREFIX_LOCK)
7868                    || s->aflag == MO_16
7869                    || a.base < -1) {
7870                    goto illegal_op;
7871                }
7872                if (a.base >= 0) {
7873                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7874                } else {
7875                    tcg_gen_movi_tl(s->A0, 0);
7876                }
7877                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7878                if (a.index >= 0) {
7879                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7880                } else {
7881                    tcg_gen_movi_tl(s->T0, 0);
7882                }
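                    /* The 64-bit helper returns the lower bound and
                       leaves the upper bound in the mmx_t0 scratch
                       slot; the 32-bit helper packs both bounds into
                       one 64-bit value.  */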
7883                if (CODE64(s)) {
7884                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7885                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7886                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7887                } else {
7888                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7889                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7890                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7891                }
7892                gen_set_hflag(s, HF_MPX_IU_MASK);
7893            }
7894        }
7895        gen_nop_modrm(env, s, modrm);
7896        break;
7897    case 0x11b:
7898        modrm = x86_ldub_code(env, s);
7899        if (s->flags & HF_MPX_EN_MASK) {
7900            mod = (modrm >> 6) & 3;
7901            reg = ((modrm >> 3) & 7) | rex_r;
7902            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7903                /* bndmk */
7904                if (reg >= 4
7905                    || (prefixes & PREFIX_LOCK)
7906                    || s->aflag == MO_16) {
7907                    goto illegal_op;
7908                }
7909                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7910                if (a.base >= 0) {
7911                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7912                    if (!CODE64(s)) {
7913                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7914                    }
7915                } else if (a.base == -1) {
7916                    /* no base register: the lower bound is 0 */
7917                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7918                } else {
7919                    /* rip-relative generates #ud */
7920                    goto illegal_op;
7921                }
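                    /* The upper bound is kept one's-complemented, as
                       MPX defines BND.UB = NOT(lea).  */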
7922                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7923                if (!CODE64(s)) {
7924                    tcg_gen_ext32u_tl(s->A0, s->A0);
7925                }
7926                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7927                /* bnd registers are now in-use */
7928                gen_set_hflag(s, HF_MPX_IU_MASK);
7929                break;
7930            } else if (prefixes & PREFIX_REPNZ) {
7931                /* bndcn */
7932                if (reg >= 4
7933                    || (prefixes & PREFIX_LOCK)
7934                    || s->aflag == MO_16) {
7935                    goto illegal_op;
7936                }
7937                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7938            } else if (prefixes & PREFIX_DATA) {
7939                /* bndmov -- to reg/mem */
7940                if (reg >= 4 || s->aflag == MO_16) {
7941                    goto illegal_op;
7942                }
7943                if (mod == 3) {
7944                    int reg2 = (modrm & 7) | REX_B(s);
7945                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7946                        goto illegal_op;
7947                    }
7948                    if (s->flags & HF_MPX_IU_MASK) {
7949                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7950                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7951                    }
7952                } else {
7953                    gen_lea_modrm(env, s, modrm);
7954                    if (CODE64(s)) {
7955                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7956                                            s->mem_index, MO_LEQ);
7957                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7958                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7959                                            s->mem_index, MO_LEQ);
7960                    } else {
7961                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7962                                            s->mem_index, MO_LEUL);
7963                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7964                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7965                                            s->mem_index, MO_LEUL);
7966                    }
7967                }
7968            } else if (mod != 3) {
7969                /* bndstx */
7970                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7971                if (reg >= 4
7972                    || (prefixes & PREFIX_LOCK)
7973                    || s->aflag == MO_16
7974                    || a.base < -1) {
7975                    goto illegal_op;
7976                }
7977                if (a.base >= 0) {
7978                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7979                } else {
7980                    tcg_gen_movi_tl(s->A0, 0);
7981                }
7982                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7983                if (a.index >= 0) {
7984                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7985                } else {
7986                    tcg_gen_movi_tl(s->T0, 0);
7987                }
7988                if (CODE64(s)) {
7989                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7990                                        cpu_bndl[reg], cpu_bndu[reg]);
7991                } else {
7992                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7993                                        cpu_bndl[reg], cpu_bndu[reg]);
7994                }
7995            }
7996        }
7997        gen_nop_modrm(env, s, modrm);
7998        break;
7999    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8000        modrm = x86_ldub_code(env, s);
8001        gen_nop_modrm(env, s, modrm);
8002        break;
8003    case 0x120: /* mov reg, crN */
8004    case 0x122: /* mov crN, reg */
8005        if (s->cpl != 0) {
8006            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8007        } else {
8008            modrm = x86_ldub_code(env, s);
8009            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8010             * AMD documentation (24594.pdf) and testing of
8011             * Intel 386 and 486 processors all show that the mod bits
8012             * are assumed to be 1's, regardless of actual values.
8013             */
8014            rm = (modrm & 7) | REX_B(s);
8015            reg = ((modrm >> 3) & 7) | rex_r;
8016            if (CODE64(s))
8017                ot = MO_64;
8018            else
8019                ot = MO_32;
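            /* With AMD's CR8 legacy aliasing, a LOCK prefix turns
               MOV CR0 into an access to CR8, the task priority
               register.  */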
8020            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
8021                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8022                reg = 8;
8023            }
8024            switch(reg) {
8025            case 0:
8026            case 2:
8027            case 3:
8028            case 4:
8029            case 8:
8030                gen_update_cc_op(s);
8031                gen_jmp_im(s, pc_start - s->cs_base);
8032                if (b & 2) {
8033                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8034                        gen_io_start();
8035                    }
8036                    gen_op_mov_v_reg(s, ot, s->T0, rm);
8037                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8038                                         s->T0);
8039                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8040                        gen_io_end();
8041                    }
8042                    gen_jmp_im(s, s->pc - s->cs_base);
8043                    gen_eob(s);
8044                } else {
8045                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8046                        gen_io_start();
8047                    }
8048                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8049                    gen_op_mov_reg_v(s, ot, rm, s->T0);
8050                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8051                        gen_io_end();
8052                    }
8053                }
8054                break;
8055            default:
8056                goto unknown_op;
8057            }
8058        }
8059        break;
8060    case 0x121: /* mov reg, drN */
8061    case 0x123: /* mov drN, reg */
8062        if (s->cpl != 0) {
8063            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8064        } else {
8065            modrm = x86_ldub_code(env, s);
8066            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8067             * AMD documentation (24594.pdf) and testing of
8068             * Intel 386 and 486 processors all show that the mod bits
8069             * are assumed to be 1's, regardless of actual values.
8070             */
8071            rm = (modrm & 7) | REX_B(s);
8072            reg = ((modrm >> 3) & 7) | rex_r;
8073            if (CODE64(s))
8074                ot = MO_64;
8075            else
8076                ot = MO_32;
8077            if (reg >= 8) {
8078                goto illegal_op;
8079            }
8080            if (b & 2) {
8081                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8082                gen_op_mov_v_reg(s, ot, s->T0, rm);
8083                tcg_gen_movi_i32(s->tmp2_i32, reg);
8084                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8085                gen_jmp_im(s, s->pc - s->cs_base);
8086                gen_eob(s);
8087            } else {
8088                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8089                tcg_gen_movi_i32(s->tmp2_i32, reg);
8090                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8091                gen_op_mov_reg_v(s, ot, rm, s->T0);
8092            }
8093        }
8094        break;
8095    case 0x106: /* clts */
8096        if (s->cpl != 0) {
8097            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8098        } else {
8099            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8100            gen_helper_clts(cpu_env);
8101            /* abort block because static cpu state changed */
8102            gen_jmp_im(s, s->pc - s->cs_base);
8103            gen_eob(s);
8104        }
8105        break;
8106    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8107    case 0x1c3: /* MOVNTI reg, mem */
8108        if (!(s->cpuid_features & CPUID_SSE2))
8109            goto illegal_op;
8110        ot = mo_64_32(dflag);
8111        modrm = x86_ldub_code(env, s);
8112        mod = (modrm >> 6) & 3;
8113        if (mod == 3)
8114            goto illegal_op;
8115        reg = ((modrm >> 3) & 7) | rex_r;
8116        /* generate a generic store */
8117        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8118        break;
8119    case 0x1ae:
8120        modrm = x86_ldub_code(env, s);
8121        switch (modrm) {
8122        CASE_MODRM_MEM_OP(0): /* fxsave */
8123            if (!(s->cpuid_features & CPUID_FXSR)
8124                || (prefixes & PREFIX_LOCK)) {
8125                goto illegal_op;
8126            }
8127            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8128                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8129                break;
8130            }
8131            gen_lea_modrm(env, s, modrm);
8132            gen_helper_fxsave(cpu_env, s->A0);
8133            break;
8134
8135        CASE_MODRM_MEM_OP(1): /* fxrstor */
8136            if (!(s->cpuid_features & CPUID_FXSR)
8137                || (prefixes & PREFIX_LOCK)) {
8138                goto illegal_op;
8139            }
8140            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8141                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8142                break;
8143            }
8144            gen_lea_modrm(env, s, modrm);
8145            gen_helper_fxrstor(cpu_env, s->A0);
8146            break;
8147
8148        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8149            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8150                goto illegal_op;
8151            }
8152            if (s->flags & HF_TS_MASK) {
8153                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8154                break;
8155            }
8156            gen_lea_modrm(env, s, modrm);
8157            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8158            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8159            break;
8160
8161        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8162            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8163                goto illegal_op;
8164            }
8165            if (s->flags & HF_TS_MASK) {
8166                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8167                break;
8168            }
8169            gen_lea_modrm(env, s, modrm);
8170            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8171            gen_op_st_v(s, MO_32, s->T0, s->A0);
8172            break;
8173
8174        CASE_MODRM_MEM_OP(4): /* xsave */
8175            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8176                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8177                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8178                goto illegal_op;
8179            }
8180            gen_lea_modrm(env, s, modrm);
8181            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8182                                  cpu_regs[R_EDX]);
8183            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8184            break;
8185
8186        CASE_MODRM_MEM_OP(5): /* xrstor */
8187            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8188                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8189                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8190                goto illegal_op;
8191            }
8192            gen_lea_modrm(env, s, modrm);
8193            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8194                                  cpu_regs[R_EDX]);
8195            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8196            /* XRSTOR is how MPX is enabled, which changes how
8197               we translate.  Thus we need to end the TB.  */
8198            gen_update_cc_op(s);
8199            gen_jmp_im(s, s->pc - s->cs_base);
8200            gen_eob(s);
8201            break;
8202
8203        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8204            if (prefixes & PREFIX_LOCK) {
8205                goto illegal_op;
8206            }
8207            if (prefixes & PREFIX_DATA) {
8208                /* clwb */
8209                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8210                    goto illegal_op;
8211                }
8212                gen_nop_modrm(env, s, modrm);
8213            } else {
8214                /* xsaveopt */
8215                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8216                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8217                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8218                    goto illegal_op;
8219                }
8220                gen_lea_modrm(env, s, modrm);
8221                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8222                                      cpu_regs[R_EDX]);
8223                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8224            }
8225            break;
8226
8227        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8228            if (prefixes & PREFIX_LOCK) {
8229                goto illegal_op;
8230            }
8231            if (prefixes & PREFIX_DATA) {
8232                /* clflushopt */
8233                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8234                    goto illegal_op;
8235                }
8236            } else {
8237                /* clflush */
8238                if ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ))
8239                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8240                    goto illegal_op;
8241                }
8242            }
8243            gen_nop_modrm(env, s, modrm);
8244            break;
8245
8246        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8247        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8248        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8249        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8250            if (CODE64(s)
8251                && (prefixes & PREFIX_REPZ)
8252                && !(prefixes & PREFIX_LOCK)
8253                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8254                TCGv base, treg, src, dst;
8255
8256                /* Preserve hflags bits by testing CR4 at runtime.  */
8257                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8258                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8259
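                    /* Bit 3 of the modrm byte selects GS vs FS; bit 4
                       selects the write forms.  */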
8260                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8261                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8262
8263                if (modrm & 0x10) {
8264                    /* wr*base */
8265                    dst = base, src = treg;
8266                } else {
8267                    /* rd*base */
8268                    dst = treg, src = base;
8269                }
8270
8271                if (s->dflag == MO_32) {
8272                    tcg_gen_ext32u_tl(dst, src);
8273                } else {
8274                    tcg_gen_mov_tl(dst, src);
8275                }
8276                break;
8277            }
8278            goto unknown_op;
8279
8280        case 0xf8: /* sfence / pcommit */
8281            if (prefixes & PREFIX_DATA) {
8282                /* pcommit */
8283                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8284                    || (prefixes & PREFIX_LOCK)) {
8285                    goto illegal_op;
8286                }
8287                break;
8288            }
8289            /* fallthru */
8290        case 0xf9 ... 0xff: /* sfence */
8291            if (!(s->cpuid_features & CPUID_SSE)
8292                || (prefixes & PREFIX_LOCK)) {
8293                goto illegal_op;
8294            }
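            /* SFENCE orders stores against stores: a TCG ST_ST
               barrier suffices.  */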
8295            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8296            break;
8297        case 0xe8 ... 0xef: /* lfence */
8298            if (!(s->cpuid_features & CPUID_SSE)
8299                || (prefixes & PREFIX_LOCK)) {
8300                goto illegal_op;
8301            }
8302            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8303            break;
8304        case 0xf0 ... 0xf7: /* mfence */
8305            if (!(s->cpuid_features & CPUID_SSE2)
8306                || (prefixes & PREFIX_LOCK)) {
8307                goto illegal_op;
8308            }
8309            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8310            break;
8311
8312        default:
8313            goto unknown_op;
8314        }
8315        break;
8316
8317    case 0x10d: /* 3DNow! prefetch(w) */
8318        modrm = x86_ldub_code(env, s);
8319        mod = (modrm >> 6) & 3;
8320        if (mod == 3)
8321            goto illegal_op;
8322        gen_nop_modrm(env, s, modrm);
8323        break;
8324    case 0x1aa: /* rsm */
8325        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8326        if (!(s->flags & HF_SMM_MASK))
8327            goto illegal_op;
8328        gen_update_cc_op(s);
8329        gen_jmp_im(s, s->pc - s->cs_base);
8330        gen_helper_rsm(cpu_env);
8331        gen_eob(s);
8332        break;
8333    case 0x1b8: /* SSE4.2 popcnt */
8334        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8335             PREFIX_REPZ)
8336            goto illegal_op;
8337        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8338            goto illegal_op;
8339
8340        modrm = x86_ldub_code(env, s);
8341        reg = ((modrm >> 3) & 7) | rex_r;
8342
8343        if (prefixes & PREFIX_DATA) {
8344            ot = MO_16;
8345        } else {
8346            ot = mo_64_32(dflag);
8347        }
8348
8349        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8350        gen_extu(ot, s->T0);
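        /* Keep the zero-extended operand in cc_src: CC_OP_POPCNT
           derives ZF from it, and all other flags read as zero.  */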
8351        tcg_gen_mov_tl(cpu_cc_src, s->T0);
8352        tcg_gen_ctpop_tl(s->T0, s->T0);
8353        gen_op_mov_reg_v(s, ot, reg, s->T0);
8354
8355        set_cc_op(s, CC_OP_POPCNT);
8356        break;
8357    case 0x10e ... 0x10f:
8358        /* 3DNow! instructions, ignore prefixes */
8359        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8360        /* fall through */
8361    case 0x110 ... 0x117:
8362    case 0x128 ... 0x12f:
8363    case 0x138 ... 0x13a:
8364    case 0x150 ... 0x179:
8365    case 0x17c ... 0x17f:
8366    case 0x1c2:
8367    case 0x1c4 ... 0x1c6:
8368    case 0x1d0 ... 0x1fe:
8369        gen_sse(env, s, b, pc_start, rex_r);
8370        break;
8371    default:
8372        goto unknown_op;
8373    }
8374    return s->pc;
8375 illegal_op:
8376    gen_illegal_opcode(s);
8377    return s->pc;
8378 unknown_op:
8379    gen_unknown_opcode(env, s);
8380    return s->pc;
8381}
8382
8383void tcg_x86_init(void)
8384{
8385    static const char reg_names[CPU_NB_REGS][4] = {
8386#ifdef TARGET_X86_64
8387        [R_EAX] = "rax",
8388        [R_EBX] = "rbx",
8389        [R_ECX] = "rcx",
8390        [R_EDX] = "rdx",
8391        [R_ESI] = "rsi",
8392        [R_EDI] = "rdi",
8393        [R_EBP] = "rbp",
8394        [R_ESP] = "rsp",
8395        [8]  = "r8",
8396        [9]  = "r9",
8397        [10] = "r10",
8398        [11] = "r11",
8399        [12] = "r12",
8400        [13] = "r13",
8401        [14] = "r14",
8402        [15] = "r15",
8403#else
8404        [R_EAX] = "eax",
8405        [R_EBX] = "ebx",
8406        [R_ECX] = "ecx",
8407        [R_EDX] = "edx",
8408        [R_ESI] = "esi",
8409        [R_EDI] = "edi",
8410        [R_EBP] = "ebp",
8411        [R_ESP] = "esp",
8412#endif
8413    };
8414    static const char seg_base_names[6][8] = {
8415        [R_CS] = "cs_base",
8416        [R_DS] = "ds_base",
8417        [R_ES] = "es_base",
8418        [R_FS] = "fs_base",
8419        [R_GS] = "gs_base",
8420        [R_SS] = "ss_base",
8421    };
8422    static const char bnd_regl_names[4][8] = {
8423        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8424    };
8425    static const char bnd_regu_names[4][8] = {
8426        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8427    };
8428    int i;
8429
8430    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8431                                       offsetof(CPUX86State, cc_op), "cc_op");
8432    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8433                                    "cc_dst");
8434    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8435                                    "cc_src");
8436    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8437                                     "cc_src2");
8438
8439    for (i = 0; i < CPU_NB_REGS; ++i) {
8440        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8441                                         offsetof(CPUX86State, regs[i]),
8442                                         reg_names[i]);
8443    }
8444
8445    for (i = 0; i < 6; ++i) {
8446        cpu_seg_base[i]
8447            = tcg_global_mem_new(cpu_env,
8448                                 offsetof(CPUX86State, segs[i].base),
8449                                 seg_base_names[i]);
8450    }
8451
8452    for (i = 0; i < 4; ++i) {
8453        cpu_bndl[i]
8454            = tcg_global_mem_new_i64(cpu_env,
8455                                     offsetof(CPUX86State, bnd_regs[i].lb),
8456                                     bnd_regl_names[i]);
8457        cpu_bndu[i]
8458            = tcg_global_mem_new_i64(cpu_env,
8459                                     offsetof(CPUX86State, bnd_regs[i].ub),
8460                                     bnd_regu_names[i]);
8461    }
8462}
8463
8464static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8465{
8466    DisasContext *dc = container_of(dcbase, DisasContext, base);
8467    CPUX86State *env = cpu->env_ptr;
8468    uint32_t flags = dc->base.tb->flags;
8469    target_ulong cs_base = dc->base.tb->cs_base;
8470
8471    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8472    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8473    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8474    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8475    dc->f_st = 0;
8476    dc->vm86 = (flags >> VM_SHIFT) & 1;
8477    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8478    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8479    dc->tf = (flags >> TF_SHIFT) & 1;
8480    dc->cc_op = CC_OP_DYNAMIC;
8481    dc->cc_op_dirty = false;
8482    dc->cs_base = cs_base;
8483    dc->popl_esp_hack = 0;
8484    /* select memory access functions */
8485    dc->mem_index = 0;
8486#ifdef CONFIG_SOFTMMU
8487    dc->mem_index = cpu_mmu_index(env, false);
8488#endif
8489    dc->cpuid_features = env->features[FEAT_1_EDX];
8490    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8491    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8492    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8493    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8494    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8495#ifdef TARGET_X86_64
8496    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8497    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8498#endif
8499    dc->flags = flags;
8500    dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8501                    (flags & HF_INHIBIT_IRQ_MASK));
8502    /* Do not optimize repz jumps at all in icount mode, because
8503       rep movsS instructions are executed via different code paths
8504       in the repz_opt and !repz_opt cases.  The unoptimized path
8505       was always used except in single-step mode; disabling the
8506       jump optimization makes the control paths identical in
8507       normal and single-step execution.  As a result there is no
8508       repz jump optimization in record/replay modes, and there is
8509       always an additional step for ecx=0 when icount is
8510       enabled.
8511     */
8512    dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8513#if 0
8514    /* check addseg logic */
8515    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8516        printf("ERROR addseg\n");
8517#endif
8518
8519    dc->T0 = tcg_temp_new();
8520    dc->T1 = tcg_temp_new();
8521    dc->A0 = tcg_temp_new();
8522
8523    dc->tmp0 = tcg_temp_new();
8524    dc->tmp1_i64 = tcg_temp_new_i64();
8525    dc->tmp2_i32 = tcg_temp_new_i32();
8526    dc->tmp3_i32 = tcg_temp_new_i32();
8527    dc->tmp4 = tcg_temp_new();
8528    dc->ptr0 = tcg_temp_new_ptr();
8529    dc->ptr1 = tcg_temp_new_ptr();
8530    dc->cc_srcT = tcg_temp_local_new();
8531}
8532
8533static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8534{
8535}
8536
8537static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8538{
8539    DisasContext *dc = container_of(dcbase, DisasContext, base);
8540
8541    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8542}
8543
8544static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8545                                     const CPUBreakpoint *bp)
8546{
8547    DisasContext *dc = container_of(dcbase, DisasContext, base);
8548    /* If RF is set, suppress an internally generated breakpoint.  */
8549    int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8550    if (bp->flags & flags) {
8551        gen_debug(dc, dc->base.pc_next - dc->cs_base);
8552        dc->base.is_jmp = DISAS_NORETURN;
8553        /* The address covered by the breakpoint must be included in
8554           [tb->pc, tb->pc + tb->size) in order for it to be
8555           properly cleared -- thus we increment the PC here so that
8556           the generic logic setting tb->size later does the right thing.  */
8557        dc->base.pc_next += 1;
8558        return true;
8559    } else {
8560        return false;
8561    }
8562}
8563
8564static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8565{
8566    DisasContext *dc = container_of(dcbase, DisasContext, base);
8567    target_ulong pc_next = disas_insn(dc, cpu);
8568
8569    if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8570        /* In single-step mode we generate only one instruction
8571           and then raise an exception.  */
8572        /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we
8573           clear the flag and abort the translation to give the
8574           IRQs a chance to happen.  */
8575        dc->base.is_jmp = DISAS_TOO_MANY;
8576    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8577               && ((pc_next & TARGET_PAGE_MASK)
8578                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8579                       & TARGET_PAGE_MASK)
8580                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8581        /* Do not cross a page boundary in icount mode, since that
8582           can cause an exception.  Stop only when the boundary
8583           would be crossed by the first instruction of the next
8584           block.  If the current instruction already crossed it,
8585           that is fine, because no exception has stopped this code.
8586         */
8587        dc->base.is_jmp = DISAS_TOO_MANY;
8588    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8589        dc->base.is_jmp = DISAS_TOO_MANY;
8590    }
8591
8592    dc->base.pc_next = pc_next;
8593}
8594
8595static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8596{
8597    DisasContext *dc = container_of(dcbase, DisasContext, base);
8598
8599    if (dc->base.is_jmp == DISAS_TOO_MANY) {
8600        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8601        gen_eob(dc);
8602    }
8603}
8604
8605static void i386_tr_disas_log(const DisasContextBase *dcbase,
8606                              CPUState *cpu)
8607{
8608    DisasContext *dc = container_of(dcbase, DisasContext, base);
8609
8610    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8611    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8612}
8613
8614static const TranslatorOps i386_tr_ops = {
8615    .init_disas_context = i386_tr_init_disas_context,
8616    .tb_start           = i386_tr_tb_start,
8617    .insn_start         = i386_tr_insn_start,
8618    .breakpoint_check   = i386_tr_breakpoint_check,
8619    .translate_insn     = i386_tr_translate_insn,
8620    .tb_stop            = i386_tr_tb_stop,
8621    .disas_log          = i386_tr_disas_log,
8622};
8623
8624/* generate intermediate code for basic block 'tb'.  */
8625void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8626{
8627    DisasContext dc;
8628
8629    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8630}
8631
8632void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8633                          target_ulong *data)
8634{
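    /* data[] mirrors tcg_gen_insn_start(): data[0] is eip + cs_base
       and data[1] is the cc_op in effect at the start of the insn.  */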
8635    int cc_op = data[1];
8636    env->eip = data[0] - tb->cs_base;
8637    if (cc_op != CC_OP_DYNAMIC) {
8638        env->cc_op = cc_op;
8639    }
8640}
8641