qemu/target-i386/translate.c
   1/*
   2 *  i386 translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20
  21#include "qemu/host-utils.h"
  22#include "cpu.h"
  23#include "disas/disas.h"
  24#include "exec/exec-all.h"
  25#include "tcg-op.h"
  26#include "exec/cpu_ldst.h"
  27
  28#include "exec/helper-proto.h"
  29#include "exec/helper-gen.h"
  30
  31#include "trace-tcg.h"
  32#include "exec/log.h"
  33
  34
  35#define PREFIX_REPZ   0x01
  36#define PREFIX_REPNZ  0x02
  37#define PREFIX_LOCK   0x04
  38#define PREFIX_DATA   0x08
  39#define PREFIX_ADR    0x10
  40#define PREFIX_VEX    0x20
  41
  42#ifdef TARGET_X86_64
  43#define CODE64(s) ((s)->code64)
  44#define REX_X(s) ((s)->rex_x)
  45#define REX_B(s) ((s)->rex_b)
  46#else
  47#define CODE64(s) 0
  48#define REX_X(s) 0
  49#define REX_B(s) 0
  50#endif
  51
  52#ifdef TARGET_X86_64
  53# define ctztl  ctz64
  54# define clztl  clz64
  55#else
  56# define ctztl  ctz32
  57# define clztl  clz32
  58#endif
  59
  60/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
  61#define CASE_MODRM_MEM_OP(OP) \
  62    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
  63    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
  64    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
  65
  66#define CASE_MODRM_OP(OP) \
  67    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
  68    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
  69    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
  70    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
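
    /* For example, CASE_MODRM_MEM_OP(0) expands to case labels covering the
       three memory forms of every modrm byte whose reg field is 0:
           case 0x00 ... 0x07:    mod == 0, any rm
           case 0x40 ... 0x47:    mod == 1, any rm
           case 0x80 ... 0x87:    mod == 2, any rm
       CASE_MODRM_OP(0) also covers 0xc0 ... 0xc7, the mod == 3 register
       forms.  */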
  71
  72//#define MACRO_TEST   1
  73
  74/* global register indexes */
  75static TCGv_env cpu_env;
  76static TCGv cpu_A0;
  77static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
  78static TCGv_i32 cpu_cc_op;
  79static TCGv cpu_regs[CPU_NB_REGS];
  80static TCGv cpu_seg_base[6];
  81static TCGv_i64 cpu_bndl[4];
  82static TCGv_i64 cpu_bndu[4];
  83/* local temps */
  84static TCGv cpu_T0, cpu_T1;
  85/* local register indexes (only used inside old micro ops) */
  86static TCGv cpu_tmp0, cpu_tmp4;
  87static TCGv_ptr cpu_ptr0, cpu_ptr1;
  88static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
  89static TCGv_i64 cpu_tmp1_i64;
  90
  91#include "exec/gen-icount.h"
  92
  93#ifdef TARGET_X86_64
  94static int x86_64_hregs;
  95#endif
  96
  97typedef struct DisasContext {
  98    /* current insn context */
  99    int override; /* -1 if no override */
 100    int prefix;
 101    TCGMemOp aflag;
 102    TCGMemOp dflag;
 103    target_ulong pc_start;
 104    target_ulong pc; /* pc = eip + cs_base */
  105    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
  106                   static state change (stop translation) */
 107    /* current block context */
 108    target_ulong cs_base; /* base of CS segment */
 109    int pe;     /* protected mode */
 110    int code32; /* 32 bit code segment */
 111#ifdef TARGET_X86_64
 112    int lma;    /* long mode active */
 113    int code64; /* 64 bit code segment */
 114    int rex_x, rex_b;
 115#endif
 116    int vex_l;  /* vex vector length */
  117    int vex_v;  /* vex vvvv register, without 1's complement.  */
 118    int ss32;   /* 32 bit stack segment */
 119    CCOp cc_op;  /* current CC operation */
 120    bool cc_op_dirty;
  121    int addseg; /* non-zero if any of DS/ES/SS has a non-zero base */
 122    int f_st;   /* currently unused */
 123    int vm86;   /* vm86 mode */
 124    int cpl;
 125    int iopl;
 126    int tf;     /* TF cpu flag */
 127    int singlestep_enabled; /* "hardware" single step enabled */
 128    int jmp_opt; /* use direct block chaining for direct jumps */
 129    int repz_opt; /* optimize jumps within repz instructions */
 130    int mem_index; /* select memory access functions */
 131    uint64_t flags; /* all execution flags */
 132    struct TranslationBlock *tb;
 133    int popl_esp_hack; /* for correct popl with esp base handling */
 134    int rip_offset; /* only used in x86_64, but left for simplicity */
 135    int cpuid_features;
 136    int cpuid_ext_features;
 137    int cpuid_ext2_features;
 138    int cpuid_ext3_features;
 139    int cpuid_7_0_ebx_features;
 140    int cpuid_xsave_features;
 141} DisasContext;
 142
 143static void gen_eob(DisasContext *s);
 144static void gen_jmp(DisasContext *s, target_ulong eip);
 145static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
 146static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
 147
 148/* i386 arith/logic operations */
 149enum {
 150    OP_ADDL,
 151    OP_ORL,
 152    OP_ADCL,
 153    OP_SBBL,
 154    OP_ANDL,
 155    OP_SUBL,
 156    OP_XORL,
 157    OP_CMPL,
 158};
 159
 160/* i386 shift ops */
 161enum {
 162    OP_ROL,
 163    OP_ROR,
 164    OP_RCL,
 165    OP_RCR,
 166    OP_SHL,
 167    OP_SHR,
 168    OP_SHL1, /* undocumented */
 169    OP_SAR = 7,
 170};
 171
 172enum {
 173    JCC_O,
 174    JCC_B,
 175    JCC_Z,
 176    JCC_BE,
 177    JCC_S,
 178    JCC_P,
 179    JCC_L,
 180    JCC_LE,
 181};
 182
 183enum {
 184    /* I386 int registers */
 185    OR_EAX,   /* MUST be even numbered */
 186    OR_ECX,
 187    OR_EDX,
 188    OR_EBX,
 189    OR_ESP,
 190    OR_EBP,
 191    OR_ESI,
 192    OR_EDI,
 193
 194    OR_TMP0 = 16,    /* temporary operand register */
 195    OR_TMP1,
 196    OR_A0, /* temporary register used when doing address evaluation */
 197};
 198
 199enum {
 200    USES_CC_DST  = 1,
 201    USES_CC_SRC  = 2,
 202    USES_CC_SRC2 = 4,
 203    USES_CC_SRCT = 8,
 204};
 205
 206/* Bit set if the global variable is live after setting CC_OP to X.  */
 207static const uint8_t cc_op_live[CC_OP_NB] = {
 208    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
 209    [CC_OP_EFLAGS] = USES_CC_SRC,
 210    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
 211    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
 212    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
 213    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
 214    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
 215    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
 216    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
 217    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
 218    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
 219    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
 220    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
 221    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
 222    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
 223    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
 224    [CC_OP_CLR] = 0,
 225};
 226
 227static void set_cc_op(DisasContext *s, CCOp op)
 228{
 229    int dead;
 230
 231    if (s->cc_op == op) {
 232        return;
 233    }
 234
 235    /* Discard CC computation that will no longer be used.  */
 236    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
 237    if (dead & USES_CC_DST) {
 238        tcg_gen_discard_tl(cpu_cc_dst);
 239    }
 240    if (dead & USES_CC_SRC) {
 241        tcg_gen_discard_tl(cpu_cc_src);
 242    }
 243    if (dead & USES_CC_SRC2) {
 244        tcg_gen_discard_tl(cpu_cc_src2);
 245    }
 246    if (dead & USES_CC_SRCT) {
 247        tcg_gen_discard_tl(cpu_cc_srcT);
 248    }
 249
 250    if (op == CC_OP_DYNAMIC) {
 251        /* The DYNAMIC setting is translator only, and should never be
 252           stored.  Thus we always consider it clean.  */
 253        s->cc_op_dirty = false;
 254    } else {
 255        /* Discard any computed CC_OP value (see shifts).  */
 256        if (s->cc_op == CC_OP_DYNAMIC) {
 257            tcg_gen_discard_i32(cpu_cc_op);
 258        }
 259        s->cc_op_dirty = true;
 260    }
 261    s->cc_op = op;
 262}
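
    /* For example, moving from CC_OP_ADDB (DST and SRC live) to CC_OP_LOGICB
       (only DST live) gives dead == USES_CC_SRC, so only cpu_cc_src is
       discarded while cpu_cc_dst is kept for the new cc_op.  */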
 263
 264static void gen_update_cc_op(DisasContext *s)
 265{
 266    if (s->cc_op_dirty) {
 267        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
 268        s->cc_op_dirty = false;
 269    }
 270}
 271
 272#ifdef TARGET_X86_64
 273
 274#define NB_OP_SIZES 4
 275
 276#else /* !TARGET_X86_64 */
 277
 278#define NB_OP_SIZES 3
 279
 280#endif /* !TARGET_X86_64 */
 281
 282#if defined(HOST_WORDS_BIGENDIAN)
 283#define REG_B_OFFSET (sizeof(target_ulong) - 1)
 284#define REG_H_OFFSET (sizeof(target_ulong) - 2)
 285#define REG_W_OFFSET (sizeof(target_ulong) - 2)
 286#define REG_L_OFFSET (sizeof(target_ulong) - 4)
 287#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
 288#else
 289#define REG_B_OFFSET 0
 290#define REG_H_OFFSET 1
 291#define REG_W_OFFSET 0
 292#define REG_L_OFFSET 0
 293#define REG_LH_OFFSET 4
 294#endif
 295
 296/* In instruction encodings for byte register accesses the
 297 * register number usually indicates "low 8 bits of register N";
 298 * however there are some special cases where N 4..7 indicates
  299 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
 300 * true for this special case, false otherwise.
 301 */
 302static inline bool byte_reg_is_xH(int reg)
 303{
 304    if (reg < 4) {
 305        return false;
 306    }
 307#ifdef TARGET_X86_64
 308    if (reg >= 8 || x86_64_hregs) {
 309        return false;
 310    }
 311#endif
 312    return true;
 313}
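
    /* For example, "mov ah, al" names AH with register number 4 and no REX
       prefix, so byte_reg_is_xH(4) returns true and bits 15..8 of EAX are
       accessed; under a REX prefix (x86_64_hregs set) the same number 4
       selects SPL instead.  */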
 314
 315/* Select the size of a push/pop operation.  */
 316static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
 317{
 318    if (CODE64(s)) {
 319        return ot == MO_16 ? MO_16 : MO_64;
 320    } else {
 321        return ot;
 322    }
 323}
 324
 325/* Select the size of the stack pointer.  */
 326static inline TCGMemOp mo_stacksize(DisasContext *s)
 327{
 328    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
 329}
 330
 331/* Select only size 64 else 32.  Used for SSE operand sizes.  */
 332static inline TCGMemOp mo_64_32(TCGMemOp ot)
 333{
 334#ifdef TARGET_X86_64
 335    return ot == MO_64 ? MO_64 : MO_32;
 336#else
 337    return MO_32;
 338#endif
 339}
 340
 341/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
 342   byte vs word opcodes.  */
 343static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
 344{
 345    return b & 1 ? ot : MO_8;
 346}
 347
 348/* Select size 8 if lsb of B is clear, else OT capped at 32.
 349   Used for decoding operand size of port opcodes.  */
 350static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
 351{
 352    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
 353}
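
    /* For example, opcode 0x88 (MOV Eb,Gb) has bit 0 clear and so operates
       on MO_8, while 0x89 (MOV Ev,Gv) keeps the full operand size; for port
       I/O, mo_b_d32 caps "in eax, dx" at MO_32 even with a 64-bit operand
       size, since there are no 64-bit port accesses.  */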
 354
 355static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
 356{
  357    switch (ot) {
 358    case MO_8:
 359        if (!byte_reg_is_xH(reg)) {
 360            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
 361        } else {
 362            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
 363        }
 364        break;
 365    case MO_16:
 366        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
 367        break;
 368    case MO_32:
  369        /* For x86_64, this sets the higher half of the register to zero.
 370           For i386, this is equivalent to a mov. */
 371        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
 372        break;
 373#ifdef TARGET_X86_64
 374    case MO_64:
 375        tcg_gen_mov_tl(cpu_regs[reg], t0);
 376        break;
 377#endif
 378    default:
 379        tcg_abort();
 380    }
 381}
 382
 383static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
 384{
 385    if (ot == MO_8 && byte_reg_is_xH(reg)) {
 386        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
 387        tcg_gen_ext8u_tl(t0, t0);
 388    } else {
 389        tcg_gen_mov_tl(t0, cpu_regs[reg]);
 390    }
 391}
 392
 393static void gen_add_A0_im(DisasContext *s, int val)
 394{
 395    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
 396    if (!CODE64(s)) {
 397        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
 398    }
 399}
 400
 401static inline void gen_op_jmp_v(TCGv dest)
 402{
 403    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
 404}
 405
 406static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
 407{
 408    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
 409    gen_op_mov_reg_v(size, reg, cpu_tmp0);
 410}
 411
 412static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
 413{
 414    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
 415    gen_op_mov_reg_v(size, reg, cpu_tmp0);
 416}
 417
 418static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 419{
 420    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
 421}
 422
 423static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 424{
 425    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
 426}
 427
 428static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 429{
 430    if (d == OR_TMP0) {
 431        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
 432    } else {
 433        gen_op_mov_reg_v(idx, d, cpu_T0);
 434    }
 435}
 436
 437static inline void gen_jmp_im(target_ulong pc)
 438{
 439    tcg_gen_movi_tl(cpu_tmp0, pc);
 440    gen_op_jmp_v(cpu_tmp0);
 441}
 442
 443/* Compute SEG:REG into A0.  SEG is selected from the override segment
 444   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
 445   indicate no override.  */
 446static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
 447                          int def_seg, int ovr_seg)
 448{
 449    switch (aflag) {
 450#ifdef TARGET_X86_64
 451    case MO_64:
 452        if (ovr_seg < 0) {
 453            tcg_gen_mov_tl(cpu_A0, a0);
 454            return;
 455        }
 456        break;
 457#endif
 458    case MO_32:
 459        /* 32 bit address */
 460        if (ovr_seg < 0 && s->addseg) {
 461            ovr_seg = def_seg;
 462        }
 463        if (ovr_seg < 0) {
 464            tcg_gen_ext32u_tl(cpu_A0, a0);
 465            return;
 466        }
 467        break;
 468    case MO_16:
 469        /* 16 bit address */
 470        tcg_gen_ext16u_tl(cpu_A0, a0);
 471        a0 = cpu_A0;
 472        if (ovr_seg < 0) {
 473            if (s->addseg) {
 474                ovr_seg = def_seg;
 475            } else {
 476                return;
 477            }
 478        }
 479        break;
 480    default:
 481        tcg_abort();
 482    }
 483
 484    if (ovr_seg >= 0) {
 485        TCGv seg = cpu_seg_base[ovr_seg];
 486
 487        if (aflag == MO_64) {
 488            tcg_gen_add_tl(cpu_A0, a0, seg);
 489        } else if (CODE64(s)) {
 490            tcg_gen_ext32u_tl(cpu_A0, a0);
 491            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
 492        } else {
 493            tcg_gen_add_tl(cpu_A0, a0, seg);
 494            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
 495        }
 496    }
 497}
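
    /* For example, "lodsb" reaches this routine with def_seg == R_DS and
       ovr_seg == -1: without an override and with zero segment bases
       (addseg clear) the base addition is skipped entirely, while
       "es lodsb" passes ovr_seg == R_ES and always adds
       cpu_seg_base[R_ES].  */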
 498
 499static inline void gen_string_movl_A0_ESI(DisasContext *s)
 500{
 501    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
 502}
 503
 504static inline void gen_string_movl_A0_EDI(DisasContext *s)
 505{
 506    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
 507}
 508
 509static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
 510{
 511    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
 512    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
  513}
 514
 515static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
 516{
 517    switch (size) {
 518    case MO_8:
 519        if (sign) {
 520            tcg_gen_ext8s_tl(dst, src);
 521        } else {
 522            tcg_gen_ext8u_tl(dst, src);
 523        }
 524        return dst;
 525    case MO_16:
 526        if (sign) {
 527            tcg_gen_ext16s_tl(dst, src);
 528        } else {
 529            tcg_gen_ext16u_tl(dst, src);
 530        }
 531        return dst;
 532#ifdef TARGET_X86_64
 533    case MO_32:
 534        if (sign) {
 535            tcg_gen_ext32s_tl(dst, src);
 536        } else {
 537            tcg_gen_ext32u_tl(dst, src);
 538        }
 539        return dst;
 540#endif
 541    default:
 542        return src;
 543    }
 544}
 545
 546static void gen_extu(TCGMemOp ot, TCGv reg)
 547{
 548    gen_ext_tl(reg, reg, ot, false);
 549}
 550
 551static void gen_exts(TCGMemOp ot, TCGv reg)
 552{
 553    gen_ext_tl(reg, reg, ot, true);
 554}
 555
 556static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
 557{
 558    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
 559    gen_extu(size, cpu_tmp0);
 560    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
 561}
 562
 563static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
 564{
 565    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
 566    gen_extu(size, cpu_tmp0);
 567    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
 568}
 569
 570static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
 571{
 572    switch (ot) {
 573    case MO_8:
 574        gen_helper_inb(v, cpu_env, n);
 575        break;
 576    case MO_16:
 577        gen_helper_inw(v, cpu_env, n);
 578        break;
 579    case MO_32:
 580        gen_helper_inl(v, cpu_env, n);
 581        break;
 582    default:
 583        tcg_abort();
 584    }
 585}
 586
 587static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
 588{
 589    switch (ot) {
 590    case MO_8:
 591        gen_helper_outb(cpu_env, v, n);
 592        break;
 593    case MO_16:
 594        gen_helper_outw(cpu_env, v, n);
 595        break;
 596    case MO_32:
 597        gen_helper_outl(cpu_env, v, n);
 598        break;
 599    default:
 600        tcg_abort();
 601    }
 602}
 603
 604static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
 605                         uint32_t svm_flags)
 606{
 607    target_ulong next_eip;
 608
 609    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
 610        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
 611        switch (ot) {
 612        case MO_8:
 613            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
 614            break;
 615        case MO_16:
 616            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
 617            break;
 618        case MO_32:
 619            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
 620            break;
 621        default:
 622            tcg_abort();
 623        }
 624    }
  625    if (s->flags & HF_SVMI_MASK) {
 626        gen_update_cc_op(s);
 627        gen_jmp_im(cur_eip);
 628        svm_flags |= (1 << (4 + ot));
 629        next_eip = s->pc - s->cs_base;
 630        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
 631        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
 632                                tcg_const_i32(svm_flags),
 633                                tcg_const_i32(next_eip - cur_eip));
 634    }
 635}
 636
 637static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 638{
 639    gen_string_movl_A0_ESI(s);
 640    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
 641    gen_string_movl_A0_EDI(s);
 642    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
 643    gen_op_movl_T0_Dshift(ot);
 644    gen_op_add_reg_T0(s->aflag, R_ESI);
 645    gen_op_add_reg_T0(s->aflag, R_EDI);
 646}
 647
 648static void gen_op_update1_cc(void)
 649{
 650    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
 651}
 652
 653static void gen_op_update2_cc(void)
 654{
 655    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
 656    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
 657}
 658
 659static void gen_op_update3_cc(TCGv reg)
 660{
 661    tcg_gen_mov_tl(cpu_cc_src2, reg);
 662    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
 663    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
 664}
 665
 666static inline void gen_op_testl_T0_T1_cc(void)
 667{
 668    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
 669}
 670
 671static void gen_op_update_neg_cc(void)
 672{
 673    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
 674    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
 675    tcg_gen_movi_tl(cpu_cc_srcT, 0);
 676}
 677
 678/* compute all eflags to cc_src */
 679static void gen_compute_eflags(DisasContext *s)
 680{
 681    TCGv zero, dst, src1, src2;
 682    int live, dead;
 683
 684    if (s->cc_op == CC_OP_EFLAGS) {
 685        return;
 686    }
 687    if (s->cc_op == CC_OP_CLR) {
 688        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
 689        set_cc_op(s, CC_OP_EFLAGS);
 690        return;
 691    }
 692
 693    TCGV_UNUSED(zero);
 694    dst = cpu_cc_dst;
 695    src1 = cpu_cc_src;
 696    src2 = cpu_cc_src2;
 697
 698    /* Take care to not read values that are not live.  */
 699    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
 700    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
 701    if (dead) {
 702        zero = tcg_const_tl(0);
 703        if (dead & USES_CC_DST) {
 704            dst = zero;
 705        }
 706        if (dead & USES_CC_SRC) {
 707            src1 = zero;
 708        }
 709        if (dead & USES_CC_SRC2) {
 710            src2 = zero;
 711        }
 712    }
 713
 714    gen_update_cc_op(s);
 715    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
 716    set_cc_op(s, CC_OP_EFLAGS);
 717
 718    if (dead) {
 719        tcg_temp_free(zero);
 720    }
 721}
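
    /* Note: the zero substitution above matters because a global that is
       not live for the current cc_op may have been discarded, so passing it
       to the helper directly could read an undefined value.  */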
 722
 723typedef struct CCPrepare {
 724    TCGCond cond;
 725    TCGv reg;
 726    TCGv reg2;
 727    target_ulong imm;
 728    target_ulong mask;
 729    bool use_reg2;
 730    bool no_setcond;
 731} CCPrepare;
 732
 733/* compute eflags.C to reg */
 734static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
 735{
 736    TCGv t0, t1;
 737    int size, shift;
 738
 739    switch (s->cc_op) {
 740    case CC_OP_SUBB ... CC_OP_SUBQ:
 741        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
 742        size = s->cc_op - CC_OP_SUBB;
 743        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
 744        /* If no temporary was used, be careful not to alias t1 and t0.  */
 745        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
 746        tcg_gen_mov_tl(t0, cpu_cc_srcT);
 747        gen_extu(size, t0);
 748        goto add_sub;
 749
 750    case CC_OP_ADDB ... CC_OP_ADDQ:
 751        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
 752        size = s->cc_op - CC_OP_ADDB;
 753        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
 754        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
 755    add_sub:
 756        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
 757                             .reg2 = t1, .mask = -1, .use_reg2 = true };
 758
 759    case CC_OP_LOGICB ... CC_OP_LOGICQ:
 760    case CC_OP_CLR:
 761        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
 762
 763    case CC_OP_INCB ... CC_OP_INCQ:
 764    case CC_OP_DECB ... CC_OP_DECQ:
 765        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
 766                             .mask = -1, .no_setcond = true };
 767
 768    case CC_OP_SHLB ... CC_OP_SHLQ:
 769        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
 770        size = s->cc_op - CC_OP_SHLB;
 771        shift = (8 << size) - 1;
 772        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
 773                             .mask = (target_ulong)1 << shift };
 774
 775    case CC_OP_MULB ... CC_OP_MULQ:
 776        return (CCPrepare) { .cond = TCG_COND_NE,
 777                             .reg = cpu_cc_src, .mask = -1 };
 778
 779    case CC_OP_BMILGB ... CC_OP_BMILGQ:
 780        size = s->cc_op - CC_OP_BMILGB;
 781        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
 782        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
 783
 784    case CC_OP_ADCX:
 785    case CC_OP_ADCOX:
 786        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
 787                             .mask = -1, .no_setcond = true };
 788
 789    case CC_OP_EFLAGS:
 790    case CC_OP_SARB ... CC_OP_SARQ:
 791        /* CC_SRC & 1 */
 792        return (CCPrepare) { .cond = TCG_COND_NE,
 793                             .reg = cpu_cc_src, .mask = CC_C };
 794
 795    default:
 796       /* The need to compute only C from CC_OP_DYNAMIC is important
 797          in efficiently implementing e.g. INC at the start of a TB.  */
 798       gen_update_cc_op(s);
 799       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
 800                               cpu_cc_src2, cpu_cc_op);
 801       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
 802                            .mask = -1, .no_setcond = true };
 803    }
 804}
 805
 806/* compute eflags.P to reg */
 807static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
 808{
 809    gen_compute_eflags(s);
 810    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
 811                         .mask = CC_P };
 812}
 813
 814/* compute eflags.S to reg */
 815static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
 816{
 817    switch (s->cc_op) {
 818    case CC_OP_DYNAMIC:
 819        gen_compute_eflags(s);
 820        /* FALLTHRU */
 821    case CC_OP_EFLAGS:
 822    case CC_OP_ADCX:
 823    case CC_OP_ADOX:
 824    case CC_OP_ADCOX:
 825        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
 826                             .mask = CC_S };
 827    case CC_OP_CLR:
 828        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
 829    default:
 830        {
 831            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
 832            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
 833            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
 834        }
 835    }
 836}
 837
 838/* compute eflags.O to reg */
 839static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
 840{
 841    switch (s->cc_op) {
 842    case CC_OP_ADOX:
 843    case CC_OP_ADCOX:
 844        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
 845                             .mask = -1, .no_setcond = true };
 846    case CC_OP_CLR:
 847        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
 848    default:
 849        gen_compute_eflags(s);
 850        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
 851                             .mask = CC_O };
 852    }
 853}
 854
 855/* compute eflags.Z to reg */
 856static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
 857{
 858    switch (s->cc_op) {
 859    case CC_OP_DYNAMIC:
 860        gen_compute_eflags(s);
 861        /* FALLTHRU */
 862    case CC_OP_EFLAGS:
 863    case CC_OP_ADCX:
 864    case CC_OP_ADOX:
 865    case CC_OP_ADCOX:
 866        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
 867                             .mask = CC_Z };
 868    case CC_OP_CLR:
 869        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
 870    default:
 871        {
 872            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
 873            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
 874            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
 875        }
 876    }
 877}
 878
 879/* perform a conditional store into register 'reg' according to jump opcode
  880   value 'b'. In the fast case, T0 is guaranteed not to be used. */
 881static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
 882{
 883    int inv, jcc_op, cond;
 884    TCGMemOp size;
 885    CCPrepare cc;
 886    TCGv t0;
 887
 888    inv = b & 1;
 889    jcc_op = (b >> 1) & 7;
 890
 891    switch (s->cc_op) {
 892    case CC_OP_SUBB ... CC_OP_SUBQ:
 893        /* We optimize relational operators for the cmp/jcc case.  */
 894        size = s->cc_op - CC_OP_SUBB;
 895        switch (jcc_op) {
 896        case JCC_BE:
 897            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
 898            gen_extu(size, cpu_tmp4);
 899            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
 900            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
 901                               .reg2 = t0, .mask = -1, .use_reg2 = true };
 902            break;
 903
 904        case JCC_L:
 905            cond = TCG_COND_LT;
 906            goto fast_jcc_l;
 907        case JCC_LE:
 908            cond = TCG_COND_LE;
 909        fast_jcc_l:
 910            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
 911            gen_exts(size, cpu_tmp4);
 912            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
 913            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
 914                               .reg2 = t0, .mask = -1, .use_reg2 = true };
 915            break;
 916
 917        default:
 918            goto slow_jcc;
 919        }
 920        break;
 921
 922    default:
 923    slow_jcc:
 924        /* This actually generates good code for JC, JZ and JS.  */
 925        switch (jcc_op) {
 926        case JCC_O:
 927            cc = gen_prepare_eflags_o(s, reg);
 928            break;
 929        case JCC_B:
 930            cc = gen_prepare_eflags_c(s, reg);
 931            break;
 932        case JCC_Z:
 933            cc = gen_prepare_eflags_z(s, reg);
 934            break;
 935        case JCC_BE:
 936            gen_compute_eflags(s);
 937            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
 938                               .mask = CC_Z | CC_C };
 939            break;
 940        case JCC_S:
 941            cc = gen_prepare_eflags_s(s, reg);
 942            break;
 943        case JCC_P:
 944            cc = gen_prepare_eflags_p(s, reg);
 945            break;
 946        case JCC_L:
 947            gen_compute_eflags(s);
 948            if (TCGV_EQUAL(reg, cpu_cc_src)) {
 949                reg = cpu_tmp0;
 950            }
 951            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
 952            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
 953            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
 954                               .mask = CC_S };
 955            break;
 956        default:
 957        case JCC_LE:
 958            gen_compute_eflags(s);
 959            if (TCGV_EQUAL(reg, cpu_cc_src)) {
 960                reg = cpu_tmp0;
 961            }
 962            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
 963            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
 964            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
 965                               .mask = CC_S | CC_Z };
 966            break;
 967        }
 968        break;
 969    }
 970
 971    if (inv) {
 972        cc.cond = tcg_invert_cond(cc.cond);
 973    }
 974    return cc;
 975}
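
    /* For example, JNE (opcode 0x75) decodes as inv == 1 and
       jcc_op == JCC_Z, so the TCG condition prepared for JCC_Z is flipped
       by the tcg_invert_cond() call above.  */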
 976
 977static void gen_setcc1(DisasContext *s, int b, TCGv reg)
 978{
 979    CCPrepare cc = gen_prepare_cc(s, b, reg);
 980
 981    if (cc.no_setcond) {
 982        if (cc.cond == TCG_COND_EQ) {
 983            tcg_gen_xori_tl(reg, cc.reg, 1);
 984        } else {
 985            tcg_gen_mov_tl(reg, cc.reg);
 986        }
 987        return;
 988    }
 989
 990    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
 991        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
 992        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
 993        tcg_gen_andi_tl(reg, reg, 1);
 994        return;
 995    }
 996    if (cc.mask != -1) {
 997        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
 998        cc.reg = reg;
 999    }
1000    if (cc.use_reg2) {
1001        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1002    } else {
1003        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1004    }
1005}
1006
1007static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1008{
1009    gen_setcc1(s, JCC_B << 1, reg);
1010}
1011
1012/* generate a conditional jump to label 'l1' according to jump opcode
1013   value 'b'. In the fast case, T0 is guaranteed not to be used. */
1014static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1015{
1016    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1017
1018    if (cc.mask != -1) {
1019        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1020        cc.reg = cpu_T0;
1021    }
1022    if (cc.use_reg2) {
1023        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1024    } else {
1025        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1026    }
1027}
1028
1029/* Generate a conditional jump to label 'l1' according to jump opcode
1030   value 'b'. In the fast case, T0 is guaranteed not to be used.
1031   A translation block must end soon.  */
1032static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1033{
1034    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1035
1036    gen_update_cc_op(s);
1037    if (cc.mask != -1) {
1038        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1039        cc.reg = cpu_T0;
1040    }
1041    set_cc_op(s, CC_OP_DYNAMIC);
1042    if (cc.use_reg2) {
1043        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1044    } else {
1045        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1046    }
1047}
1048
1049/* XXX: does not work with gdbstub "ice" single step - not a
1050   serious problem */
1051static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1052{
1053    TCGLabel *l1 = gen_new_label();
1054    TCGLabel *l2 = gen_new_label();
1055    gen_op_jnz_ecx(s->aflag, l1);
1056    gen_set_label(l2);
1057    gen_jmp_tb(s, next_eip, 1);
1058    gen_set_label(l1);
1059    return l2;
1060}
1061
1062static inline void gen_stos(DisasContext *s, TCGMemOp ot)
1063{
1064    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
1065    gen_string_movl_A0_EDI(s);
1066    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1067    gen_op_movl_T0_Dshift(ot);
1068    gen_op_add_reg_T0(s->aflag, R_EDI);
1069}
1070
1071static inline void gen_lods(DisasContext *s, TCGMemOp ot)
1072{
1073    gen_string_movl_A0_ESI(s);
1074    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1075    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
1076    gen_op_movl_T0_Dshift(ot);
1077    gen_op_add_reg_T0(s->aflag, R_ESI);
1078}
1079
1080static inline void gen_scas(DisasContext *s, TCGMemOp ot)
1081{
1082    gen_string_movl_A0_EDI(s);
1083    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1084    gen_op(s, OP_CMPL, ot, R_EAX);
1085    gen_op_movl_T0_Dshift(ot);
1086    gen_op_add_reg_T0(s->aflag, R_EDI);
1087}
1088
1089static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
1090{
1091    gen_string_movl_A0_EDI(s);
1092    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1093    gen_string_movl_A0_ESI(s);
1094    gen_op(s, OP_CMPL, ot, OR_TMP0);
1095    gen_op_movl_T0_Dshift(ot);
1096    gen_op_add_reg_T0(s->aflag, R_ESI);
1097    gen_op_add_reg_T0(s->aflag, R_EDI);
1098}
1099
1100static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1101{
1102    if (s->flags & HF_IOBPT_MASK) {
1103        TCGv_i32 t_size = tcg_const_i32(1 << ot);
1104        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1105
1106        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1107        tcg_temp_free_i32(t_size);
1108        tcg_temp_free(t_next);
1109    }
1110}
1111
1112
1113static inline void gen_ins(DisasContext *s, TCGMemOp ot)
1114{
1115    if (s->tb->cflags & CF_USE_ICOUNT) {
1116        gen_io_start();
1117    }
1118    gen_string_movl_A0_EDI(s);
1119    /* Note: we must do this dummy write first to be restartable in
1120       case of page fault. */
1121    tcg_gen_movi_tl(cpu_T0, 0);
1122    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1123    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1124    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1125    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
1126    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1127    gen_op_movl_T0_Dshift(ot);
1128    gen_op_add_reg_T0(s->aflag, R_EDI);
1129    gen_bpt_io(s, cpu_tmp2_i32, ot);
1130    if (s->tb->cflags & CF_USE_ICOUNT) {
1131        gen_io_end();
1132    }
1133}
1134
1135static inline void gen_outs(DisasContext *s, TCGMemOp ot)
1136{
1137    if (s->tb->cflags & CF_USE_ICOUNT) {
1138        gen_io_start();
1139    }
1140    gen_string_movl_A0_ESI(s);
1141    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1142
1143    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1144    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1145    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
1146    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
1147    gen_op_movl_T0_Dshift(ot);
1148    gen_op_add_reg_T0(s->aflag, R_ESI);
1149    gen_bpt_io(s, cpu_tmp2_i32, ot);
1150    if (s->tb->cflags & CF_USE_ICOUNT) {
1151        gen_io_end();
1152    }
1153}
1154
1155/* same method as Valgrind: we generate jumps to the current or next
1156   instruction */
1157#define GEN_REPZ(op)                                                          \
1158static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
1159                                 target_ulong cur_eip, target_ulong next_eip) \
1160{                                                                             \
1161    TCGLabel *l2;                                                             \
1162    gen_update_cc_op(s);                                                      \
1163    l2 = gen_jz_ecx_string(s, next_eip);                                      \
1164    gen_ ## op(s, ot);                                                        \
1165    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
1166    /* a loop would cause two single step exceptions if ECX = 1               \
1167       before rep string_insn */                                              \
1168    if (s->repz_opt)                                                          \
1169        gen_op_jz_ecx(s->aflag, l2);                                          \
1170    gen_jmp(s, cur_eip);                                                      \
1171}
1172
1173#define GEN_REPZ2(op)                                                         \
1174static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
1175                                   target_ulong cur_eip,                      \
1176                                   target_ulong next_eip,                     \
1177                                   int nz)                                    \
1178{                                                                             \
1179    TCGLabel *l2;                                                             \
1180    gen_update_cc_op(s);                                                      \
1181    l2 = gen_jz_ecx_string(s, next_eip);                                      \
1182    gen_ ## op(s, ot);                                                        \
1183    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
1184    gen_update_cc_op(s);                                                      \
1185    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
1186    if (s->repz_opt)                                                          \
1187        gen_op_jz_ecx(s->aflag, l2);                                          \
1188    gen_jmp(s, cur_eip);                                                      \
1189}
1190
1191GEN_REPZ(movs)
1192GEN_REPZ(stos)
1193GEN_REPZ(lods)
1194GEN_REPZ(ins)
1195GEN_REPZ(outs)
1196GEN_REPZ2(scas)
1197GEN_REPZ2(cmps)
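
    /* These expansions define gen_repz_movs(), gen_repz_stos(),
       gen_repz_lods(), gen_repz_ins() and gen_repz_outs(), which only test
       ECX, plus gen_repz_scas() and gen_repz_cmps(), whose nz argument
       selects between REPZ (stop when ZF clears) and REPNZ (stop when ZF
       sets).  */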
1198
1199static void gen_helper_fp_arith_ST0_FT0(int op)
1200{
1201    switch (op) {
1202    case 0:
1203        gen_helper_fadd_ST0_FT0(cpu_env);
1204        break;
1205    case 1:
1206        gen_helper_fmul_ST0_FT0(cpu_env);
1207        break;
1208    case 2:
1209        gen_helper_fcom_ST0_FT0(cpu_env);
1210        break;
1211    case 3:
1212        gen_helper_fcom_ST0_FT0(cpu_env);
1213        break;
1214    case 4:
1215        gen_helper_fsub_ST0_FT0(cpu_env);
1216        break;
1217    case 5:
1218        gen_helper_fsubr_ST0_FT0(cpu_env);
1219        break;
1220    case 6:
1221        gen_helper_fdiv_ST0_FT0(cpu_env);
1222        break;
1223    case 7:
1224        gen_helper_fdivr_ST0_FT0(cpu_env);
1225        break;
1226    }
1227}
1228
1229/* NOTE the exception in "r" op ordering */
1230static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1231{
1232    TCGv_i32 tmp = tcg_const_i32(opreg);
1233    switch (op) {
1234    case 0:
1235        gen_helper_fadd_STN_ST0(cpu_env, tmp);
1236        break;
1237    case 1:
1238        gen_helper_fmul_STN_ST0(cpu_env, tmp);
1239        break;
1240    case 4:
1241        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1242        break;
1243    case 5:
1244        gen_helper_fsub_STN_ST0(cpu_env, tmp);
1245        break;
1246    case 6:
1247        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1248        break;
1249    case 7:
1250        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1251        break;
1252    }
1253}
1254
1255/* if d == OR_TMP0, it means memory operand (address in A0) */
1256static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
1257{
1258    if (d != OR_TMP0) {
1259        gen_op_mov_v_reg(ot, cpu_T0, d);
1260    } else if (!(s1->prefix & PREFIX_LOCK)) {
1261        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1262    }
1263    switch(op) {
1264    case OP_ADCL:
1265        gen_compute_eflags_c(s1, cpu_tmp4);
1266        if (s1->prefix & PREFIX_LOCK) {
1267            tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
1268            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1269                                        s1->mem_index, ot | MO_LE);
1270        } else {
1271            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1272            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
1273            gen_op_st_rm_T0_A0(s1, ot, d);
1274        }
1275        gen_op_update3_cc(cpu_tmp4);
1276        set_cc_op(s1, CC_OP_ADCB + ot);
1277        break;
1278    case OP_SBBL:
1279        gen_compute_eflags_c(s1, cpu_tmp4);
1280        if (s1->prefix & PREFIX_LOCK) {
1281            tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
1282            tcg_gen_neg_tl(cpu_T0, cpu_T0);
1283            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1284                                        s1->mem_index, ot | MO_LE);
1285        } else {
1286            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1287            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
1288            gen_op_st_rm_T0_A0(s1, ot, d);
1289        }
1290        gen_op_update3_cc(cpu_tmp4);
1291        set_cc_op(s1, CC_OP_SBBB + ot);
1292        break;
1293    case OP_ADDL:
1294        if (s1->prefix & PREFIX_LOCK) {
1295            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1296                                        s1->mem_index, ot | MO_LE);
1297        } else {
1298            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1299            gen_op_st_rm_T0_A0(s1, ot, d);
1300        }
1301        gen_op_update2_cc();
1302        set_cc_op(s1, CC_OP_ADDB + ot);
1303        break;
1304    case OP_SUBL:
1305        if (s1->prefix & PREFIX_LOCK) {
1306            tcg_gen_neg_tl(cpu_T0, cpu_T1);
1307            tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
1308                                        s1->mem_index, ot | MO_LE);
1309            tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
1310        } else {
1311            tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1312            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1313            gen_op_st_rm_T0_A0(s1, ot, d);
1314        }
1315        gen_op_update2_cc();
1316        set_cc_op(s1, CC_OP_SUBB + ot);
1317        break;
1318    default:
1319    case OP_ANDL:
1320        if (s1->prefix & PREFIX_LOCK) {
1321            tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1322                                        s1->mem_index, ot | MO_LE);
1323        } else {
1324            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
1325            gen_op_st_rm_T0_A0(s1, ot, d);
1326        }
1327        gen_op_update1_cc();
1328        set_cc_op(s1, CC_OP_LOGICB + ot);
1329        break;
1330    case OP_ORL:
1331        if (s1->prefix & PREFIX_LOCK) {
1332            tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1333                                       s1->mem_index, ot | MO_LE);
1334        } else {
1335            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1336            gen_op_st_rm_T0_A0(s1, ot, d);
1337        }
1338        gen_op_update1_cc();
1339        set_cc_op(s1, CC_OP_LOGICB + ot);
1340        break;
1341    case OP_XORL:
1342        if (s1->prefix & PREFIX_LOCK) {
1343            tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1344                                        s1->mem_index, ot | MO_LE);
1345        } else {
1346            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
1347            gen_op_st_rm_T0_A0(s1, ot, d);
1348        }
1349        gen_op_update1_cc();
1350        set_cc_op(s1, CC_OP_LOGICB + ot);
1351        break;
1352    case OP_CMPL:
1353        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
1354        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1355        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
1356        set_cc_op(s1, CC_OP_SUBB + ot);
1357        break;
1358    }
1359}
1360
1361/* if d == OR_TMP0, it means memory operand (address in A0) */
1362static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
1363{
1364    if (s1->prefix & PREFIX_LOCK) {
1365        tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
1366        tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1367                                    s1->mem_index, ot | MO_LE);
1368    } else {
1369        if (d != OR_TMP0) {
1370            gen_op_mov_v_reg(ot, cpu_T0, d);
1371        } else {
1372            gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1373        }
1374        tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
1375        gen_op_st_rm_T0_A0(s1, ot, d);
1376    }
1377
1378    gen_compute_eflags_c(s1, cpu_cc_src);
1379    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1380    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1381}
1382
1383static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
1384                            TCGv shm1, TCGv count, bool is_right)
1385{
1386    TCGv_i32 z32, s32, oldop;
1387    TCGv z_tl;
1388
1389    /* Store the results into the CC variables.  If we know that the
1390       variable must be dead, store unconditionally.  Otherwise we'll
1391       need to not disrupt the current contents.  */
1392    z_tl = tcg_const_tl(0);
1393    if (cc_op_live[s->cc_op] & USES_CC_DST) {
1394        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1395                           result, cpu_cc_dst);
1396    } else {
1397        tcg_gen_mov_tl(cpu_cc_dst, result);
1398    }
1399    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1400        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1401                           shm1, cpu_cc_src);
1402    } else {
1403        tcg_gen_mov_tl(cpu_cc_src, shm1);
1404    }
1405    tcg_temp_free(z_tl);
1406
1407    /* Get the two potential CC_OP values into temporaries.  */
1408    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1409    if (s->cc_op == CC_OP_DYNAMIC) {
1410        oldop = cpu_cc_op;
1411    } else {
1412        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
1413        oldop = cpu_tmp3_i32;
1414    }
1415
1416    /* Conditionally store the CC_OP value.  */
1417    z32 = tcg_const_i32(0);
1418    s32 = tcg_temp_new_i32();
1419    tcg_gen_trunc_tl_i32(s32, count);
1420    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
1421    tcg_temp_free_i32(z32);
1422    tcg_temp_free_i32(s32);
1423
1424    /* The CC_OP value is no longer predictable.  */
1425    set_cc_op(s, CC_OP_DYNAMIC);
1426}
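
    /* Rationale: a shift by zero must leave the flags (and thus CC_OP)
       unchanged on x86, and the shift count is only known at run time here,
       so both the data and the CC_OP selection are done with movcond on
       count != 0.  */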
1427
1428static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1429                            int is_right, int is_arith)
1430{
1431    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1432
1433    /* load */
1434    if (op1 == OR_TMP0) {
1435        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1436    } else {
1437        gen_op_mov_v_reg(ot, cpu_T0, op1);
1438    }
1439
1440    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1441    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
1442
1443    if (is_right) {
1444        if (is_arith) {
1445            gen_exts(ot, cpu_T0);
1446            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1447            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
1448        } else {
1449            gen_extu(ot, cpu_T0);
1450            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1451            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
1452        }
1453    } else {
1454        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1455        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
1456    }
1457
1458    /* store */
1459    gen_op_st_rm_T0_A0(s, ot, op1);
1460
1461    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
1462}
1463
1464static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1465                            int is_right, int is_arith)
1466{
1467    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1468
1469    /* load */
1470    if (op1 == OR_TMP0)
1471        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1472    else
1473        gen_op_mov_v_reg(ot, cpu_T0, op1);
1474
1475    op2 &= mask;
1476    if (op2 != 0) {
1477        if (is_right) {
1478            if (is_arith) {
1479                gen_exts(ot, cpu_T0);
1480                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
1481                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
1482            } else {
1483                gen_extu(ot, cpu_T0);
1484                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
1485                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
1486            }
1487        } else {
1488            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
1489            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
1490        }
1491    }
1492
1493    /* store */
1494    gen_op_st_rm_T0_A0(s, ot, op1);
1495
1496    /* update eflags if non-zero shift */
1497    if (op2 != 0) {
1498        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
1499        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1500        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1501    }
1502}
1503
1504static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
1505{
1506    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1507    TCGv_i32 t0, t1;
1508
1509    /* load */
1510    if (op1 == OR_TMP0) {
1511        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1512    } else {
1513        gen_op_mov_v_reg(ot, cpu_T0, op1);
1514    }
1515
1516    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1517
1518    switch (ot) {
1519    case MO_8:
1520        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1521        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
1522        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
1523        goto do_long;
1524    case MO_16:
1525        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1526        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
1527        goto do_long;
1528    do_long:
1529#ifdef TARGET_X86_64
1530    case MO_32:
1531        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1532        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
1533        if (is_right) {
1534            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1535        } else {
1536            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1537        }
1538        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1539        break;
1540#endif
1541    default:
1542        if (is_right) {
1543            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
1544        } else {
1545            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
1546        }
1547        break;
1548    }
1549
1550    /* store */
1551    gen_op_st_rm_T0_A0(s, ot, op1);
1552
1553    /* We'll need the flags computed into CC_SRC.  */
1554    gen_compute_eflags(s);
1555
1556    /* The value that was "rotated out" is now present at the other end
1557       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1558       since we've computed the flags into CC_SRC, these variables are
1559       currently dead.  */
1560    if (is_right) {
1561        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1562        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1563        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1564    } else {
1565        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1566        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1567    }
1568    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1569    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1570
1571    /* Now conditionally store the new CC_OP value.  If the shift count
1572       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1573       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
1574       exactly as we computed above.  */
1575    t0 = tcg_const_i32(0);
1576    t1 = tcg_temp_new_i32();
1577    tcg_gen_trunc_tl_i32(t1, cpu_T1);
1578    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
1579    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
1580    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1581                        cpu_tmp2_i32, cpu_tmp3_i32);
1582    tcg_temp_free_i32(t0);
1583    tcg_temp_free_i32(t1);
1584
1585    /* The CC_OP value is no longer predictable.  */ 
1586    set_cc_op(s, CC_OP_DYNAMIC);
1587}
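
    /* Worked example for the replication trick above: "rol al, cl" with
       al == 0xc1 and cl == 3 first builds 0xc1c1c1c1; a 32-bit rotate left
       by 3 gives 0x0e0e0e0e, whose low byte 0x0e is exactly rol8(0xc1, 3),
       because bits leaving the low byte re-enter from the adjacent copy.  */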
1588
1589static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1590                          int is_right)
1591{
1592    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1593    int shift;
1594
1595    /* load */
1596    if (op1 == OR_TMP0) {
1597        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1598    } else {
1599        gen_op_mov_v_reg(ot, cpu_T0, op1);
1600    }
1601
1602    op2 &= mask;
1603    if (op2 != 0) {
1604        switch (ot) {
1605#ifdef TARGET_X86_64
1606        case MO_32:
1607            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1608            if (is_right) {
1609                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1610            } else {
1611                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1612            }
1613            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1614            break;
1615#endif
1616        default:
1617            if (is_right) {
1618                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
1619            } else {
1620                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
1621            }
1622            break;
1623        case MO_8:
1624            mask = 7;
1625            goto do_shifts;
1626        case MO_16:
1627            mask = 15;
1628        do_shifts:
1629            shift = op2 & mask;
1630            if (is_right) {
1631                shift = mask + 1 - shift;
1632            }
1633            gen_extu(ot, cpu_T0);
1634            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
1635            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
1636            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
1637            break;
1638        }
1639    }
1640
1641    /* store */
1642    gen_op_st_rm_T0_A0(s, ot, op1);
1643
1644    if (op2 != 0) {
1645        /* Compute the flags into CC_SRC.  */
1646        gen_compute_eflags(s);
1647
1648        /* The value that was "rotated out" is now present at the other end
1649           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1650           since we've computed the flags into CC_SRC, these variables are
1651           currently dead.  */
1652        if (is_right) {
1653            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1654            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1655            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1656        } else {
1657            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1658            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1659        }
1660        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1661        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1662        set_cc_op(s, CC_OP_ADCOX);
1663    }
1664}
1665
1666/* XXX: add faster immediate = 1 case */
1667static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1668                           int is_right)
1669{
1670    gen_compute_eflags(s);
1671    assert(s->cc_op == CC_OP_EFLAGS);
1672
1673    /* load */
1674    if (op1 == OR_TMP0)
1675        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1676    else
1677        gen_op_mov_v_reg(ot, cpu_T0, op1);
1678    
1679    if (is_right) {
1680        switch (ot) {
1681        case MO_8:
1682            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1683            break;
1684        case MO_16:
1685            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1686            break;
1687        case MO_32:
1688            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1689            break;
1690#ifdef TARGET_X86_64
1691        case MO_64:
1692            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1693            break;
1694#endif
1695        default:
1696            tcg_abort();
1697        }
1698    } else {
1699        switch (ot) {
1700        case MO_8:
1701            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1702            break;
1703        case MO_16:
1704            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1705            break;
1706        case MO_32:
1707            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1708            break;
1709#ifdef TARGET_X86_64
1710        case MO_64:
1711            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1712            break;
1713#endif
1714        default:
1715            tcg_abort();
1716        }
1717    }
1718    /* store */
1719    gen_op_st_rm_T0_A0(s, ot, op1);
1720}
1721
1722/* XXX: add faster immediate case */
1723static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1724                             bool is_right, TCGv count_in)
1725{
1726    target_ulong mask = (ot == MO_64 ? 63 : 31);
1727    TCGv count;
1728
1729    /* load */
1730    if (op1 == OR_TMP0) {
1731        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1732    } else {
1733        gen_op_mov_v_reg(ot, cpu_T0, op1);
1734    }
1735
1736    count = tcg_temp_new();
1737    tcg_gen_andi_tl(count, count_in, mask);
1738
1739    switch (ot) {
1740    case MO_16:
1741        /* Note: we implement the Intel behaviour for shift count > 16.
1742           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1743           portion by constructing it as a 32-bit value.  */
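        /* E.g. "shrdw $20, %bx, %ax" shifts the 48-bit value AX:BX:AX
           right by 20.  The deposit below packs BX:AX into a single
           32-bit quantity so that the MO_32 path can perform one
           64-bit shift on A:B:A.  */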
1744        if (is_right) {
1745            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
1746            tcg_gen_mov_tl(cpu_T1, cpu_T0);
1747            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
1748        } else {
1749            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
1750        }
1751        /* FALLTHRU */
1752#ifdef TARGET_X86_64
1753    case MO_32:
1754        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1755        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1756        if (is_right) {
1757            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
1758            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1759            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
1760        } else {
1761            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
1762            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1763            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
1764            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1765            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
1766        }
1767        break;
1768#endif
1769    default:
1770        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1771        if (is_right) {
1772            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1773
1774            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1775            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
1776            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
1777        } else {
1778            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1779            if (ot == MO_16) {
1780                /* Only needed if count > 16, for Intel behaviour.  */
1781                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1782                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
1783                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1784            }
1785
1786            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1787            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
1788            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
1789        }
1790        tcg_gen_movi_tl(cpu_tmp4, 0);
1791        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
1792                           cpu_tmp4, cpu_T1);
1793        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1794        break;
1795    }
1796
1797    /* store */
1798    gen_op_st_rm_T0_A0(s, ot, op1);
1799
1800    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
1801    tcg_temp_free(count);
1802}
1803
1804static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1805{
1806    if (s != OR_TMP1)
1807        gen_op_mov_v_reg(ot, cpu_T1, s);
1808    switch(op) {
1809    case OP_ROL:
1810        gen_rot_rm_T1(s1, ot, d, 0);
1811        break;
1812    case OP_ROR:
1813        gen_rot_rm_T1(s1, ot, d, 1);
1814        break;
1815    case OP_SHL:
1816    case OP_SHL1:
1817        gen_shift_rm_T1(s1, ot, d, 0, 0);
1818        break;
1819    case OP_SHR:
1820        gen_shift_rm_T1(s1, ot, d, 1, 0);
1821        break;
1822    case OP_SAR:
1823        gen_shift_rm_T1(s1, ot, d, 1, 1);
1824        break;
1825    case OP_RCL:
1826        gen_rotc_rm_T1(s1, ot, d, 0);
1827        break;
1828    case OP_RCR:
1829        gen_rotc_rm_T1(s1, ot, d, 1);
1830        break;
1831    }
1832}
1833
1834static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1835{
1836    switch(op) {
1837    case OP_ROL:
1838        gen_rot_rm_im(s1, ot, d, c, 0);
1839        break;
1840    case OP_ROR:
1841        gen_rot_rm_im(s1, ot, d, c, 1);
1842        break;
1843    case OP_SHL:
1844    case OP_SHL1:
1845        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1846        break;
1847    case OP_SHR:
1848        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1849        break;
1850    case OP_SAR:
1851        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1852        break;
1853    default:
1854        /* currently not optimized */
1855        tcg_gen_movi_tl(cpu_T1, c);
1856        gen_shift(s1, op, ot, d, OR_TMP1);
1857        break;
1858    }
1859}
1860
1861/* Decompose an address.  */
1862
1863typedef struct AddressParts {
1864    int def_seg;
1865    int base;
1866    int index;
1867    int scale;
1868    target_long disp;
1869} AddressParts;
1870
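/* For example, "lea 0x10(%eax,%ebx,4)" decomposes into def_seg = R_DS,
   base = R_EAX, index = R_EBX, scale = 2 (a log2 factor) and disp = 0x10.
   A base or index of -1 means the component is absent; the decoder below
   uses base = -2 for a RIP-relative address.  */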
1871static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1872                                    int modrm)
1873{
1874    int def_seg, base, index, scale, mod, rm;
1875    target_long disp;
1876    bool havesib;
1877
1878    def_seg = R_DS;
1879    index = -1;
1880    scale = 0;
1881    disp = 0;
1882
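    /* ModRM layout: mod in bits 7:6, reg/opcode in bits 5:3, r/m in
       bits 2:0.  E.g. modrm = 0x44 is mod = 1 (disp8 follows) with
       rm = 4, which in 32/64-bit modes means a SIB byte follows.  */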
1883    mod = (modrm >> 6) & 3;
1884    rm = modrm & 7;
1885    base = rm | REX_B(s);
1886
1887    if (mod == 3) {
1888        /* Normally filtered out earlier, but including this path
1889           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1890        goto done;
1891    }
1892
1893    switch (s->aflag) {
1894    case MO_64:
1895    case MO_32:
1896        havesib = 0;
1897        if (rm == 4) {
1898            int code = cpu_ldub_code(env, s->pc++);
1899            scale = (code >> 6) & 3;
1900            index = ((code >> 3) & 7) | REX_X(s);
1901            if (index == 4) {
1902                index = -1;  /* no index */
1903            }
1904            base = (code & 7) | REX_B(s);
1905            havesib = 1;
1906        }
1907
1908        switch (mod) {
1909        case 0:
1910            if ((base & 7) == 5) {
1911                base = -1;
1912                disp = (int32_t)cpu_ldl_code(env, s->pc);
1913                s->pc += 4;
1914                if (CODE64(s) && !havesib) {
1915                    base = -2;
1916                    disp += s->pc + s->rip_offset;
1917                }
1918            }
1919            break;
1920        case 1:
1921            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1922            break;
1923        default:
1924        case 2:
1925            disp = (int32_t)cpu_ldl_code(env, s->pc);
1926            s->pc += 4;
1927            break;
1928        }
1929
1930        /* For correct popl handling with esp.  */
1931        if (base == R_ESP && s->popl_esp_hack) {
1932            disp += s->popl_esp_hack;
1933        }
1934        if (base == R_EBP || base == R_ESP) {
1935            def_seg = R_SS;
1936        }
1937        break;
1938
1939    case MO_16:
1940        if (mod == 0) {
1941            if (rm == 6) {
1942                base = -1;
1943                disp = cpu_lduw_code(env, s->pc);
1944                s->pc += 2;
1945                break;
1946            }
1947        } else if (mod == 1) {
1948            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1949        } else {
1950            disp = (int16_t)cpu_lduw_code(env, s->pc);
1951            s->pc += 2;
1952        }
1953
1954        switch (rm) {
1955        case 0:
1956            base = R_EBX;
1957            index = R_ESI;
1958            break;
1959        case 1:
1960            base = R_EBX;
1961            index = R_EDI;
1962            break;
1963        case 2:
1964            base = R_EBP;
1965            index = R_ESI;
1966            def_seg = R_SS;
1967            break;
1968        case 3:
1969            base = R_EBP;
1970            index = R_EDI;
1971            def_seg = R_SS;
1972            break;
1973        case 4:
1974            base = R_ESI;
1975            break;
1976        case 5:
1977            base = R_EDI;
1978            break;
1979        case 6:
1980            base = R_EBP;
1981            def_seg = R_SS;
1982            break;
1983        default:
1984        case 7:
1985            base = R_EBX;
1986            break;
1987        }
1988        break;
1989
1990    default:
1991        tcg_abort();
1992    }
1993
1994 done:
1995    return (AddressParts){ def_seg, base, index, scale, disp };
1996}
1997
1998/* Compute the address, with a minimum number of TCG ops.  */
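/* E.g. {base = R_EBX, index = R_ESI, scale = 0, disp = 0} emits a single
   add and returns cpu_A0, while a bare {base = R_EAX} emits no ops at all
   and returns cpu_regs[R_EAX] directly.  */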
1999static TCGv gen_lea_modrm_1(AddressParts a)
2000{
2001    TCGv ea;
2002
2003    TCGV_UNUSED(ea);
2004    if (a.index >= 0) {
2005        if (a.scale == 0) {
2006            ea = cpu_regs[a.index];
2007        } else {
2008            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
2009            ea = cpu_A0;
2010        }
2011        if (a.base >= 0) {
2012            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
2013            ea = cpu_A0;
2014        }
2015    } else if (a.base >= 0) {
2016        ea = cpu_regs[a.base];
2017    }
2018    if (TCGV_IS_UNUSED(ea)) {
2019        tcg_gen_movi_tl(cpu_A0, a.disp);
2020        ea = cpu_A0;
2021    } else if (a.disp != 0) {
2022        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
2023        ea = cpu_A0;
2024    }
2025
2026    return ea;
2027}
2028
2029static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2030{
2031    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2032    TCGv ea = gen_lea_modrm_1(a);
2033    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2034}
2035
2036static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2037{
2038    (void)gen_lea_modrm_0(env, s, modrm);
2039}
2040
2041/* Used for BNDCL, BNDCU, BNDCN.  */
2042static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2043                      TCGCond cond, TCGv_i64 bndv)
2044{
2045    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
2046
2047    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
2048    if (!CODE64(s)) {
2049        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
2050    }
2051    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
2052    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
2053    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
2054}
2055
2056/* used for LEA and MOV AX, mem */
2057static void gen_add_A0_ds_seg(DisasContext *s)
2058{
2059    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2060}
2061
2062/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2063   OR_TMP0 */
2064static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2065                           TCGMemOp ot, int reg, int is_store)
2066{
2067    int mod, rm;
2068
2069    mod = (modrm >> 6) & 3;
2070    rm = (modrm & 7) | REX_B(s);
2071    if (mod == 3) {
2072        if (is_store) {
2073            if (reg != OR_TMP0)
2074                gen_op_mov_v_reg(ot, cpu_T0, reg);
2075            gen_op_mov_reg_v(ot, rm, cpu_T0);
2076        } else {
2077            gen_op_mov_v_reg(ot, cpu_T0, rm);
2078            if (reg != OR_TMP0)
2079                gen_op_mov_reg_v(ot, reg, cpu_T0);
2080        }
2081    } else {
2082        gen_lea_modrm(env, s, modrm);
2083        if (is_store) {
2084            if (reg != OR_TMP0)
2085                gen_op_mov_v_reg(ot, cpu_T0, reg);
2086            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2087        } else {
2088            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2089            if (reg != OR_TMP0)
2090                gen_op_mov_reg_v(ot, reg, cpu_T0);
2091        }
2092    }
2093}
2094
2095static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2096{
2097    uint32_t ret;
2098
2099    switch (ot) {
2100    case MO_8:
2101        ret = cpu_ldub_code(env, s->pc);
2102        s->pc++;
2103        break;
2104    case MO_16:
2105        ret = cpu_lduw_code(env, s->pc);
2106        s->pc += 2;
2107        break;
2108    case MO_32:
2109#ifdef TARGET_X86_64
2110    case MO_64:
2111#endif
2112        ret = cpu_ldl_code(env, s->pc);
2113        s->pc += 4;
2114        break;
2115    default:
2116        tcg_abort();
2117    }
2118    return ret;
2119}
2120
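/* Except for the mov reg, imm64 form, x86 immediates occupy at most 4
   bytes: a 64-bit operand size still encodes an imm32, which is then
   sign-extended.  Hence the cap below.  */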
2121static inline int insn_const_size(TCGMemOp ot)
2122{
2123    if (ot <= MO_32) {
2124        return 1 << ot;
2125    } else {
2126        return 4;
2127    }
2128}
2129
2130static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2131{
2132#ifndef CONFIG_USER_ONLY
2133    return (pc & TARGET_PAGE_MASK) == (s->tb->pc & TARGET_PAGE_MASK) ||
2134           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2135#else
2136    return true;
2137#endif
2138}
2139
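/* Direct block chaining: tcg_gen_goto_tb emits a patchable jump, and
   tcg_gen_exit_tb((uintptr_t)tb + tb_num) returns the TB pointer with the
   jump-slot index in its low bits so the runtime can link this slot to the
   target TB once it is translated.  This is only safe when the target stays
   on the same guest page(s), which use_goto_tb checks above.  */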
2140static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2141{
2142    target_ulong pc = s->cs_base + eip;
2143
2144    if (use_goto_tb(s, pc)) {
2145        /* jump to same page: we can use a direct jump */
2146        tcg_gen_goto_tb(tb_num);
2147        gen_jmp_im(eip);
2148        tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
2149    } else {
2150        /* jump to another page: currently not optimized */
2151        gen_jmp_im(eip);
2152        gen_eob(s);
2153    }
2154}
2155
2156static inline void gen_jcc(DisasContext *s, int b,
2157                           target_ulong val, target_ulong next_eip)
2158{
2159    TCGLabel *l1, *l2;
2160
2161    if (s->jmp_opt) {
2162        l1 = gen_new_label();
2163        gen_jcc1(s, b, l1);
2164
2165        gen_goto_tb(s, 0, next_eip);
2166
2167        gen_set_label(l1);
2168        gen_goto_tb(s, 1, val);
2169        s->is_jmp = DISAS_TB_JUMP;
2170    } else {
2171        l1 = gen_new_label();
2172        l2 = gen_new_label();
2173        gen_jcc1(s, b, l1);
2174
2175        gen_jmp_im(next_eip);
2176        tcg_gen_br(l2);
2177
2178        gen_set_label(l1);
2179        gen_jmp_im(val);
2180        gen_set_label(l2);
2181        gen_eob(s);
2182    }
2183}
2184
2185static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2186                        int modrm, int reg)
2187{
2188    CCPrepare cc;
2189
2190    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2191
2192    cc = gen_prepare_cc(s, b, cpu_T1);
2193    if (cc.mask != -1) {
2194        TCGv t0 = tcg_temp_new();
2195        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2196        cc.reg = t0;
2197    }
2198    if (!cc.use_reg2) {
2199        cc.reg2 = tcg_const_tl(cc.imm);
2200    }
2201
2202    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2203                       cpu_T0, cpu_regs[reg]);
2204    gen_op_mov_reg_v(ot, reg, cpu_T0);
2205
2206    if (cc.mask != -1) {
2207        tcg_temp_free(cc.reg);
2208    }
2209    if (!cc.use_reg2) {
2210        tcg_temp_free(cc.reg2);
2211    }
2212}
2213
2214static inline void gen_op_movl_T0_seg(int seg_reg)
2215{
2216    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2217                     offsetof(CPUX86State,segs[seg_reg].selector));
2218}
2219
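/* In real and vm86 mode a segment base is simply selector << 4: e.g.
   loading 0x1234 into a segment register yields base 0x12340.  */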
2220static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2221{
2222    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2223    tcg_gen_st32_tl(cpu_T0, cpu_env,
2224                    offsetof(CPUX86State,segs[seg_reg].selector));
2225    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2226}
2227
2228/* Move T0 to seg_reg, and note whether the CPU state may change.  Never
2229   call this function with seg_reg == R_CS.  */
2230static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2231{
2232    if (s->pe && !s->vm86) {
2233        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2234        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2235        /* abort translation because the addseg value may change or
2236           because ss32 may change. For R_SS, translation must always
2237           stop as a special handling must be done to disable hardware
2238           interrupts for the next instruction */
2239        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2240            s->is_jmp = DISAS_TB_JUMP;
2241    } else {
2242        gen_op_movl_seg_T0_vm(seg_reg);
2243        if (seg_reg == R_SS)
2244            s->is_jmp = DISAS_TB_JUMP;
2245    }
2246}
2247
2248static inline int svm_is_rep(int prefixes)
2249{
2250    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2251}
2252
2253static inline void
2254gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2255                              uint32_t type, uint64_t param)
2256{
2257    /* no SVM activated; fast case */
2258    if (likely(!(s->flags & HF_SVMI_MASK)))
2259        return;
2260    gen_update_cc_op(s);
2261    gen_jmp_im(pc_start - s->cs_base);
2262    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2263                                         tcg_const_i64(param));
2264}
2265
2266static inline void
2267gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2268{
2269    gen_svm_check_intercept_param(s, pc_start, type, 0);
2270}
2271
2272static inline void gen_stack_update(DisasContext *s, int addend)
2273{
2274    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2275}
2276
2277/* Generate a push. It depends on ss32, addseg and dflag.  */
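/* E.g. a 32-bit push stores the value at ESP - 4 first and writes the new
   ESP back only afterwards, so a faulting store leaves ESP intact.  */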
2278static void gen_push_v(DisasContext *s, TCGv val)
2279{
2280    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2281    TCGMemOp a_ot = mo_stacksize(s);
2282    int size = 1 << d_ot;
2283    TCGv new_esp = cpu_A0;
2284
2285    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2286
2287    if (!CODE64(s)) {
2288        if (s->addseg) {
2289            new_esp = cpu_tmp4;
2290            tcg_gen_mov_tl(new_esp, cpu_A0);
2291        }
2292        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2293    }
2294
2295    gen_op_st_v(s, d_ot, val, cpu_A0);
2296    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2297}
2298
2299/* A two-step pop is necessary for precise exceptions.  */
2300static TCGMemOp gen_pop_T0(DisasContext *s)
2301{
2302    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2303
2304    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2305    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2306
2307    return d_ot;
2308}
2309
2310static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2311{
2312    gen_stack_update(s, 1 << ot);
2313}
2314
2315static inline void gen_stack_A0(DisasContext *s)
2316{
2317    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2318}
2319
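/* PUSHA stores EAX, ECX, EDX, EBX, the original ESP, EBP, ESI and EDI at
   decreasing addresses; the loop below stores cpu_regs[7 - i] so that EDI
   lands at the lowest address and EAX at the highest.  */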
2320static void gen_pusha(DisasContext *s)
2321{
2322    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2323    TCGMemOp d_ot = s->dflag;
2324    int size = 1 << d_ot;
2325    int i;
2326
2327    for (i = 0; i < 8; i++) {
2328        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2329        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2330        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2331    }
2332
2333    gen_stack_update(s, -8 * size);
2334}
2335
2336static void gen_popa(DisasContext *s)
2337{
2338    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2339    TCGMemOp d_ot = s->dflag;
2340    int size = 1 << d_ot;
2341    int i;
2342
2343    for (i = 0; i < 8; i++) {
2344        /* ESP is not reloaded */
2345        if (7 - i == R_ESP) {
2346            continue;
2347        }
2348        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2349        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2350        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2351        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2352    }
2353
2354    gen_stack_update(s, 8 * size);
2355}
2356
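/* ENTER builds a stack frame: e.g. "enter $8, $0" pushes EBP, copies the
   frame pointer (FrameTemp) into EBP and reserves 8 bytes of locals.  A
   nonzero level additionally copies level-1 saved frame pointers from the
   old frame and pushes the new frame pointer, as done below.  */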
2357static void gen_enter(DisasContext *s, int esp_addend, int level)
2358{
2359    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2360    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2361    int size = 1 << d_ot;
2362
2363    /* Push BP; compute FrameTemp into T1.  */
2364    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2365    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2366    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2367
2368    level &= 31;
2369    if (level != 0) {
2370        int i;
2371
2372        /* Copy level-1 pointers from the previous frame.  */
2373        for (i = 1; i < level; ++i) {
2374            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2375            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2376            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2377
2378            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2379            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2380            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2381        }
2382
2383        /* Push the current FrameTemp as the last level.  */
2384        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2385        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2386        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2387    }
2388
2389    /* Copy the FrameTemp value to EBP.  */
2390    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2391
2392    /* Compute the final value of ESP.  */
2393    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2394    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2395}
2396
2397static void gen_leave(DisasContext *s)
2398{
2399    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2400    TCGMemOp a_ot = mo_stacksize(s);
2401
2402    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2403    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2404
2405    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2406
2407    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2408    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2409}
2410
2411static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2412{
2413    gen_update_cc_op(s);
2414    gen_jmp_im(cur_eip);
2415    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2416    s->is_jmp = DISAS_TB_JUMP;
2417}
2418
2419/* Generate #UD for the current instruction.  The assumption here is that
2420   the instruction is known, but it isn't allowed in the current cpu mode.  */
2421static void gen_illegal_opcode(DisasContext *s)
2422{
2423    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
2424}
2425
2426/* Similarly, except that the assumption here is that we don't decode
2427   the instruction at all -- either a missing opcode, an unimplemented
2428   feature, or just a bogus instruction stream.  */
2429static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2430{
2431    gen_illegal_opcode(s);
2432
2433    if (qemu_loglevel_mask(LOG_UNIMP)) {
2434        target_ulong pc = s->pc_start, end = s->pc;
2435        qemu_log_lock();
2436        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2437        for (; pc < end; ++pc) {
2438            qemu_log(" %02x", cpu_ldub_code(env, pc));
2439        }
2440        qemu_log("\n");
2441        qemu_log_unlock();
2442    }
2443}
2444
2445/* an interrupt is different from an exception because of the
2446   privilege checks */
2447static void gen_interrupt(DisasContext *s, int intno,
2448                          target_ulong cur_eip, target_ulong next_eip)
2449{
2450    gen_update_cc_op(s);
2451    gen_jmp_im(cur_eip);
2452    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2453                               tcg_const_i32(next_eip - cur_eip));
2454    s->is_jmp = DISAS_TB_JUMP;
2455}
2456
2457static void gen_debug(DisasContext *s, target_ulong cur_eip)
2458{
2459    gen_update_cc_op(s);
2460    gen_jmp_im(cur_eip);
2461    gen_helper_debug(cpu_env);
2462    s->is_jmp = DISAS_TB_JUMP;
2463}
2464
2465static void gen_set_hflag(DisasContext *s, uint32_t mask)
2466{
2467    if ((s->flags & mask) == 0) {
2468        TCGv_i32 t = tcg_temp_new_i32();
2469        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2470        tcg_gen_ori_i32(t, t, mask);
2471        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2472        tcg_temp_free_i32(t);
2473        s->flags |= mask;
2474    }
2475}
2476
2477static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2478{
2479    if (s->flags & mask) {
2480        TCGv_i32 t = tcg_temp_new_i32();
2481        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2482        tcg_gen_andi_i32(t, t, ~mask);
2483        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2484        tcg_temp_free_i32(t);
2485        s->flags &= ~mask;
2486    }
2487}
2488
2489/* Clear BND registers during legacy branches.  */
2490static void gen_bnd_jmp(DisasContext *s)
2491{
2492    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2493       and if the BNDREGs are known to be in use (non-zero) already.
2494       The helper itself will check BNDPRESERVE at runtime.  */
2495    if ((s->prefix & PREFIX_REPNZ) == 0
2496        && (s->flags & HF_MPX_EN_MASK) != 0
2497        && (s->flags & HF_MPX_IU_MASK) != 0) {
2498        gen_helper_bnd_jmp(cpu_env);
2499    }
2500}
2501
2502/* Generate an end of block. Trace exception is also generated if needed.
2503   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2504static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2505{
2506    gen_update_cc_op(s);
2507
2508    /* If several instructions disable interrupts, only the first does it.  */
2509    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2510        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2511    } else {
2512        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2513    }
2514
2515    if (s->tb->flags & HF_RF_MASK) {
2516        gen_helper_reset_rf(cpu_env);
2517    }
2518    if (s->singlestep_enabled) {
2519        gen_helper_debug(cpu_env);
2520    } else if (s->tf) {
2521        gen_helper_single_step(cpu_env);
2522    } else {
2523        tcg_gen_exit_tb(0);
2524    }
2525    s->is_jmp = DISAS_TB_JUMP;
2526}
2527
2528/* End of block, resetting the inhibit irq flag.  */
2529static void gen_eob(DisasContext *s)
2530{
2531    gen_eob_inhibit_irq(s, false);
2532}
2533
2534/* Generate a jump to eip.  No segment change must happen before this, as
2535   a direct call to the next block may occur.  */
2536static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2537{
2538    gen_update_cc_op(s);
2539    set_cc_op(s, CC_OP_DYNAMIC);
2540    if (s->jmp_opt) {
2541        gen_goto_tb(s, tb_num, eip);
2542        s->is_jmp = DISAS_TB_JUMP;
2543    } else {
2544        gen_jmp_im(eip);
2545        gen_eob(s);
2546    }
2547}
2548
2549static void gen_jmp(DisasContext *s, target_ulong eip)
2550{
2551    gen_jmp_tb(s, eip, 0);
2552}
2553
2554static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2555{
2556    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2557    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2558}
2559
2560static inline void gen_stq_env_A0(DisasContext *s, int offset)
2561{
2562    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2563    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2564}
2565
2566static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2567{
2568    int mem_index = s->mem_index;
2569    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2570    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2571    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2572    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2573    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2574}
2575
2576static inline void gen_sto_env_A0(DisasContext *s, int offset)
2577{
2578    int mem_index = s->mem_index;
2579    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2580    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2581    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2582    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2583    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2584}
2585
2586static inline void gen_op_movo(int d_offset, int s_offset)
2587{
2588    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2589    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2590    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2591    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2592}
2593
2594static inline void gen_op_movq(int d_offset, int s_offset)
2595{
2596    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2597    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2598}
2599
2600static inline void gen_op_movl(int d_offset, int s_offset)
2601{
2602    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2603    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2604}
2605
2606static inline void gen_op_movq_env_0(int d_offset)
2607{
2608    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2609    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2610}
2611
2612typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2613typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2614typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2615typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2616typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2617typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2618                               TCGv_i32 val);
2619typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2620typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2621                               TCGv val);
2622
2623#define SSE_SPECIAL ((void *)1)
2624#define SSE_DUMMY ((void *)2)
2625
2626#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2627#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2628                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2629
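/* Each row of sse_op_table1 is indexed by the mandatory prefix: 0 = none,
   1 = 0x66, 2 = 0xF3, 3 = 0xF2 (the b1 value computed in gen_sse).  NULL
   marks an invalid encoding, SSE_SPECIAL an opcode decoded by hand in
   gen_sse, and SSE_DUMMY an opcode handled before the table dispatch.
   E.g. MMX_OP2(paddb) expands to the pair { gen_helper_paddb_mmx,
   gen_helper_paddb_xmm }.  */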
2630static const SSEFunc_0_epp sse_op_table1[256][4] = {
2631    /* 3DNow! extensions */
2632    [0x0e] = { SSE_DUMMY }, /* femms */
2633    [0x0f] = { SSE_DUMMY }, /* pf... */
2634    /* pure SSE operations */
2635    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2636    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2637    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2638    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2639    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2640    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2641    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2642    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2643
2644    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2645    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2646    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2647    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2648    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2649    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2650    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2651    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2652    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2653    [0x51] = SSE_FOP(sqrt),
2654    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2655    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2656    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2657    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2658    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2659    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2660    [0x58] = SSE_FOP(add),
2661    [0x59] = SSE_FOP(mul),
2662    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2663               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2664    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2665    [0x5c] = SSE_FOP(sub),
2666    [0x5d] = SSE_FOP(min),
2667    [0x5e] = SSE_FOP(div),
2668    [0x5f] = SSE_FOP(max),
2669
2670    [0xc2] = SSE_FOP(cmpeq),
2671    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2672               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2673
2674    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2675    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2676    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2677
2678    /* MMX ops and their SSE extensions */
2679    [0x60] = MMX_OP2(punpcklbw),
2680    [0x61] = MMX_OP2(punpcklwd),
2681    [0x62] = MMX_OP2(punpckldq),
2682    [0x63] = MMX_OP2(packsswb),
2683    [0x64] = MMX_OP2(pcmpgtb),
2684    [0x65] = MMX_OP2(pcmpgtw),
2685    [0x66] = MMX_OP2(pcmpgtl),
2686    [0x67] = MMX_OP2(packuswb),
2687    [0x68] = MMX_OP2(punpckhbw),
2688    [0x69] = MMX_OP2(punpckhwd),
2689    [0x6a] = MMX_OP2(punpckhdq),
2690    [0x6b] = MMX_OP2(packssdw),
2691    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2692    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2693    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2694    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2695    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2696               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2697               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2698               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2699    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2700    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2701    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2702    [0x74] = MMX_OP2(pcmpeqb),
2703    [0x75] = MMX_OP2(pcmpeqw),
2704    [0x76] = MMX_OP2(pcmpeql),
2705    [0x77] = { SSE_DUMMY }, /* emms */
2706    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2707    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2708    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2709    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2710    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2711    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2712    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2713    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2714    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2715    [0xd1] = MMX_OP2(psrlw),
2716    [0xd2] = MMX_OP2(psrld),
2717    [0xd3] = MMX_OP2(psrlq),
2718    [0xd4] = MMX_OP2(paddq),
2719    [0xd5] = MMX_OP2(pmullw),
2720    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2721    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2722    [0xd8] = MMX_OP2(psubusb),
2723    [0xd9] = MMX_OP2(psubusw),
2724    [0xda] = MMX_OP2(pminub),
2725    [0xdb] = MMX_OP2(pand),
2726    [0xdc] = MMX_OP2(paddusb),
2727    [0xdd] = MMX_OP2(paddusw),
2728    [0xde] = MMX_OP2(pmaxub),
2729    [0xdf] = MMX_OP2(pandn),
2730    [0xe0] = MMX_OP2(pavgb),
2731    [0xe1] = MMX_OP2(psraw),
2732    [0xe2] = MMX_OP2(psrad),
2733    [0xe3] = MMX_OP2(pavgw),
2734    [0xe4] = MMX_OP2(pmulhuw),
2735    [0xe5] = MMX_OP2(pmulhw),
2736    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2737    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2738    [0xe8] = MMX_OP2(psubsb),
2739    [0xe9] = MMX_OP2(psubsw),
2740    [0xea] = MMX_OP2(pminsw),
2741    [0xeb] = MMX_OP2(por),
2742    [0xec] = MMX_OP2(paddsb),
2743    [0xed] = MMX_OP2(paddsw),
2744    [0xee] = MMX_OP2(pmaxsw),
2745    [0xef] = MMX_OP2(pxor),
2746    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2747    [0xf1] = MMX_OP2(psllw),
2748    [0xf2] = MMX_OP2(pslld),
2749    [0xf3] = MMX_OP2(psllq),
2750    [0xf4] = MMX_OP2(pmuludq),
2751    [0xf5] = MMX_OP2(pmaddwd),
2752    [0xf6] = MMX_OP2(psadbw),
2753    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2754               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2755    [0xf8] = MMX_OP2(psubb),
2756    [0xf9] = MMX_OP2(psubw),
2757    [0xfa] = MMX_OP2(psubl),
2758    [0xfb] = MMX_OP2(psubq),
2759    [0xfc] = MMX_OP2(paddb),
2760    [0xfd] = MMX_OP2(paddw),
2761    [0xfe] = MMX_OP2(paddl),
2762};
2763
2764static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2765    [0 + 2] = MMX_OP2(psrlw),
2766    [0 + 4] = MMX_OP2(psraw),
2767    [0 + 6] = MMX_OP2(psllw),
2768    [8 + 2] = MMX_OP2(psrld),
2769    [8 + 4] = MMX_OP2(psrad),
2770    [8 + 6] = MMX_OP2(pslld),
2771    [16 + 2] = MMX_OP2(psrlq),
2772    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2773    [16 + 6] = MMX_OP2(psllq),
2774    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2775};
2776
2777static const SSEFunc_0_epi sse_op_table3ai[] = {
2778    gen_helper_cvtsi2ss,
2779    gen_helper_cvtsi2sd
2780};
2781
2782#ifdef TARGET_X86_64
2783static const SSEFunc_0_epl sse_op_table3aq[] = {
2784    gen_helper_cvtsq2ss,
2785    gen_helper_cvtsq2sd
2786};
2787#endif
2788
2789static const SSEFunc_i_ep sse_op_table3bi[] = {
2790    gen_helper_cvttss2si,
2791    gen_helper_cvtss2si,
2792    gen_helper_cvttsd2si,
2793    gen_helper_cvtsd2si
2794};
2795
2796#ifdef TARGET_X86_64
2797static const SSEFunc_l_ep sse_op_table3bq[] = {
2798    gen_helper_cvttss2sq,
2799    gen_helper_cvtss2sq,
2800    gen_helper_cvttsd2sq,
2801    gen_helper_cvtsd2sq
2802};
2803#endif
2804
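/* Indexed by the CMPPS/CMPPD/CMPSS/CMPSD imm8 predicate: 0 = eq, 1 = lt,
   2 = le, 3 = unord, 4 = neq, 5 = nlt, 6 = nle, 7 = ord.  E.g.
   "cmpps $2, %xmm1, %xmm0" selects the cmple row.  */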
2805static const SSEFunc_0_epp sse_op_table4[8][4] = {
2806    SSE_FOP(cmpeq),
2807    SSE_FOP(cmplt),
2808    SSE_FOP(cmple),
2809    SSE_FOP(cmpunord),
2810    SSE_FOP(cmpneq),
2811    SSE_FOP(cmpnlt),
2812    SSE_FOP(cmpnle),
2813    SSE_FOP(cmpord),
2814};
2815
2816static const SSEFunc_0_epp sse_op_table5[256] = {
2817    [0x0c] = gen_helper_pi2fw,
2818    [0x0d] = gen_helper_pi2fd,
2819    [0x1c] = gen_helper_pf2iw,
2820    [0x1d] = gen_helper_pf2id,
2821    [0x8a] = gen_helper_pfnacc,
2822    [0x8e] = gen_helper_pfpnacc,
2823    [0x90] = gen_helper_pfcmpge,
2824    [0x94] = gen_helper_pfmin,
2825    [0x96] = gen_helper_pfrcp,
2826    [0x97] = gen_helper_pfrsqrt,
2827    [0x9a] = gen_helper_pfsub,
2828    [0x9e] = gen_helper_pfadd,
2829    [0xa0] = gen_helper_pfcmpgt,
2830    [0xa4] = gen_helper_pfmax,
2831    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2832    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2833    [0xaa] = gen_helper_pfsubr,
2834    [0xae] = gen_helper_pfacc,
2835    [0xb0] = gen_helper_pfcmpeq,
2836    [0xb4] = gen_helper_pfmul,
2837    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2838    [0xb7] = gen_helper_pmulhrw_mmx,
2839    [0xbb] = gen_helper_pswapd,
2840    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2841};
2842
2843struct SSEOpHelper_epp {
2844    SSEFunc_0_epp op[2];
2845    uint32_t ext_mask;
2846};
2847
2848struct SSEOpHelper_eppi {
2849    SSEFunc_0_eppi op[2];
2850    uint32_t ext_mask;
2851};
2852
2853#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2854#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2855#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2856#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2857#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2858        CPUID_EXT_PCLMULQDQ }
2859#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2860
2861static const struct SSEOpHelper_epp sse_op_table6[256] = {
2862    [0x00] = SSSE3_OP(pshufb),
2863    [0x01] = SSSE3_OP(phaddw),
2864    [0x02] = SSSE3_OP(phaddd),
2865    [0x03] = SSSE3_OP(phaddsw),
2866    [0x04] = SSSE3_OP(pmaddubsw),
2867    [0x05] = SSSE3_OP(phsubw),
2868    [0x06] = SSSE3_OP(phsubd),
2869    [0x07] = SSSE3_OP(phsubsw),
2870    [0x08] = SSSE3_OP(psignb),
2871    [0x09] = SSSE3_OP(psignw),
2872    [0x0a] = SSSE3_OP(psignd),
2873    [0x0b] = SSSE3_OP(pmulhrsw),
2874    [0x10] = SSE41_OP(pblendvb),
2875    [0x14] = SSE41_OP(blendvps),
2876    [0x15] = SSE41_OP(blendvpd),
2877    [0x17] = SSE41_OP(ptest),
2878    [0x1c] = SSSE3_OP(pabsb),
2879    [0x1d] = SSSE3_OP(pabsw),
2880    [0x1e] = SSSE3_OP(pabsd),
2881    [0x20] = SSE41_OP(pmovsxbw),
2882    [0x21] = SSE41_OP(pmovsxbd),
2883    [0x22] = SSE41_OP(pmovsxbq),
2884    [0x23] = SSE41_OP(pmovsxwd),
2885    [0x24] = SSE41_OP(pmovsxwq),
2886    [0x25] = SSE41_OP(pmovsxdq),
2887    [0x28] = SSE41_OP(pmuldq),
2888    [0x29] = SSE41_OP(pcmpeqq),
2889    [0x2a] = SSE41_SPECIAL, /* movntdqa */
2890    [0x2b] = SSE41_OP(packusdw),
2891    [0x30] = SSE41_OP(pmovzxbw),
2892    [0x31] = SSE41_OP(pmovzxbd),
2893    [0x32] = SSE41_OP(pmovzxbq),
2894    [0x33] = SSE41_OP(pmovzxwd),
2895    [0x34] = SSE41_OP(pmovzxwq),
2896    [0x35] = SSE41_OP(pmovzxdq),
2897    [0x37] = SSE42_OP(pcmpgtq),
2898    [0x38] = SSE41_OP(pminsb),
2899    [0x39] = SSE41_OP(pminsd),
2900    [0x3a] = SSE41_OP(pminuw),
2901    [0x3b] = SSE41_OP(pminud),
2902    [0x3c] = SSE41_OP(pmaxsb),
2903    [0x3d] = SSE41_OP(pmaxsd),
2904    [0x3e] = SSE41_OP(pmaxuw),
2905    [0x3f] = SSE41_OP(pmaxud),
2906    [0x40] = SSE41_OP(pmulld),
2907    [0x41] = SSE41_OP(phminposuw),
2908    [0xdb] = AESNI_OP(aesimc),
2909    [0xdc] = AESNI_OP(aesenc),
2910    [0xdd] = AESNI_OP(aesenclast),
2911    [0xde] = AESNI_OP(aesdec),
2912    [0xdf] = AESNI_OP(aesdeclast),
2913};
2914
2915static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2916    [0x08] = SSE41_OP(roundps),
2917    [0x09] = SSE41_OP(roundpd),
2918    [0x0a] = SSE41_OP(roundss),
2919    [0x0b] = SSE41_OP(roundsd),
2920    [0x0c] = SSE41_OP(blendps),
2921    [0x0d] = SSE41_OP(blendpd),
2922    [0x0e] = SSE41_OP(pblendw),
2923    [0x0f] = SSSE3_OP(palignr),
2924    [0x14] = SSE41_SPECIAL, /* pextrb */
2925    [0x15] = SSE41_SPECIAL, /* pextrw */
2926    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2927    [0x17] = SSE41_SPECIAL, /* extractps */
2928    [0x20] = SSE41_SPECIAL, /* pinsrb */
2929    [0x21] = SSE41_SPECIAL, /* insertps */
2930    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2931    [0x40] = SSE41_OP(dpps),
2932    [0x41] = SSE41_OP(dppd),
2933    [0x42] = SSE41_OP(mpsadbw),
2934    [0x44] = PCLMULQDQ_OP(pclmulqdq),
2935    [0x60] = SSE42_OP(pcmpestrm),
2936    [0x61] = SSE42_OP(pcmpestri),
2937    [0x62] = SSE42_OP(pcmpistrm),
2938    [0x63] = SSE42_OP(pcmpistri),
2939    [0xdf] = AESNI_OP(aeskeygenassist),
2940};
2941
2942static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2943                    target_ulong pc_start, int rex_r)
2944{
2945    int b1, op1_offset, op2_offset, is_xmm, val;
2946    int modrm, mod, rm, reg;
2947    SSEFunc_0_epp sse_fn_epp;
2948    SSEFunc_0_eppi sse_fn_eppi;
2949    SSEFunc_0_ppi sse_fn_ppi;
2950    SSEFunc_0_eppt sse_fn_eppt;
2951    TCGMemOp ot;
2952
2953    b &= 0xff;
2954    if (s->prefix & PREFIX_DATA)
2955        b1 = 1;
2956    else if (s->prefix & PREFIX_REPZ)
2957        b1 = 2;
2958    else if (s->prefix & PREFIX_REPNZ)
2959        b1 = 3;
2960    else
2961        b1 = 0;
2962    sse_fn_epp = sse_op_table1[b][b1];
2963    if (!sse_fn_epp) {
2964        goto unknown_op;
2965    }
2966    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2967        is_xmm = 1;
2968    } else {
2969        if (b1 == 0) {
2970            /* MMX case */
2971            is_xmm = 0;
2972        } else {
2973            is_xmm = 1;
2974        }
2975    }
2976    /* simple MMX/SSE operation */
2977    if (s->flags & HF_TS_MASK) {
2978        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
2979        return;
2980    }
2981    if (s->flags & HF_EM_MASK) {
2982    illegal_op:
2983        gen_illegal_opcode(s);
2984        return;
2985    }
2986    if (is_xmm
2987        && !(s->flags & HF_OSFXSR_MASK)
2988        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
2989        goto unknown_op;
2990    }
2991    if (b == 0x0e) {
2992        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
2993            /* If we were fully decoding this we might use illegal_op.  */
2994            goto unknown_op;
2995        }
2996        /* femms */
2997        gen_helper_emms(cpu_env);
2998        return;
2999    }
3000    if (b == 0x77) {
3001        /* emms */
3002        gen_helper_emms(cpu_env);
3003        return;
3004    }
3005    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3006       the static cpu state) */
3007    if (!is_xmm) {
3008        gen_helper_enter_mmx(cpu_env);
3009    }
3010
3011    modrm = cpu_ldub_code(env, s->pc++);
3012    reg = ((modrm >> 3) & 7);
3013    if (is_xmm)
3014        reg |= rex_r;
3015    mod = (modrm >> 6) & 3;
3016    if (sse_fn_epp == SSE_SPECIAL) {
3017        b |= (b1 << 8);
3018        switch(b) {
3019        case 0x0e7: /* movntq */
3020            if (mod == 3) {
3021                goto illegal_op;
3022            }
3023            gen_lea_modrm(env, s, modrm);
3024            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3025            break;
3026        case 0x1e7: /* movntdq */
3027        case 0x02b: /* movntps */
3028        case 0x12b: /* movntpd */
3029            if (mod == 3)
3030                goto illegal_op;
3031            gen_lea_modrm(env, s, modrm);
3032            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3033            break;
3034        case 0x3f0: /* lddqu */
3035            if (mod == 3)
3036                goto illegal_op;
3037            gen_lea_modrm(env, s, modrm);
3038            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3039            break;
3040        case 0x22b: /* movntss */
3041        case 0x32b: /* movntsd */
3042            if (mod == 3)
3043                goto illegal_op;
3044            gen_lea_modrm(env, s, modrm);
3045            if (b1 & 1) {
3046                gen_stq_env_A0(s, offsetof(CPUX86State,
3047                                           xmm_regs[reg].ZMM_Q(0)));
3048            } else {
3049                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3050                    xmm_regs[reg].ZMM_L(0)));
3051                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3052            }
3053            break;
3054        case 0x6e: /* movd mm, ea */
3055#ifdef TARGET_X86_64
3056            if (s->dflag == MO_64) {
3057                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3058                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3059            } else
3060#endif
3061            {
3062                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3063                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3064                                 offsetof(CPUX86State,fpregs[reg].mmx));
3065                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3066                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3067            }
3068            break;
3069        case 0x16e: /* movd xmm, ea */
3070#ifdef TARGET_X86_64
3071            if (s->dflag == MO_64) {
3072                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3073                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3074                                 offsetof(CPUX86State,xmm_regs[reg]));
3075                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3076            } else
3077#endif
3078            {
3079                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3080                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3081                                 offsetof(CPUX86State,xmm_regs[reg]));
3082                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3083                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3084            }
3085            break;
3086        case 0x6f: /* movq mm, ea */
3087            if (mod != 3) {
3088                gen_lea_modrm(env, s, modrm);
3089                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3090            } else {
3091                rm = (modrm & 7);
3092                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3093                               offsetof(CPUX86State,fpregs[rm].mmx));
3094                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3095                               offsetof(CPUX86State,fpregs[reg].mmx));
3096            }
3097            break;
3098        case 0x010: /* movups */
3099        case 0x110: /* movupd */
3100        case 0x028: /* movaps */
3101        case 0x128: /* movapd */
3102        case 0x16f: /* movdqa xmm, ea */
3103        case 0x26f: /* movdqu xmm, ea */
3104            if (mod != 3) {
3105                gen_lea_modrm(env, s, modrm);
3106                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3107            } else {
3108                rm = (modrm & 7) | REX_B(s);
3109                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3110                            offsetof(CPUX86State,xmm_regs[rm]));
3111            }
3112            break;
3113        case 0x210: /* movss xmm, ea */
3114            if (mod != 3) {
3115                gen_lea_modrm(env, s, modrm);
3116                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3117                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3118                tcg_gen_movi_tl(cpu_T0, 0);
3119                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3120                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3121                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3122            } else {
3123                rm = (modrm & 7) | REX_B(s);
3124                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3125                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3126            }
3127            break;
3128        case 0x310: /* movsd xmm, ea */
3129            if (mod != 3) {
3130                gen_lea_modrm(env, s, modrm);
3131                gen_ldq_env_A0(s, offsetof(CPUX86State,
3132                                           xmm_regs[reg].ZMM_Q(0)));
3133                tcg_gen_movi_tl(cpu_T0, 0);
3134                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3135                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3136            } else {
3137                rm = (modrm & 7) | REX_B(s);
3138                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3139                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3140            }
3141            break;
3142        case 0x012: /* movlps */
3143        case 0x112: /* movlpd */
3144            if (mod != 3) {
3145                gen_lea_modrm(env, s, modrm);
3146                gen_ldq_env_A0(s, offsetof(CPUX86State,
3147                                           xmm_regs[reg].ZMM_Q(0)));
3148            } else {
3149                /* movhlps */
3150                rm = (modrm & 7) | REX_B(s);
3151                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3152                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3153            }
3154            break;
3155        case 0x212: /* movsldup */
3156            if (mod != 3) {
3157                gen_lea_modrm(env, s, modrm);
3158                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3159            } else {
3160                rm = (modrm & 7) | REX_B(s);
3161                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3162                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3163                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3164                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3165            }
3166            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3167                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3168            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3169                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3170            break;
3171        case 0x312: /* movddup */
3172            if (mod != 3) {
3173                gen_lea_modrm(env, s, modrm);
3174                gen_ldq_env_A0(s, offsetof(CPUX86State,
3175                                           xmm_regs[reg].ZMM_Q(0)));
3176            } else {
3177                rm = (modrm & 7) | REX_B(s);
3178                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3179                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3180            }
3181            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3182                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3183            break;
3184        case 0x016: /* movhps */
3185        case 0x116: /* movhpd */
3186            if (mod != 3) {
3187                gen_lea_modrm(env, s, modrm);
3188                gen_ldq_env_A0(s, offsetof(CPUX86State,
3189                                           xmm_regs[reg].ZMM_Q(1)));
3190            } else {
3191                /* movlhps */
3192                rm = (modrm & 7) | REX_B(s);
3193                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3194                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3195            }
3196            break;
3197        case 0x216: /* movshdup */
3198            if (mod != 3) {
3199                gen_lea_modrm(env, s, modrm);
3200                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3201            } else {
3202                rm = (modrm & 7) | REX_B(s);
3203                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3204                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3205                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3206                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3207            }
3208            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3209                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3210            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3211                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3212            break;
3213        case 0x178:
3214        case 0x378:
3215            {
3216                int bit_index, field_length;
3217
3218                if (b1 == 1 && reg != 0)
3219                    goto illegal_op;
3220                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3221                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3222                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3223                    offsetof(CPUX86State,xmm_regs[reg]));
3224                if (b1 == 1)
3225                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
3226                                       tcg_const_i32(bit_index),
3227                                       tcg_const_i32(field_length));
3228                else
3229                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
3230                                         tcg_const_i32(bit_index),
3231                                         tcg_const_i32(field_length));
3232            }
3233            break;
3234        case 0x7e: /* movd ea, mm */
3235#ifdef TARGET_X86_64
3236            if (s->dflag == MO_64) {
3237                tcg_gen_ld_i64(cpu_T0, cpu_env,
3238                               offsetof(CPUX86State,fpregs[reg].mmx));
3239                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3240            } else
3241#endif
3242            {
3243                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3244                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3245                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3246            }
3247            break;
3248        case 0x17e: /* movd ea, xmm */
3249#ifdef TARGET_X86_64
3250            if (s->dflag == MO_64) {
3251                tcg_gen_ld_i64(cpu_T0, cpu_env,
3252                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3253                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3254            } else
3255#endif
3256            {
3257                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3258                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3259                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3260            }
3261            break;
3262        case 0x27e: /* movq xmm, ea */
3263            if (mod != 3) {
3264                gen_lea_modrm(env, s, modrm);
3265                gen_ldq_env_A0(s, offsetof(CPUX86State,
3266                                           xmm_regs[reg].ZMM_Q(0)));
3267            } else {
3268                rm = (modrm & 7) | REX_B(s);
3269                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3270                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3271            }
3272            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3273            break;
3274        case 0x7f: /* movq ea, mm */
3275            if (mod != 3) {
3276                gen_lea_modrm(env, s, modrm);
3277                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3278            } else {
3279                rm = (modrm & 7);
3280                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3281                            offsetof(CPUX86State,fpregs[reg].mmx));
3282            }
3283            break;
3284        case 0x011: /* movups */
3285        case 0x111: /* movupd */
3286        case 0x029: /* movaps */
3287        case 0x129: /* movapd */
3288        case 0x17f: /* movdqa ea, xmm */
3289        case 0x27f: /* movdqu ea, xmm */
3290            if (mod != 3) {
3291                gen_lea_modrm(env, s, modrm);
3292                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3293            } else {
3294                rm = (modrm & 7) | REX_B(s);
3295                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3296                            offsetof(CPUX86State,xmm_regs[reg]));
3297            }
3298            break;
3299        case 0x211: /* movss ea, xmm */
3300            if (mod != 3) {
3301                gen_lea_modrm(env, s, modrm);
3302                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3303                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3304            } else {
3305                rm = (modrm & 7) | REX_B(s);
3306                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
3307                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3308            }
3309            break;
3310        case 0x311: /* movsd ea, xmm */
3311            if (mod != 3) {
3312                gen_lea_modrm(env, s, modrm);
3313                gen_stq_env_A0(s, offsetof(CPUX86State,
3314                                           xmm_regs[reg].ZMM_Q(0)));
3315            } else {
3316                rm = (modrm & 7) | REX_B(s);
3317                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3318                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3319            }
3320            break;
3321        case 0x013: /* movlps */
3322        case 0x113: /* movlpd */
3323            if (mod != 3) {
3324                gen_lea_modrm(env, s, modrm);
3325                gen_stq_env_A0(s, offsetof(CPUX86State,
3326                                           xmm_regs[reg].ZMM_Q(0)));
3327            } else {
3328                goto illegal_op;
3329            }
3330            break;
3331        case 0x017: /* movhps */
3332        case 0x117: /* movhpd */
3333            if (mod != 3) {
3334                gen_lea_modrm(env, s, modrm);
3335                gen_stq_env_A0(s, offsetof(CPUX86State,
3336                                           xmm_regs[reg].ZMM_Q(1)));
3337            } else {
3338                goto illegal_op;
3339            }
3340            break;
3341        case 0x71: /* shift mm, im */
3342        case 0x72:
3343        case 0x73:
3344        case 0x171: /* shift xmm, im */
3345        case 0x172:
3346        case 0x173:
3347            if (b1 >= 2) {
3348                goto unknown_op;
3349            }
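                /* Build the immediate shift count as a 64-bit value in
                   xmm_t0/mmx_t0 so the regular two-operand SSE helpers
                   can be reused for the shift itself.  */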
3350            val = cpu_ldub_code(env, s->pc++);
3351            if (is_xmm) {
3352                tcg_gen_movi_tl(cpu_T0, val);
3353                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3354                tcg_gen_movi_tl(cpu_T0, 0);
3355                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
3356                op1_offset = offsetof(CPUX86State,xmm_t0);
3357            } else {
3358                tcg_gen_movi_tl(cpu_T0, val);
3359                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3360                tcg_gen_movi_tl(cpu_T0, 0);
3361                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3362                op1_offset = offsetof(CPUX86State,mmx_t0);
3363            }
                sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
                                           ((modrm >> 3) & 7)][b1];
3366            if (!sse_fn_epp) {
3367                goto unknown_op;
3368            }
3369            if (is_xmm) {
3370                rm = (modrm & 7) | REX_B(s);
3371                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3372            } else {
3373                rm = (modrm & 7);
3374                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3375            }
3376            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3377            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3378            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3379            break;
3380        case 0x050: /* movmskps */
3381            rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3383                             offsetof(CPUX86State,xmm_regs[rm]));
3384            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3385            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3386            break;
3387        case 0x150: /* movmskpd */
3388            rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3390                             offsetof(CPUX86State,xmm_regs[rm]));
3391            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3392            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3393            break;
3394        case 0x02a: /* cvtpi2ps */
3395        case 0x12a: /* cvtpi2pd */
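                /* The source operand is an MMX register, so switch the
                   FPU into MMX mode before touching it.  */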
3396            gen_helper_enter_mmx(cpu_env);
3397            if (mod != 3) {
3398                gen_lea_modrm(env, s, modrm);
3399                op2_offset = offsetof(CPUX86State,mmx_t0);
3400                gen_ldq_env_A0(s, op2_offset);
3401            } else {
3402                rm = (modrm & 7);
3403                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3404            }
3405            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3406            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3407            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3408            switch(b >> 8) {
3409            case 0x0:
3410                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3411                break;
3412            default:
3413            case 0x1:
3414                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3415                break;
3416            }
3417            break;
3418        case 0x22a: /* cvtsi2ss */
3419        case 0x32a: /* cvtsi2sd */
3420            ot = mo_64_32(s->dflag);
3421            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3422            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3423            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3424            if (ot == MO_32) {
3425                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3426                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3427                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3428            } else {
3429#ifdef TARGET_X86_64
3430                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3431                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
3432#else
3433                goto illegal_op;
3434#endif
3435            }
3436            break;
3437        case 0x02c: /* cvttps2pi */
3438        case 0x12c: /* cvttpd2pi */
3439        case 0x02d: /* cvtps2pi */
3440        case 0x12d: /* cvtpd2pi */
3441            gen_helper_enter_mmx(cpu_env);
3442            if (mod != 3) {
3443                gen_lea_modrm(env, s, modrm);
3444                op2_offset = offsetof(CPUX86State,xmm_t0);
3445                gen_ldo_env_A0(s, op2_offset);
3446            } else {
3447                rm = (modrm & 7) | REX_B(s);
3448                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3449            }
3450            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3451            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3452            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3453            switch(b) {
3454            case 0x02c:
3455                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3456                break;
3457            case 0x12c:
3458                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3459                break;
3460            case 0x02d:
3461                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3462                break;
3463            case 0x12d:
3464                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3465                break;
3466            }
3467            break;
3468        case 0x22c: /* cvttss2si */
3469        case 0x32c: /* cvttsd2si */
3470        case 0x22d: /* cvtss2si */
3471        case 0x32d: /* cvtsd2si */
3472            ot = mo_64_32(s->dflag);
3473            if (mod != 3) {
3474                gen_lea_modrm(env, s, modrm);
3475                if ((b >> 8) & 1) {
3476                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3477                } else {
3478                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3479                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3480                }
3481                op2_offset = offsetof(CPUX86State,xmm_t0);
3482            } else {
3483                rm = (modrm & 7) | REX_B(s);
3484                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3485            }
3486            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3487            if (ot == MO_32) {
3488                SSEFunc_i_ep sse_fn_i_ep =
3489                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3490                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3491                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
3492            } else {
3493#ifdef TARGET_X86_64
3494                SSEFunc_l_ep sse_fn_l_ep =
3495                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3496                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
3497#else
3498                goto illegal_op;
3499#endif
3500            }
3501            gen_op_mov_reg_v(ot, reg, cpu_T0);
3502            break;
3503        case 0xc4: /* pinsrw */
3504        case 0x1c4:
3505            s->rip_offset = 1;
3506            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3507            val = cpu_ldub_code(env, s->pc++);
3508            if (b1) {
3509                val &= 7;
3510                tcg_gen_st16_tl(cpu_T0, cpu_env,
3511                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3512            } else {
3513                val &= 3;
3514                tcg_gen_st16_tl(cpu_T0, cpu_env,
3515                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3516            }
3517            break;
3518        case 0xc5: /* pextrw */
3519        case 0x1c5:
3520            if (mod != 3)
3521                goto illegal_op;
3522            ot = mo_64_32(s->dflag);
3523            val = cpu_ldub_code(env, s->pc++);
3524            if (b1) {
3525                val &= 7;
3526                rm = (modrm & 7) | REX_B(s);
3527                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3528                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3529            } else {
3530                val &= 3;
3531                rm = (modrm & 7);
3532                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3533                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3534            }
3535            reg = ((modrm >> 3) & 7) | rex_r;
3536            gen_op_mov_reg_v(ot, reg, cpu_T0);
3537            break;
3538        case 0x1d6: /* movq ea, xmm */
3539            if (mod != 3) {
3540                gen_lea_modrm(env, s, modrm);
3541                gen_stq_env_A0(s, offsetof(CPUX86State,
3542                                           xmm_regs[reg].ZMM_Q(0)));
3543            } else {
3544                rm = (modrm & 7) | REX_B(s);
3545                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3546                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3547                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3548            }
3549            break;
3550        case 0x2d6: /* movq2dq */
3551            gen_helper_enter_mmx(cpu_env);
3552            rm = (modrm & 7);
3553            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3554                        offsetof(CPUX86State,fpregs[rm].mmx));
3555            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3556            break;
3557        case 0x3d6: /* movdq2q */
3558            gen_helper_enter_mmx(cpu_env);
3559            rm = (modrm & 7) | REX_B(s);
3560            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3561                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3562            break;
3563        case 0xd7: /* pmovmskb */
3564        case 0x1d7:
3565            if (mod != 3)
3566                goto illegal_op;
3567            if (b1) {
3568                rm = (modrm & 7) | REX_B(s);
3569                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3570                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3571            } else {
3572                rm = (modrm & 7);
3573                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3574                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3575            }
3576            reg = ((modrm >> 3) & 7) | rex_r;
3577            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3578            break;
3579
3580        case 0x138:
3581        case 0x038:
3582            b = modrm;
3583            if ((b & 0xf0) == 0xf0) {
3584                goto do_0f_38_fx;
3585            }
3586            modrm = cpu_ldub_code(env, s->pc++);
3587            rm = modrm & 7;
3588            reg = ((modrm >> 3) & 7) | rex_r;
3589            mod = (modrm >> 6) & 3;
3590            if (b1 >= 2) {
3591                goto unknown_op;
3592            }
3593
3594            sse_fn_epp = sse_op_table6[b].op[b1];
3595            if (!sse_fn_epp) {
3596                goto unknown_op;
3597            }
3598            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3599                goto illegal_op;
3600
3601            if (b1) {
3602                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3603                if (mod == 3) {
3604                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3605                } else {
3606                    op2_offset = offsetof(CPUX86State,xmm_t0);
3607                    gen_lea_modrm(env, s, modrm);
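                        /* pmovsx/pmovzx widen their operand, so only the
                           low 64/32/16 bits are read from memory; movntdqa
                           is a full 16-byte load straight into op1.  */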
3608                    switch (b) {
3609                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3610                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3611                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3612                        gen_ldq_env_A0(s, op2_offset +
3613                                        offsetof(ZMMReg, ZMM_Q(0)));
3614                        break;
3615                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3616                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3617                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3618                                            s->mem_index, MO_LEUL);
3619                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3620                                        offsetof(ZMMReg, ZMM_L(0)));
3621                        break;
3622                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3623                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3624                                           s->mem_index, MO_LEUW);
3625                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3626                                        offsetof(ZMMReg, ZMM_W(0)));
3627                        break;
                        case 0x2a:            /* movntdqa */
3629                        gen_ldo_env_A0(s, op1_offset);
3630                        return;
3631                    default:
3632                        gen_ldo_env_A0(s, op2_offset);
3633                    }
3634                }
3635            } else {
3636                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3637                if (mod == 3) {
3638                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3639                } else {
3640                    op2_offset = offsetof(CPUX86State,mmx_t0);
3641                    gen_lea_modrm(env, s, modrm);
3642                    gen_ldq_env_A0(s, op2_offset);
3643                }
3644            }
3645            if (sse_fn_epp == SSE_SPECIAL) {
3646                goto unknown_op;
3647            }
3648
3649            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3650            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3651            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3652
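                /* 0f 38 17 is ptest, which updates ZF and CF.  */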
3653            if (b == 0x17) {
3654                set_cc_op(s, CC_OP_EFLAGS);
3655            }
3656            break;
3657
3658        case 0x238:
3659        case 0x338:
3660        do_0f_38_fx:
3661            /* Various integer extensions at 0f 38 f[0-f].  */
3662            b = modrm | (b1 << 8);
3663            modrm = cpu_ldub_code(env, s->pc++);
3664            reg = ((modrm >> 3) & 7) | rex_r;
3665
3666            switch (b) {
3667            case 0x3f0: /* crc32 Gd,Eb */
3668            case 0x3f1: /* crc32 Gd,Ey */
3669            do_crc32:
3670                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3671                    goto illegal_op;
3672                }
3673                if ((b & 0xff) == 0xf0) {
3674                    ot = MO_8;
3675                } else if (s->dflag != MO_64) {
3676                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3677                } else {
3678                    ot = MO_64;
3679                }
3680
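                    /* The helper takes the previous CRC value, the new
                       data in T0, and the data width in bits.  */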
3681                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3682                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3683                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
3684                                 cpu_T0, tcg_const_i32(8 << ot));
3685
3686                ot = mo_64_32(s->dflag);
3687                gen_op_mov_reg_v(ot, reg, cpu_T0);
3688                break;
3689
3690            case 0x1f0: /* crc32 or movbe */
3691            case 0x1f1:
                    /* For these insns, the f3 prefix is supposed to have
                       priority over the 66 prefix, but that is not how b1
                       was computed above.  */
3695                if (s->prefix & PREFIX_REPNZ) {
3696                    goto do_crc32;
3697                }
3698                /* FALLTHRU */
3699            case 0x0f0: /* movbe Gy,My */
3700            case 0x0f1: /* movbe My,Gy */
3701                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3702                    goto illegal_op;
3703                }
3704                if (s->dflag != MO_64) {
3705                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3706                } else {
3707                    ot = MO_64;
3708                }
3709
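                    /* movbe is a byte-swapping load/store; MO_BE makes
                       the memory access big-endian to get the swap.  */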
3710                gen_lea_modrm(env, s, modrm);
3711                if ((b & 1) == 0) {
3712                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
3713                                       s->mem_index, ot | MO_BE);
3714                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3715                } else {
3716                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3717                                       s->mem_index, ot | MO_BE);
3718                }
3719                break;
3720
3721            case 0x0f2: /* andn Gy, By, Ey */
3722                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3723                    || !(s->prefix & PREFIX_VEX)
3724                    || s->vex_l != 0) {
3725                    goto illegal_op;
3726                }
3727                ot = mo_64_32(s->dflag);
3728                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                    /* andn computes Ey & ~By.  */
                    tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
3730                gen_op_mov_reg_v(ot, reg, cpu_T0);
3731                gen_op_update1_cc();
3732                set_cc_op(s, CC_OP_LOGICB + ot);
3733                break;
3734
3735            case 0x0f7: /* bextr Gy, Ey, By */
3736                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3737                    || !(s->prefix & PREFIX_VEX)
3738                    || s->vex_l != 0) {
3739                    goto illegal_op;
3740                }
3741                ot = mo_64_32(s->dflag);
3742                {
3743                    TCGv bound, zero;
3744
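                        /* By holds the control word: bits [7:0] give the
                           start bit and bits [15:8] the field length.  */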
3745                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3746                    /* Extract START, and shift the operand.
3747                       Shifts larger than operand size get zeros.  */
3748                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3749                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
3750
3751                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3752                    zero = tcg_const_tl(0);
3753                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
3754                                       cpu_T0, zero);
3755                    tcg_temp_free(zero);
3756
3757                    /* Extract the LEN into a mask.  Lengths larger than
3758                       operand size get all ones.  */
3759                    tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
3760                    tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
3761                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3762                                       cpu_A0, bound);
3763                    tcg_temp_free(bound);
3764                    tcg_gen_movi_tl(cpu_T1, 1);
3765                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
3766                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
3767                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3768
3769                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3770                    gen_op_update1_cc();
3771                    set_cc_op(s, CC_OP_LOGICB + ot);
3772                }
3773                break;
3774
3775            case 0x0f5: /* bzhi Gy, Ey, By */
3776                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3777                    || !(s->prefix & PREFIX_VEX)
3778                    || s->vex_l != 0) {
3779                    goto illegal_op;
3780                }
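                    /* bzhi clears every bit of Ey from the index in the
                       low byte of By upwards.  */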
3781                ot = mo_64_32(s->dflag);
3782                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3783                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
3784                {
3785                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3786                    /* Note that since we're using BMILG (in order to get O
3787                       cleared) we need to store the inverse into C.  */
3788                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3789                                       cpu_T1, bound);
3790                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
3791                                       bound, bound, cpu_T1);
3792                    tcg_temp_free(bound);
3793                }
3794                tcg_gen_movi_tl(cpu_A0, -1);
3795                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
3796                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
3797                gen_op_mov_reg_v(ot, reg, cpu_T0);
3798                gen_op_update1_cc();
3799                set_cc_op(s, CC_OP_BMILGB + ot);
3800                break;
3801
3802            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3803                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3804                    || !(s->prefix & PREFIX_VEX)
3805                    || s->vex_l != 0) {
3806                    goto illegal_op;
3807                }
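                    /* mulx is a flag-less unsigned widening multiply of
                       EDX/RDX by Ey; By receives the low half and Gy the
                       high half of the product.  */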
3808                ot = mo_64_32(s->dflag);
3809                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3810                switch (ot) {
3811                default:
3812                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3813                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3814                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3815                                      cpu_tmp2_i32, cpu_tmp3_i32);
3816                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3817                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3818                    break;
3819#ifdef TARGET_X86_64
3820                case MO_64:
3821                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
3822                                      cpu_T0, cpu_regs[R_EDX]);
3823                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
3824                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
3825                    break;
3826#endif
3827                }
3828                break;
3829
3830            case 0x3f5: /* pdep Gy, By, Ey */
3831                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3832                    || !(s->prefix & PREFIX_VEX)
3833                    || s->vex_l != 0) {
3834                    goto illegal_op;
3835                }
3836                ot = mo_64_32(s->dflag);
3837                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3838                /* Note that by zero-extending the mask operand, we
3839                   automatically handle zero-extending the result.  */
3840                if (ot == MO_64) {
3841                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3842                } else {
3843                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3844                }
3845                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
3846                break;
3847
3848            case 0x2f5: /* pext Gy, By, Ey */
3849                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3850                    || !(s->prefix & PREFIX_VEX)
3851                    || s->vex_l != 0) {
3852                    goto illegal_op;
3853                }
3854                ot = mo_64_32(s->dflag);
3855                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3856                /* Note that by zero-extending the mask operand, we
3857                   automatically handle zero-extending the result.  */
3858                if (ot == MO_64) {
3859                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3860                } else {
3861                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3862                }
3863                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
3864                break;
3865
3866            case 0x1f6: /* adcx Gy, Ey */
3867            case 0x2f6: /* adox Gy, Ey */
3868                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3869                    goto illegal_op;
3870                } else {
3871                    TCGv carry_in, carry_out, zero;
3872                    int end_op;
3873
3874                    ot = mo_64_32(s->dflag);
3875                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3876
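                        /* adcx consumes and produces only CF, and adox
                           only OF, so two interleaved chains do not
                           disturb each other.  */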
3877                    /* Re-use the carry-out from a previous round.  */
3878                    TCGV_UNUSED(carry_in);
3879                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3880                    switch (s->cc_op) {
3881                    case CC_OP_ADCX:
3882                        if (b == 0x1f6) {
3883                            carry_in = cpu_cc_dst;
3884                            end_op = CC_OP_ADCX;
3885                        } else {
3886                            end_op = CC_OP_ADCOX;
3887                        }
3888                        break;
3889                    case CC_OP_ADOX:
3890                        if (b == 0x1f6) {
3891                            end_op = CC_OP_ADCOX;
3892                        } else {
3893                            carry_in = cpu_cc_src2;
3894                            end_op = CC_OP_ADOX;
3895                        }
3896                        break;
3897                    case CC_OP_ADCOX:
3898                        end_op = CC_OP_ADCOX;
3899                        carry_in = carry_out;
3900                        break;
3901                    default:
3902                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3903                        break;
3904                    }
3905                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
3906                    if (TCGV_IS_UNUSED(carry_in)) {
3907                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3908                            gen_compute_eflags(s);
3909                        }
3910                        carry_in = cpu_tmp0;
3911                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
3912                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
3913                        tcg_gen_andi_tl(carry_in, carry_in, 1);
3914                    }
3915
3916                    switch (ot) {
3917#ifdef TARGET_X86_64
3918                    case MO_32:
3919                        /* If we know TL is 64-bit, and we want a 32-bit
3920                           result, just do everything in 64-bit arithmetic.  */
3921                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3922                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
3923                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
3924                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
3925                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
3926                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
3927                        break;
3928#endif
3929                    default:
3930                        /* Otherwise compute the carry-out in two steps.  */
3931                        zero = tcg_const_tl(0);
3932                        tcg_gen_add2_tl(cpu_T0, carry_out,
3933                                        cpu_T0, zero,
3934                                        carry_in, zero);
3935                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
3936                                        cpu_regs[reg], carry_out,
3937                                        cpu_T0, zero);
3938                        tcg_temp_free(zero);
3939                        break;
3940                    }
3941                    set_cc_op(s, end_op);
3942                }
3943                break;
3944
3945            case 0x1f7: /* shlx Gy, Ey, By */
3946            case 0x2f7: /* sarx Gy, Ey, By */
3947            case 0x3f7: /* shrx Gy, Ey, By */
3948                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3949                    || !(s->prefix & PREFIX_VEX)
3950                    || s->vex_l != 0) {
3951                    goto illegal_op;
3952                }
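                    /* The shift count comes from By, masked to the
                       operand width; unlike the legacy shifts, no flags
                       are written.  */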
3953                ot = mo_64_32(s->dflag);
3954                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3955                if (ot == MO_64) {
3956                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
3957                } else {
3958                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
3959                }
3960                if (b == 0x1f7) {
3961                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
3962                } else if (b == 0x2f7) {
3963                    if (ot != MO_64) {
3964                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
3965                    }
3966                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
3967                } else {
3968                    if (ot != MO_64) {
3969                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
3970                    }
3971                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
3972                }
3973                gen_op_mov_reg_v(ot, reg, cpu_T0);
3974                break;
3975
3976            case 0x0f3:
3977            case 0x1f3:
3978            case 0x2f3:
3979            case 0x3f3: /* Group 17 */
3980                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3981                    || !(s->prefix & PREFIX_VEX)
3982                    || s->vex_l != 0) {
3983                    goto illegal_op;
3984                }
3985                ot = mo_64_32(s->dflag);
3986                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3987
                    switch (reg & 7) {
                    case 1: /* blsr By,Ey */
                        /* Reset the lowest set bit: Ey & (Ey - 1).  */
                        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
                        tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
                        tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                        break;

                    case 2: /* blsmsk By,Ey */
                        /* Mask up to the lowest set bit: Ey ^ (Ey - 1).  */
                        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
                        tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
                        tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
                        break;

                    case 3: /* blsi By, Ey */
                        /* Isolate the lowest set bit: Ey & -Ey.  */
                        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
                        tcg_gen_neg_tl(cpu_T1, cpu_T0);
                        tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                        break;

                    default:
                        goto unknown_op;
                    }
                    /* All three write their result to By and set the
                       flags from the result and the original source.  */
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
                    set_cc_op(s, CC_OP_BMILGB + ot);
                    break;
4017
4018            default:
4019                goto unknown_op;
4020            }
4021            break;
4022
4023        case 0x03a:
4024        case 0x13a:
4025            b = modrm;
4026            modrm = cpu_ldub_code(env, s->pc++);
4027            rm = modrm & 7;
4028            reg = ((modrm >> 3) & 7) | rex_r;
4029            mod = (modrm >> 6) & 3;
4030            if (b1 >= 2) {
4031                goto unknown_op;
4032            }
4033
4034            sse_fn_eppi = sse_op_table7[b].op[b1];
4035            if (!sse_fn_eppi) {
4036                goto unknown_op;
4037            }
4038            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4039                goto illegal_op;
4040
4041            if (sse_fn_eppi == SSE_SPECIAL) {
4042                ot = mo_64_32(s->dflag);
4043                rm = (modrm & 7) | REX_B(s);
4044                if (mod != 3)
4045                    gen_lea_modrm(env, s, modrm);
4046                reg = ((modrm >> 3) & 7) | rex_r;
4047                val = cpu_ldub_code(env, s->pc++);
4048                switch (b) {
4049                case 0x14: /* pextrb */
4050                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4051                                            xmm_regs[reg].ZMM_B(val & 15)));
4052                    if (mod == 3) {
4053                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4054                    } else {
4055                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4056                                           s->mem_index, MO_UB);
4057                    }
4058                    break;
4059                case 0x15: /* pextrw */
4060                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4061                                            xmm_regs[reg].ZMM_W(val & 7)));
4062                    if (mod == 3) {
4063                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4064                    } else {
4065                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4066                                           s->mem_index, MO_LEUW);
4067                    }
4068                    break;
4069                case 0x16:
4070                    if (ot == MO_32) { /* pextrd */
4071                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4072                                        offsetof(CPUX86State,
4073                                                xmm_regs[reg].ZMM_L(val & 3)));
4074                        if (mod == 3) {
4075                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4076                        } else {
4077                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4078                                                s->mem_index, MO_LEUL);
4079                        }
4080                    } else { /* pextrq */
4081#ifdef TARGET_X86_64
4082                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4083                                        offsetof(CPUX86State,
4084                                                xmm_regs[reg].ZMM_Q(val & 1)));
4085                        if (mod == 3) {
4086                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4087                        } else {
4088                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4089                                                s->mem_index, MO_LEQ);
4090                        }
4091#else
4092                        goto illegal_op;
4093#endif
4094                    }
4095                    break;
4096                case 0x17: /* extractps */
4097                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4098                                            xmm_regs[reg].ZMM_L(val & 3)));
4099                    if (mod == 3) {
4100                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4101                    } else {
4102                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4103                                           s->mem_index, MO_LEUL);
4104                    }
4105                    break;
4106                case 0x20: /* pinsrb */
4107                    if (mod == 3) {
4108                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
4109                    } else {
4110                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
4111                                           s->mem_index, MO_UB);
4112                    }
4113                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4114                                            xmm_regs[reg].ZMM_B(val & 15)));
4115                    break;
4116                case 0x21: /* insertps */
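                        /* imm8 layout: bits [7:6] select the source lane,
                           bits [5:4] the destination lane, and bits [3:0]
                           a mask of destination lanes to clear.  */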
4117                    if (mod == 3) {
4118                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4119                                        offsetof(CPUX86State,xmm_regs[rm]
4120                                                .ZMM_L((val >> 6) & 3)));
4121                    } else {
4122                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4123                                            s->mem_index, MO_LEUL);
4124                    }
4125                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4126                                    offsetof(CPUX86State,xmm_regs[reg]
4127                                            .ZMM_L((val >> 4) & 3)));
4128                    if ((val >> 0) & 1)
4129                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4130                                        cpu_env, offsetof(CPUX86State,
4131                                                xmm_regs[reg].ZMM_L(0)));
4132                    if ((val >> 1) & 1)
4133                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4134                                        cpu_env, offsetof(CPUX86State,
4135                                                xmm_regs[reg].ZMM_L(1)));
4136                    if ((val >> 2) & 1)
4137                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4138                                        cpu_env, offsetof(CPUX86State,
4139                                                xmm_regs[reg].ZMM_L(2)));
4140                    if ((val >> 3) & 1)
4141                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4142                                        cpu_env, offsetof(CPUX86State,
4143                                                xmm_regs[reg].ZMM_L(3)));
4144                    break;
4145                case 0x22:
4146                    if (ot == MO_32) { /* pinsrd */
4147                        if (mod == 3) {
4148                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4149                        } else {
4150                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4151                                                s->mem_index, MO_LEUL);
4152                        }
4153                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4154                                        offsetof(CPUX86State,
4155                                                xmm_regs[reg].ZMM_L(val & 3)));
4156                    } else { /* pinsrq */
4157#ifdef TARGET_X86_64
4158                        if (mod == 3) {
4159                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4160                        } else {
4161                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4162                                                s->mem_index, MO_LEQ);
4163                        }
4164                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4165                                        offsetof(CPUX86State,
4166                                                xmm_regs[reg].ZMM_Q(val & 1)));
4167#else
4168                        goto illegal_op;
4169#endif
4170                    }
4171                    break;
4172                }
4173                return;
4174            }
4175
4176            if (b1) {
4177                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4178                if (mod == 3) {
4179                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4180                } else {
4181                    op2_offset = offsetof(CPUX86State,xmm_t0);
4182                    gen_lea_modrm(env, s, modrm);
4183                    gen_ldo_env_A0(s, op2_offset);
4184                }
4185            } else {
4186                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4187                if (mod == 3) {
4188                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4189                } else {
4190                    op2_offset = offsetof(CPUX86State,mmx_t0);
4191                    gen_lea_modrm(env, s, modrm);
4192                    gen_ldq_env_A0(s, op2_offset);
4193                }
4194            }
4195            val = cpu_ldub_code(env, s->pc++);
4196
4197            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4198                set_cc_op(s, CC_OP_EFLAGS);
4199
4200                if (s->dflag == MO_64) {
4201                    /* The helper must use entire 64-bit gp registers */
4202                    val |= 1 << 8;
4203                }
4204            }
4205
4206            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4207            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4208            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4209            break;
4210
4211        case 0x33a:
4212            /* Various integer extensions at 0f 3a f[0-f].  */
4213            b = modrm | (b1 << 8);
4214            modrm = cpu_ldub_code(env, s->pc++);
4215            reg = ((modrm >> 3) & 7) | rex_r;
4216
4217            switch (b) {
4218            case 0x3f0: /* rorx Gy,Ey, Ib */
4219                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4220                    || !(s->prefix & PREFIX_VEX)
4221                    || s->vex_l != 0) {
4222                    goto illegal_op;
4223                }
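                    /* rorx rotates right by an immediate count and,
                       unlike ror, leaves the flags untouched.  */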
4224                ot = mo_64_32(s->dflag);
4225                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4226                b = cpu_ldub_code(env, s->pc++);
4227                if (ot == MO_64) {
4228                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
4229                } else {
4230                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4231                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4232                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
4233                }
4234                gen_op_mov_reg_v(ot, reg, cpu_T0);
4235                break;
4236
4237            default:
4238                goto unknown_op;
4239            }
4240            break;
4241
4242        default:
4243        unknown_op:
4244            gen_unknown_opcode(env, s);
4245            return;
4246        }
4247    } else {
4248        /* generic MMX or SSE operation */
4249        switch(b) {
4250        case 0x70: /* pshufx insn */
4251        case 0xc6: /* pshufx insn */
4252        case 0xc2: /* compare insns */
4253            s->rip_offset = 1;
4254            break;
4255        default:
4256            break;
4257        }
4258        if (is_xmm) {
4259            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4260            if (mod != 3) {
4261                int sz = 4;
4262
4263                gen_lea_modrm(env, s, modrm);
4264                op2_offset = offsetof(CPUX86State,xmm_t0);
4265
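                    /* Scalar and comparison forms only read 32 or 64 bits
                       of memory; everything else fetches the full 16
                       bytes into xmm_t0.  */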
4266                switch (b) {
4267                case 0x50 ... 0x5a:
4268                case 0x5c ... 0x5f:
4269                case 0xc2:
4270                    /* Most sse scalar operations.  */
4271                    if (b1 == 2) {
4272                        sz = 2;
4273                    } else if (b1 == 3) {
4274                        sz = 3;
4275                    }
4276                    break;
4277
4278                case 0x2e:  /* ucomis[sd] */
4279                case 0x2f:  /* comis[sd] */
4280                    if (b1 == 0) {
4281                        sz = 2;
4282                    } else {
4283                        sz = 3;
4284                    }
4285                    break;
4286                }
4287
4288                switch (sz) {
4289                case 2:
4290                    /* 32 bit access */
4291                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
4292                    tcg_gen_st32_tl(cpu_T0, cpu_env,
4293                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4294                    break;
4295                case 3:
4296                    /* 64 bit access */
4297                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4298                    break;
4299                default:
4300                    /* 128 bit access */
4301                    gen_ldo_env_A0(s, op2_offset);
4302                    break;
4303                }
4304            } else {
4305                rm = (modrm & 7) | REX_B(s);
4306                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4307            }
4308        } else {
4309            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4310            if (mod != 3) {
4311                gen_lea_modrm(env, s, modrm);
4312                op2_offset = offsetof(CPUX86State,mmx_t0);
4313                gen_ldq_env_A0(s, op2_offset);
4314            } else {
4315                rm = (modrm & 7);
4316                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4317            }
4318        }
4319        switch(b) {
4320        case 0x0f: /* 3DNow! data insns */
4321            val = cpu_ldub_code(env, s->pc++);
4322            sse_fn_epp = sse_op_table5[val];
4323            if (!sse_fn_epp) {
4324                goto unknown_op;
4325            }
4326            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4327                goto illegal_op;
4328            }
4329            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4330            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4331            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4332            break;
4333        case 0x70: /* pshufx insn */
4334        case 0xc6: /* pshufx insn */
4335            val = cpu_ldub_code(env, s->pc++);
4336            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4337            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4338            /* XXX: introduce a new table? */
4339            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4340            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4341            break;
4342        case 0xc2:
4343            /* compare insns */
4344            val = cpu_ldub_code(env, s->pc++);
4345            if (val >= 8)
4346                goto unknown_op;
4347            sse_fn_epp = sse_op_table4[val][b1];
4348
4349            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4350            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4351            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4352            break;
4353        case 0xf7:
                /* maskmov: we must prepare A0 */
4355            if (mod != 3)
4356                goto illegal_op;
4357            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4358            gen_extu(s->aflag, cpu_A0);
4359            gen_add_A0_ds_seg(s);
4360
4361            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4362            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4363            /* XXX: introduce a new table? */
4364            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4365            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4366            break;
4367        default:
4368            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4369            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4370            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4371            break;
4372        }
4373        if (b == 0x2e || b == 0x2f) {
4374            set_cc_op(s, CC_OP_EFLAGS);
4375        }
4376    }
4377}
4378
4379/* convert one instruction. s->is_jmp is set if the translation must
4380   be stopped. Return the next pc value */
4381static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4382                               target_ulong pc_start)
4383{
4384    int b, prefixes;
4385    int shift;
4386    TCGMemOp ot, aflag, dflag;
4387    int modrm, reg, rm, mod, op, opreg, val;
4388    target_ulong next_eip, tval;
4389    int rex_w, rex_r;
4390
4391    s->pc_start = s->pc = pc_start;
4392    prefixes = 0;
4393    s->override = -1;
4394    rex_w = -1;
4395    rex_r = 0;
4396#ifdef TARGET_X86_64
4397    s->rex_x = 0;
4398    s->rex_b = 0;
4399    x86_64_hregs = 0;
4400#endif
4401    s->rip_offset = 0; /* for relative ip address */
4402    s->vex_l = 0;
4403    s->vex_v = 0;
4404 next_byte:
4405    b = cpu_ldub_code(env, s->pc);
4406    s->pc++;
4407    /* Collect prefixes.  */
4408    switch (b) {
4409    case 0xf3:
4410        prefixes |= PREFIX_REPZ;
4411        goto next_byte;
4412    case 0xf2:
4413        prefixes |= PREFIX_REPNZ;
4414        goto next_byte;
4415    case 0xf0:
4416        prefixes |= PREFIX_LOCK;
4417        goto next_byte;
4418    case 0x2e:
4419        s->override = R_CS;
4420        goto next_byte;
4421    case 0x36:
4422        s->override = R_SS;
4423        goto next_byte;
4424    case 0x3e:
4425        s->override = R_DS;
4426        goto next_byte;
4427    case 0x26:
4428        s->override = R_ES;
4429        goto next_byte;
4430    case 0x64:
4431        s->override = R_FS;
4432        goto next_byte;
4433    case 0x65:
4434        s->override = R_GS;
4435        goto next_byte;
4436    case 0x66:
4437        prefixes |= PREFIX_DATA;
4438        goto next_byte;
4439    case 0x67:
4440        prefixes |= PREFIX_ADR;
4441        goto next_byte;
4442#ifdef TARGET_X86_64
4443    case 0x40 ... 0x4f:
4444        if (CODE64(s)) {
4445            /* REX prefix */
4446            rex_w = (b >> 3) & 1;
4447            rex_r = (b & 0x4) << 1;
4448            s->rex_x = (b & 0x2) << 2;
4449            REX_B(s) = (b & 0x1) << 3;
4450            x86_64_hregs = 1; /* select uniform byte register addressing */
4451            goto next_byte;
4452        }
4453        break;
4454#endif
4455    case 0xc5: /* 2-byte VEX */
4456    case 0xc4: /* 3-byte VEX */
            /* VEX prefixes are only valid in 32-bit and 64-bit mode;
               in real, vm86 and 16-bit protected mode the same bytes
               decode as LES or LDS.  */
4459        if (s->code32 && !s->vm86) {
4460            static const int pp_prefix[4] = {
4461                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4462            };
4463            int vex3, vex2 = cpu_ldub_code(env, s->pc);
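                /* 2-byte VEX: c5 RvvvvLpp.  3-byte VEX: c4 RXBmmmmm
                   WvvvvLpp.  The R, X, B and vvvv fields are stored
                   inverted.  */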
4464
4465            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4466                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4467                   otherwise the instruction is LES or LDS.  */
4468                break;
4469            }
4470            s->pc++;
4471
4472            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4473            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4474                            | PREFIX_LOCK | PREFIX_DATA)) {
4475                goto illegal_op;
4476            }
4477#ifdef TARGET_X86_64
4478            if (x86_64_hregs) {
4479                goto illegal_op;
4480            }
4481#endif
4482            rex_r = (~vex2 >> 4) & 8;
4483            if (b == 0xc5) {
4484                vex3 = vex2;
4485                b = cpu_ldub_code(env, s->pc++);
4486            } else {
4487#ifdef TARGET_X86_64
4488                s->rex_x = (~vex2 >> 3) & 8;
4489                s->rex_b = (~vex2 >> 2) & 8;
4490#endif
4491                vex3 = cpu_ldub_code(env, s->pc++);
4492                rex_w = (vex3 >> 7) & 1;
4493                switch (vex2 & 0x1f) {
4494                case 0x01: /* Implied 0f leading opcode bytes.  */
4495                    b = cpu_ldub_code(env, s->pc++) | 0x100;
4496                    break;
4497                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4498                    b = 0x138;
4499                    break;
4500                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4501                    b = 0x13a;
4502                    break;
4503                default:   /* Reserved for future use.  */
4504                    goto unknown_op;
4505                }
4506            }
4507            s->vex_v = (~vex3 >> 3) & 0xf;
4508            s->vex_l = (vex3 >> 2) & 1;
4509            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4510        }
4511        break;
4512    }
4513
4514    /* Post-process prefixes.  */
4515    if (CODE64(s)) {
4516        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4517           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4518           over 0x66 if both are present.  */
4519        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4520        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4521        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4522    } else {
4523        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4524        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4525            dflag = MO_32;
4526        } else {
4527            dflag = MO_16;
4528        }
4529        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4530        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4531            aflag = MO_32;
            } else {
4533            aflag = MO_16;
4534        }
4535    }
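        /* For example, in 64-bit mode "66 48 01 c8" carries both 0x66
           and REX.W; rex_w wins above, making it a 64-bit
           add %rcx,%rax.  */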
4536
4537    s->prefix = prefixes;
4538    s->aflag = aflag;
4539    s->dflag = dflag;
4540
4541    /* now check op code */
4542 reswitch:
4543    switch(b) {
4544    case 0x0f:
4545        /**************************/
4546        /* extended op code */
4547        b = cpu_ldub_code(env, s->pc++) | 0x100;
4548        goto reswitch;
4549
4550        /**************************/
4551        /* arith & logic */
4552    case 0x00 ... 0x05:
4553    case 0x08 ... 0x0d:
4554    case 0x10 ... 0x15:
4555    case 0x18 ... 0x1d:
4556    case 0x20 ... 0x25:
4557    case 0x28 ... 0x2d:
4558    case 0x30 ... 0x35:
4559    case 0x38 ... 0x3d:
4560        {
4561            int op, f, val;
4562            op = (b >> 3) & 7;
4563            f = (b >> 1) & 3;
4564
4565            ot = mo_b_d(b, dflag);
4566
4567            switch(f) {
4568            case 0: /* OP Ev, Gv */
4569                modrm = cpu_ldub_code(env, s->pc++);
4570                reg = ((modrm >> 3) & 7) | rex_r;
4571                mod = (modrm >> 6) & 3;
4572                rm = (modrm & 7) | REX_B(s);
4573                if (mod != 3) {
4574                    gen_lea_modrm(env, s, modrm);
4575                    opreg = OR_TMP0;
4576                } else if (op == OP_XORL && rm == reg) {
4577                xor_zero:
4578                    /* xor reg, reg optimisation */
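                        /* CC_OP_CLR records the fixed flags of a
                           zeroing idiom (ZF and PF set, all others
                           clear) with no deferred computation.  */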
4579                    set_cc_op(s, CC_OP_CLR);
4580                    tcg_gen_movi_tl(cpu_T0, 0);
4581                    gen_op_mov_reg_v(ot, reg, cpu_T0);
4582                    break;
4583                } else {
4584                    opreg = rm;
4585                }
4586                gen_op_mov_v_reg(ot, cpu_T1, reg);
4587                gen_op(s, op, ot, opreg);
4588                break;
4589            case 1: /* OP Gv, Ev */
4590                modrm = cpu_ldub_code(env, s->pc++);
4591                mod = (modrm >> 6) & 3;
4592                reg = ((modrm >> 3) & 7) | rex_r;
4593                rm = (modrm & 7) | REX_B(s);
4594                if (mod != 3) {
4595                    gen_lea_modrm(env, s, modrm);
4596                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4597                } else if (op == OP_XORL && rm == reg) {
4598                    goto xor_zero;
4599                } else {
4600                    gen_op_mov_v_reg(ot, cpu_T1, rm);
4601                }
4602                gen_op(s, op, ot, reg);
4603                break;
4604            case 2: /* OP A, Iv */
4605                val = insn_get(env, s, ot);
4606                tcg_gen_movi_tl(cpu_T1, val);
4607                gen_op(s, op, ot, OR_EAX);
4608                break;
4609            }
4610        }
4611        break;
4612
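        /* GRP1: ALU op with immediate.  0x80 is OP Eb,Ib; 0x81 is
           OP Ev,Iz; 0x83 is OP Ev,Ib with the byte sign-extended;
           0x82 is a legacy alias of 0x80, invalid in 64-bit mode.
           The ALU op itself comes from the ModRM reg field.  */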
4613    case 0x82:
4614        if (CODE64(s))
4615            goto illegal_op;
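            /* fall through */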
4616    case 0x80: /* GRP1 */
4617    case 0x81:
4618    case 0x83:
4619        {
4620            int val;
4621
4622            ot = mo_b_d(b, dflag);
4623
4624            modrm = cpu_ldub_code(env, s->pc++);
4625            mod = (modrm >> 6) & 3;
4626            rm = (modrm & 7) | REX_B(s);
4627            op = (modrm >> 3) & 7;
4628
4629            if (mod != 3) {
4630                if (b == 0x83)
4631                    s->rip_offset = 1;
4632                else
4633                    s->rip_offset = insn_const_size(ot);
4634                gen_lea_modrm(env, s, modrm);
4635                opreg = OR_TMP0;
4636            } else {
4637                opreg = rm;
4638            }
4639
4640            switch(b) {
4641            default:
4642            case 0x80:
4643            case 0x81:
4644            case 0x82:
4645                val = insn_get(env, s, ot);
4646                break;
4647            case 0x83:
4648                val = (int8_t)insn_get(env, s, MO_8);
4649                break;
4650            }
4651            tcg_gen_movi_tl(cpu_T1, val);
4652            gen_op(s, op, ot, opreg);
4653        }
4654        break;
4655
4656        /**************************/
4657        /* inc, dec, and other misc arith */
4658    case 0x40 ... 0x47: /* inc Gv */
4659        ot = dflag;
4660        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4661        break;
4662    case 0x48 ... 0x4f: /* dec Gv */
4663        ot = dflag;
4664        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4665        break;
4666    case 0xf6: /* GRP3 */
4667    case 0xf7:
4668        ot = mo_b_d(b, dflag);
4669
4670        modrm = cpu_ldub_code(env, s->pc++);
4671        mod = (modrm >> 6) & 3;
4672        rm = (modrm & 7) | REX_B(s);
4673        op = (modrm >> 3) & 7;
4674        if (mod != 3) {
4675            if (op == 0) {
4676                s->rip_offset = insn_const_size(ot);
4677            }
4678            gen_lea_modrm(env, s, modrm);
4679            /* For those below that handle locked memory, don't load here.  */
4680            if (!(s->prefix & PREFIX_LOCK)
4681                || op != 2) {
4682                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4683            }
4684        } else {
4685            gen_op_mov_v_reg(ot, cpu_T0, rm);
4686        }
4687
4688        switch(op) {
4689        case 0: /* test */
4690            val = insn_get(env, s, ot);
4691            tcg_gen_movi_tl(cpu_T1, val);
4692            gen_op_testl_T0_T1_cc();
4693            set_cc_op(s, CC_OP_LOGICB + ot);
4694            break;
4695        case 2: /* not */
4696            if (s->prefix & PREFIX_LOCK) {
4697                if (mod == 3) {
4698                    goto illegal_op;
4699                }
4700                tcg_gen_movi_tl(cpu_T0, ~0);
4701                tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
4702                                            s->mem_index, ot | MO_LE);
4703            } else {
4704                tcg_gen_not_tl(cpu_T0, cpu_T0);
4705                if (mod != 3) {
4706                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4707                } else {
4708                    gen_op_mov_reg_v(ot, rm, cpu_T0);
4709                }
4710            }
4711            break;
4712        case 3: /* neg */
4713            if (s->prefix & PREFIX_LOCK) {
4714                TCGLabel *label1;
4715                TCGv a0, t0, t1, t2;
4716
4717                if (mod == 3) {
4718                    goto illegal_op;
4719                }
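                    /* Emulate locked NEG with a compare-and-swap loop:
                       reload the old value, try to store its negation,
                       and retry if another CPU modified the word in
                       between.  */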
4720                a0 = tcg_temp_local_new();
4721                t0 = tcg_temp_local_new();
4722                label1 = gen_new_label();
4723
4724                tcg_gen_mov_tl(a0, cpu_A0);
4725                tcg_gen_mov_tl(t0, cpu_T0);
4726
4727                gen_set_label(label1);
4728                t1 = tcg_temp_new();
4729                t2 = tcg_temp_new();
4730                tcg_gen_mov_tl(t2, t0);
4731                tcg_gen_neg_tl(t1, t0);
4732                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4733                                          s->mem_index, ot | MO_LE);
4734                tcg_temp_free(t1);
4735                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4736
4737                tcg_temp_free(t2);
4738                tcg_temp_free(a0);
4739                tcg_gen_mov_tl(cpu_T0, t0);
4740                tcg_temp_free(t0);
4741            } else {
4742                tcg_gen_neg_tl(cpu_T0, cpu_T0);
4743                if (mod != 3) {
4744                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4745                } else {
4746                    gen_op_mov_reg_v(ot, rm, cpu_T0);
4747                }
4748            }
4749            gen_op_update_neg_cc();
4750            set_cc_op(s, CC_OP_SUBB + ot);
4751            break;
4752        case 4: /* mul */
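                /* Unsigned multiply.  CF and OF are set iff the upper
                   half of the widened product is non-zero, so cc_src
                   receives that upper half and CC_OP_MUL* derives the
                   flags lazily.  */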
4753            switch(ot) {
4754            case MO_8:
4755                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4756                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
4757                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
4758                /* XXX: use 32 bit mul which could be faster */
4759                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4760                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4761                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4762                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
4763                set_cc_op(s, CC_OP_MULB);
4764                break;
4765            case MO_16:
4766                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4767                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4768                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
4769                /* XXX: use 32 bit mul which could be faster */
4770                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4771                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4772                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4773                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4774                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4775                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4776                set_cc_op(s, CC_OP_MULW);
4777                break;
4778            default:
4779            case MO_32:
4780                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4781                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4782                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4783                                  cpu_tmp2_i32, cpu_tmp3_i32);
4784                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4785                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4786                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4787                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4788                set_cc_op(s, CC_OP_MULL);
4789                break;
4790#ifdef TARGET_X86_64
4791            case MO_64:
4792                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4793                                  cpu_T0, cpu_regs[R_EAX]);
4794                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4795                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4796                set_cc_op(s, CC_OP_MULQ);
4797                break;
4798#endif
4799            }
4800            break;
4801        case 5: /* imul */
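                /* Signed multiply.  cc_src is the true upper half
                   minus the sign extension of the lower half: non-zero
                   exactly when the product overflows, which
                   CC_OP_MUL* maps to CF=OF=1.  */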
4802            switch(ot) {
4803            case MO_8:
4804                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4805                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4806                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
4807                /* XXX: use 32 bit mul which could be faster */
4808                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4809                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4810                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4811                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
4812                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4813                set_cc_op(s, CC_OP_MULB);
4814                break;
4815            case MO_16:
4816                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4817                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4818                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
4819                /* XXX: use 32 bit mul which could be faster */
4820                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4821                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4822                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4823                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
4824                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4825                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4826                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4827                set_cc_op(s, CC_OP_MULW);
4828                break;
4829            default:
4830            case MO_32:
4831                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4832                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4833                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4834                                  cpu_tmp2_i32, cpu_tmp3_i32);
4835                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4836                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4837                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4838                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4839                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4840                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4841                set_cc_op(s, CC_OP_MULL);
4842                break;
4843#ifdef TARGET_X86_64
4844            case MO_64:
4845                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4846                                  cpu_T0, cpu_regs[R_EAX]);
4847                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4848                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4849                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4850                set_cc_op(s, CC_OP_MULQ);
4851                break;
4852#endif
4853            }
4854            break;
4855        case 6: /* div */
4856            switch(ot) {
4857            case MO_8:
4858                gen_helper_divb_AL(cpu_env, cpu_T0);
4859                break;
4860            case MO_16:
4861                gen_helper_divw_AX(cpu_env, cpu_T0);
4862                break;
4863            default:
4864            case MO_32:
4865                gen_helper_divl_EAX(cpu_env, cpu_T0);
4866                break;
4867#ifdef TARGET_X86_64
4868            case MO_64:
4869                gen_helper_divq_EAX(cpu_env, cpu_T0);
4870                break;
4871#endif
4872            }
4873            break;
4874        case 7: /* idiv */
4875            switch(ot) {
4876            case MO_8:
4877                gen_helper_idivb_AL(cpu_env, cpu_T0);
4878                break;
4879            case MO_16:
4880                gen_helper_idivw_AX(cpu_env, cpu_T0);
4881                break;
4882            default:
4883            case MO_32:
4884                gen_helper_idivl_EAX(cpu_env, cpu_T0);
4885                break;
4886#ifdef TARGET_X86_64
4887            case MO_64:
4888                gen_helper_idivq_EAX(cpu_env, cpu_T0);
4889                break;
4890#endif
4891            }
4892            break;
4893        default:
4894            goto unknown_op;
4895        }
4896        break;
4897
4898    case 0xfe: /* GRP4 */
4899    case 0xff: /* GRP5 */
4900        ot = mo_b_d(b, dflag);
4901
4902        modrm = cpu_ldub_code(env, s->pc++);
4903        mod = (modrm >> 6) & 3;
4904        rm = (modrm & 7) | REX_B(s);
4905        op = (modrm >> 3) & 7;
4906        if (op >= 2 && b == 0xfe) {
4907            goto unknown_op;
4908        }
4909        if (CODE64(s)) {
4910            if (op == 2 || op == 4) {
4911                /* operand size for jumps is 64 bit */
4912                ot = MO_64;
4913            } else if (op == 3 || op == 5) {
4914                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4915            } else if (op == 6) {
4916                /* default push size is 64 bit */
4917                ot = mo_pushpop(s, dflag);
4918            }
4919        }
4920        if (mod != 3) {
4921            gen_lea_modrm(env, s, modrm);
4922            if (op >= 2 && op != 3 && op != 5)
4923                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4924        } else {
4925            gen_op_mov_v_reg(ot, cpu_T0, rm);
4926        }
4927
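            /* /0 inc, /1 dec, /2 call, /3 lcall, /4 jmp, /5 ljmp,
               /6 push; /7 is undefined.  GRP4 (0xfe) only implements
               /0 and /1, as enforced above.  */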
4928        switch(op) {
4929        case 0: /* inc Ev */
4930            if (mod != 3)
4931                opreg = OR_TMP0;
4932            else
4933                opreg = rm;
4934            gen_inc(s, ot, opreg, 1);
4935            break;
4936        case 1: /* dec Ev */
4937            if (mod != 3)
4938                opreg = OR_TMP0;
4939            else
4940                opreg = rm;
4941            gen_inc(s, ot, opreg, -1);
4942            break;
4943        case 2: /* call Ev */
4944            /* XXX: optimize if memory (no 'and' is necessary) */
4945            if (dflag == MO_16) {
4946                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4947            }
4948            next_eip = s->pc - s->cs_base;
4949            tcg_gen_movi_tl(cpu_T1, next_eip);
4950            gen_push_v(s, cpu_T1);
4951            gen_op_jmp_v(cpu_T0);
4952            gen_bnd_jmp(s);
4953            gen_eob(s);
4954            break;
4955        case 3: /* lcall Ev */
4956            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4957            gen_add_A0_im(s, 1 << ot);
4958            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4959        do_lcall:
4960            if (s->pe && !s->vm86) {
4961                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4962                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4963                                           tcg_const_i32(dflag - 1),
4964                                           tcg_const_tl(s->pc - s->cs_base));
4965            } else {
4966                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4967                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
4968                                      tcg_const_i32(dflag - 1),
4969                                      tcg_const_i32(s->pc - s->cs_base));
4970            }
4971            gen_eob(s);
4972            break;
4973        case 4: /* jmp Ev */
4974            if (dflag == MO_16) {
4975                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4976            }
4977            gen_op_jmp_v(cpu_T0);
4978            gen_bnd_jmp(s);
4979            gen_eob(s);
4980            break;
4981        case 5: /* ljmp Ev */
4982            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4983            gen_add_A0_im(s, 1 << ot);
4984            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4985        do_ljmp:
4986            if (s->pe && !s->vm86) {
4987                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4988                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4989                                          tcg_const_tl(s->pc - s->cs_base));
4990            } else {
4991                gen_op_movl_seg_T0_vm(R_CS);
4992                gen_op_jmp_v(cpu_T1);
4993            }
4994            gen_eob(s);
4995            break;
4996        case 6: /* push Ev */
4997            gen_push_v(s, cpu_T0);
4998            break;
4999        default:
5000            goto unknown_op;
5001        }
5002        break;
5003
5004    case 0x84: /* test Ev, Gv */
5005    case 0x85:
5006        ot = mo_b_d(b, dflag);
5007
5008        modrm = cpu_ldub_code(env, s->pc++);
5009        reg = ((modrm >> 3) & 7) | rex_r;
5010
5011        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5012        gen_op_mov_v_reg(ot, cpu_T1, reg);
5013        gen_op_testl_T0_T1_cc();
5014        set_cc_op(s, CC_OP_LOGICB + ot);
5015        break;
5016
5017    case 0xa8: /* test eAX, Iv */
5018    case 0xa9:
5019        ot = mo_b_d(b, dflag);
5020        val = insn_get(env, s, ot);
5021
5022        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
5023        tcg_gen_movi_tl(cpu_T1, val);
5024        gen_op_testl_T0_T1_cc();
5025        set_cc_op(s, CC_OP_LOGICB + ot);
5026        break;
5027
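        /* CBW/CWDE/CDQE and CWD/CDQ/CQO: widen rAX in place, or
           replicate its sign bit into rDX, according to the current
           operand size.  */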
5028    case 0x98: /* CWDE/CBW */
5029        switch (dflag) {
5030#ifdef TARGET_X86_64
5031        case MO_64:
5032            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5033            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5034            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
5035            break;
5036#endif
5037        case MO_32:
5038            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5039            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5040            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
5041            break;
5042        case MO_16:
5043            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
5044            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5045            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
5046            break;
5047        default:
5048            tcg_abort();
5049        }
5050        break;
5051    case 0x99: /* CDQ/CWD */
5052        switch (dflag) {
5053#ifdef TARGET_X86_64
5054        case MO_64:
5055            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
5056            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
5057            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
5058            break;
5059#endif
5060        case MO_32:
5061            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5062            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5063            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
5064            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
5065            break;
5066        case MO_16:
5067            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5068            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5069            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
5070            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
5071            break;
5072        default:
5073            tcg_abort();
5074        }
5075        break;
5076    case 0x1af: /* imul Gv, Ev */
5077    case 0x69: /* imul Gv, Ev, I */
5078    case 0x6b:
5079        ot = dflag;
5080        modrm = cpu_ldub_code(env, s->pc++);
5081        reg = ((modrm >> 3) & 7) | rex_r;
5082        if (b == 0x69)
5083            s->rip_offset = insn_const_size(ot);
5084        else if (b == 0x6b)
5085            s->rip_offset = 1;
5086        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5087        if (b == 0x69) {
5088            val = insn_get(env, s, ot);
5089            tcg_gen_movi_tl(cpu_T1, val);
5090        } else if (b == 0x6b) {
5091            val = (int8_t)insn_get(env, s, MO_8);
5092            tcg_gen_movi_tl(cpu_T1, val);
5093        } else {
5094            gen_op_mov_v_reg(ot, cpu_T1, reg);
5095        }
5096        switch (ot) {
5097#ifdef TARGET_X86_64
5098        case MO_64:
5099            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
5100            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5101            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5102            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
5103            break;
5104#endif
5105        case MO_32:
5106            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5107            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
5108            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5109                              cpu_tmp2_i32, cpu_tmp3_i32);
5110            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5111            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5112            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5113            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5114            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5115            break;
5116        default:
5117            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5118            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
5119            /* XXX: use 32 bit mul which could be faster */
5120            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
5121            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
5122            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
5123            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
5124            gen_op_mov_reg_v(ot, reg, cpu_T0);
5125            break;
5126        }
5127        set_cc_op(s, CC_OP_MULB + ot);
5128        break;
5129    case 0x1c0:
5130    case 0x1c1: /* xadd Ev, Gv */
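            /* XADD exchanges the register with the destination and
               leaves the sum in the destination; with LOCK this maps
               directly onto an atomic fetch-and-add.  */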
5131        ot = mo_b_d(b, dflag);
5132        modrm = cpu_ldub_code(env, s->pc++);
5133        reg = ((modrm >> 3) & 7) | rex_r;
5134        mod = (modrm >> 6) & 3;
5135        gen_op_mov_v_reg(ot, cpu_T0, reg);
5136        if (mod == 3) {
5137            rm = (modrm & 7) | REX_B(s);
5138            gen_op_mov_v_reg(ot, cpu_T1, rm);
5139            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5140            gen_op_mov_reg_v(ot, reg, cpu_T1);
5141            gen_op_mov_reg_v(ot, rm, cpu_T0);
5142        } else {
5143            gen_lea_modrm(env, s, modrm);
5144            if (s->prefix & PREFIX_LOCK) {
5145                tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
5146                                            s->mem_index, ot | MO_LE);
5147                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5148            } else {
5149                gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5150                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5151                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5152            }
5153            gen_op_mov_reg_v(ot, reg, cpu_T1);
5154        }
5155        gen_op_update2_cc();
5156        set_cc_op(s, CC_OP_ADDB + ot);
5157        break;
5158    case 0x1b0:
5159    case 0x1b1: /* cmpxchg Ev, Gv */
5160        {
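                /* CMPXCHG: compare rAX with the destination; if equal,
                   store the source, otherwise load the old value into
                   rAX.  ZF comes from the comparison via CC_OP_SUB*.  */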
5161            TCGv oldv, newv, cmpv;
5162
5163            ot = mo_b_d(b, dflag);
5164            modrm = cpu_ldub_code(env, s->pc++);
5165            reg = ((modrm >> 3) & 7) | rex_r;
5166            mod = (modrm >> 6) & 3;
5167            oldv = tcg_temp_new();
5168            newv = tcg_temp_new();
5169            cmpv = tcg_temp_new();
5170            gen_op_mov_v_reg(ot, newv, reg);
5171            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5172
5173            if (s->prefix & PREFIX_LOCK) {
5174                if (mod == 3) {
5175                    goto illegal_op;
5176                }
5177                gen_lea_modrm(env, s, modrm);
5178                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
5179                                          s->mem_index, ot | MO_LE);
5180                gen_op_mov_reg_v(ot, R_EAX, oldv);
5181            } else {
5182                if (mod == 3) {
5183                    rm = (modrm & 7) | REX_B(s);
5184                    gen_op_mov_v_reg(ot, oldv, rm);
5185                } else {
5186                    gen_lea_modrm(env, s, modrm);
5187                    gen_op_ld_v(s, ot, oldv, cpu_A0);
5188                    rm = 0; /* avoid warning */
5189                }
5190                gen_extu(ot, oldv);
5191                gen_extu(ot, cmpv);
5192                /* store value = (old == cmp ? new : old);  */
5193                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5194                if (mod == 3) {
5195                    gen_op_mov_reg_v(ot, R_EAX, oldv);
5196                    gen_op_mov_reg_v(ot, rm, newv);
5197                } else {
5198                    /* Perform an unconditional store cycle like a
5199                       physical CPU; it must precede the accumulator
5200                       update so the instruction stays idempotent if
5201                       the store faults and is restarted */
5202                    gen_op_st_v(s, ot, newv, cpu_A0);
5203                    gen_op_mov_reg_v(ot, R_EAX, oldv);
5204                }
5205            }
5206            tcg_gen_mov_tl(cpu_cc_src, oldv);
5207            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
5208            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5209            set_cc_op(s, CC_OP_SUBB + ot);
5210            tcg_temp_free(oldv);
5211            tcg_temp_free(newv);
5212            tcg_temp_free(cmpv);
5213        }
5214        break;
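        /* CMPXCHG8B / CMPXCHG16B: memory operand only, ModRM reg
           field must be 1.  Under LOCK with MTTCG (parallel_cpus) the
           atomic helper is used, otherwise the unlocked variant.  */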
5215    case 0x1c7: /* cmpxchg8b */
5216        modrm = cpu_ldub_code(env, s->pc++);
5217        mod = (modrm >> 6) & 3;
5218        if ((mod == 3) || ((modrm & 0x38) != 0x8))
5219            goto illegal_op;
5220#ifdef TARGET_X86_64
5221        if (dflag == MO_64) {
5222            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5223                goto illegal_op;
5224            gen_lea_modrm(env, s, modrm);
5225            if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
5226                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
5227            } else {
5228                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
5229            }
5230        } else
5231#endif
5232        {
5233            if (!(s->cpuid_features & CPUID_CX8))
5234                goto illegal_op;
5235            gen_lea_modrm(env, s, modrm);
5236            if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
5237                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
5238            } else {
5239                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
5240            }
5241        }
5242        set_cc_op(s, CC_OP_EFLAGS);
5243        break;
5244
5245        /**************************/
5246        /* push/pop */
5247    case 0x50 ... 0x57: /* push */
5248        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
5249        gen_push_v(s, cpu_T0);
5250        break;
5251    case 0x58 ... 0x5f: /* pop */
5252        ot = gen_pop_T0(s);
5253        /* NOTE: order is important for pop %sp */
5254        gen_pop_update(s, ot);
5255        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
5256        break;
5257    case 0x60: /* pusha */
5258        if (CODE64(s))
5259            goto illegal_op;
5260        gen_pusha(s);
5261        break;
5262    case 0x61: /* popa */
5263        if (CODE64(s))
5264            goto illegal_op;
5265        gen_popa(s);
5266        break;
5267    case 0x68: /* push Iv */
5268    case 0x6a:
5269        ot = mo_pushpop(s, dflag);
5270        if (b == 0x68)
5271            val = insn_get(env, s, ot);
5272        else
5273            val = (int8_t)insn_get(env, s, MO_8);
5274        tcg_gen_movi_tl(cpu_T0, val);
5275        gen_push_v(s, cpu_T0);
5276        break;
5277    case 0x8f: /* pop Ev */
5278        modrm = cpu_ldub_code(env, s->pc++);
5279        mod = (modrm >> 6) & 3;
5280        ot = gen_pop_T0(s);
5281        if (mod == 3) {
5282            /* NOTE: order is important for pop %sp */
5283            gen_pop_update(s, ot);
5284            rm = (modrm & 7) | REX_B(s);
5285            gen_op_mov_reg_v(ot, rm, cpu_T0);
5286        } else {
5287            /* NOTE: order is important too for MMU exceptions */
5288            s->popl_esp_hack = 1 << ot;
5289            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5290            s->popl_esp_hack = 0;
5291            gen_pop_update(s, ot);
5292        }
5293        break;
5294    case 0xc8: /* enter */
5295        {
5296            int level;
5297            val = cpu_lduw_code(env, s->pc);
5298            s->pc += 2;
5299            level = cpu_ldub_code(env, s->pc++);
5300            gen_enter(s, val, level);
5301        }
5302        break;
5303    case 0xc9: /* leave */
5304        gen_leave(s);
5305        break;
5306    case 0x06: /* push es */
5307    case 0x0e: /* push cs */
5308    case 0x16: /* push ss */
5309    case 0x1e: /* push ds */
5310        if (CODE64(s))
5311            goto illegal_op;
5312        gen_op_movl_T0_seg(b >> 3);
5313        gen_push_v(s, cpu_T0);
5314        break;
5315    case 0x1a0: /* push fs */
5316    case 0x1a8: /* push gs */
5317        gen_op_movl_T0_seg((b >> 3) & 7);
5318        gen_push_v(s, cpu_T0);
5319        break;
5320    case 0x07: /* pop es */
5321    case 0x17: /* pop ss */
5322    case 0x1f: /* pop ds */
5323        if (CODE64(s))
5324            goto illegal_op;
5325        reg = b >> 3;
5326        ot = gen_pop_T0(s);
5327        gen_movl_seg_T0(s, reg);
5328        gen_pop_update(s, ot);
5329        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5330        if (s->is_jmp) {
5331            gen_jmp_im(s->pc - s->cs_base);
5332            if (reg == R_SS) {
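                    /* A load of SS inhibits interrupts and single-step
                       traps until after the following instruction.  */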
5333                s->tf = 0;
5334                gen_eob_inhibit_irq(s, true);
5335            } else {
5336                gen_eob(s);
5337            }
5338        }
5339        break;
5340    case 0x1a1: /* pop fs */
5341    case 0x1a9: /* pop gs */
5342        ot = gen_pop_T0(s);
5343        gen_movl_seg_T0(s, (b >> 3) & 7);
5344        gen_pop_update(s, ot);
5345        if (s->is_jmp) {
5346            gen_jmp_im(s->pc - s->cs_base);
5347            gen_eob(s);
5348        }
5349        break;
5350
5351        /**************************/
5352        /* mov */
5353    case 0x88:
5354    case 0x89: /* mov Gv, Ev */
5355        ot = mo_b_d(b, dflag);
5356        modrm = cpu_ldub_code(env, s->pc++);
5357        reg = ((modrm >> 3) & 7) | rex_r;
5358
5359        /* generate a generic store */
5360        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5361        break;
5362    case 0xc6:
5363    case 0xc7: /* mov Ev, Iv */
5364        ot = mo_b_d(b, dflag);
5365        modrm = cpu_ldub_code(env, s->pc++);
5366        mod = (modrm >> 6) & 3;
5367        if (mod != 3) {
5368            s->rip_offset = insn_const_size(ot);
5369            gen_lea_modrm(env, s, modrm);
5370        }
5371        val = insn_get(env, s, ot);
5372        tcg_gen_movi_tl(cpu_T0, val);
5373        if (mod != 3) {
5374            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5375        } else {
5376            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
5377        }
5378        break;
5379    case 0x8a:
5380    case 0x8b: /* mov Ev, Gv */
5381        ot = mo_b_d(b, dflag);
5382        modrm = cpu_ldub_code(env, s->pc++);
5383        reg = ((modrm >> 3) & 7) | rex_r;
5384
5385        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5386        gen_op_mov_reg_v(ot, reg, cpu_T0);
5387        break;
5388    case 0x8e: /* mov seg, Gv */
5389        modrm = cpu_ldub_code(env, s->pc++);
5390        reg = (modrm >> 3) & 7;
5391        if (reg >= 6 || reg == R_CS)
5392            goto illegal_op;
5393        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5394        gen_movl_seg_T0(s, reg);
5395        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5396        if (s->is_jmp) {
5397            gen_jmp_im(s->pc - s->cs_base);
5398            if (reg == R_SS) {
5399                s->tf = 0;
5400                gen_eob_inhibit_irq(s, true);
5401            } else {
5402                gen_eob(s);
5403            }
5404        }
5405        break;
5406    case 0x8c: /* mov Gv, seg */
5407        modrm = cpu_ldub_code(env, s->pc++);
5408        reg = (modrm >> 3) & 7;
5409        mod = (modrm >> 6) & 3;
5410        if (reg >= 6)
5411            goto illegal_op;
5412        gen_op_movl_T0_seg(reg);
5413        ot = mod == 3 ? dflag : MO_16;
5414        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5415        break;
5416
5417    case 0x1b6: /* movzbS Gv, Eb */
5418    case 0x1b7: /* movzwS Gv, Eb */
5419    case 0x1be: /* movsbS Gv, Eb */
5420    case 0x1bf: /* movswS Gv, Eb */
5421        {
5422            TCGMemOp d_ot;
5423            TCGMemOp s_ot;
5424
5425            /* d_ot is the size of destination */
5426            d_ot = dflag;
5427            /* ot is the size of source */
5428            ot = (b & 1) + MO_8;
5429            /* s_ot is the sign+size of source */
5430            s_ot = b & 8 ? MO_SIGN | ot : ot;
5431
5432            modrm = cpu_ldub_code(env, s->pc++);
5433            reg = ((modrm >> 3) & 7) | rex_r;
5434            mod = (modrm >> 6) & 3;
5435            rm = (modrm & 7) | REX_B(s);
5436
5437            if (mod == 3) {
5438                gen_op_mov_v_reg(ot, cpu_T0, rm);
5439                switch (s_ot) {
5440                case MO_UB:
5441                    tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
5442                    break;
5443                case MO_SB:
5444                    tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5445                    break;
5446                case MO_UW:
5447                    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5448                    break;
5449                default:
5450                case MO_SW:
5451                    tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5452                    break;
5453                }
5454                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5455            } else {
5456                gen_lea_modrm(env, s, modrm);
5457                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
5458                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5459            }
5460        }
5461        break;
5462
5463    case 0x8d: /* lea */
5464        modrm = cpu_ldub_code(env, s->pc++);
5465        mod = (modrm >> 6) & 3;
5466        if (mod == 3)
5467            goto illegal_op;
5468        reg = ((modrm >> 3) & 7) | rex_r;
5469        {
5470            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5471            TCGv ea = gen_lea_modrm_1(a);
5472            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5473            gen_op_mov_reg_v(dflag, reg, cpu_A0);
5474        }
5475        break;
5476
5477    case 0xa0: /* mov EAX, Ov */
5478    case 0xa1:
5479    case 0xa2: /* mov Ov, EAX */
5480    case 0xa3:
5481        {
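                /* mov rAX <-> moffs: the operand is a bare offset of
                   aflag size, with no ModRM byte, relative to DS (or a
                   segment override).  */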
5482            target_ulong offset_addr;
5483
5484            ot = mo_b_d(b, dflag);
5485            switch (s->aflag) {
5486#ifdef TARGET_X86_64
5487            case MO_64:
5488                offset_addr = cpu_ldq_code(env, s->pc);
5489                s->pc += 8;
5490                break;
5491#endif
5492            default:
5493                offset_addr = insn_get(env, s, s->aflag);
5494                break;
5495            }
5496            tcg_gen_movi_tl(cpu_A0, offset_addr);
5497            gen_add_A0_ds_seg(s);
5498            if ((b & 2) == 0) {
5499                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
5500                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
5501            } else {
5502                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
5503                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5504            }
5505        }
5506        break;
5507    case 0xd7: /* xlat */
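            /* XLAT: AL = [rBX + AL] in DS (or an override), with the
               address truncated to the current address size.  */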
5508        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
5509        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
5510        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
5511        gen_extu(s->aflag, cpu_A0);
5512        gen_add_A0_ds_seg(s);
5513        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
5514        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
5515        break;
5516    case 0xb0 ... 0xb7: /* mov R, Ib */
5517        val = insn_get(env, s, MO_8);
5518        tcg_gen_movi_tl(cpu_T0, val);
5519        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
5520        break;
5521    case 0xb8 ... 0xbf: /* mov R, Iv */
5522#ifdef TARGET_X86_64
5523        if (dflag == MO_64) {
5524            uint64_t tmp;
5525            /* 64 bit case */
5526            tmp = cpu_ldq_code(env, s->pc);
5527            s->pc += 8;
5528            reg = (b & 7) | REX_B(s);
5529            tcg_gen_movi_tl(cpu_T0, tmp);
5530            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
5531        } else
5532#endif
5533        {
5534            ot = dflag;
5535            val = insn_get(env, s, ot);
5536            reg = (b & 7) | REX_B(s);
5537            tcg_gen_movi_tl(cpu_T0, val);
5538            gen_op_mov_reg_v(ot, reg, cpu_T0);
5539        }
5540        break;
5541
5542    case 0x91 ... 0x97: /* xchg R, EAX */
5543    do_xchg_reg_eax:
5544        ot = dflag;
5545        reg = (b & 7) | REX_B(s);
5546        rm = R_EAX;
5547        goto do_xchg_reg;
5548    case 0x86:
5549    case 0x87: /* xchg Ev, Gv */
5550        ot = mo_b_d(b, dflag);
5551        modrm = cpu_ldub_code(env, s->pc++);
5552        reg = ((modrm >> 3) & 7) | rex_r;
5553        mod = (modrm >> 6) & 3;
5554        if (mod == 3) {
5555            rm = (modrm & 7) | REX_B(s);
5556        do_xchg_reg:
5557            gen_op_mov_v_reg(ot, cpu_T0, reg);
5558            gen_op_mov_v_reg(ot, cpu_T1, rm);
5559            gen_op_mov_reg_v(ot, rm, cpu_T0);
5560            gen_op_mov_reg_v(ot, reg, cpu_T1);
5561        } else {
5562            gen_lea_modrm(env, s, modrm);
5563            gen_op_mov_v_reg(ot, cpu_T0, reg);
5564            /* for xchg, lock is implicit */
5565            tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
5566                                   s->mem_index, ot | MO_LE);
5567            gen_op_mov_reg_v(ot, reg, cpu_T1);
5568        }
5569        break;
5570    case 0xc4: /* les Gv */
5571        /* In CODE64 this is VEX3; see above.  */
5572        op = R_ES;
5573        goto do_lxx;
5574    case 0xc5: /* lds Gv */
5575        /* In CODE64 this is VEX2; see above.  */
5576        op = R_DS;
5577        goto do_lxx;
5578    case 0x1b2: /* lss Gv */
5579        op = R_SS;
5580        goto do_lxx;
5581    case 0x1b4: /* lfs Gv */
5582        op = R_FS;
5583        goto do_lxx;
5584    case 0x1b5: /* lgs Gv */
5585        op = R_GS;
5586    do_lxx:
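            /* lXS loads a far pointer: an ot-sized offset followed by
               a 16-bit selector that is loaded into segment register
               'op'.  */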
5587        ot = dflag != MO_16 ? MO_32 : MO_16;
5588        modrm = cpu_ldub_code(env, s->pc++);
5589        reg = ((modrm >> 3) & 7) | rex_r;
5590        mod = (modrm >> 6) & 3;
5591        if (mod == 3)
5592            goto illegal_op;
5593        gen_lea_modrm(env, s, modrm);
5594        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5595        gen_add_A0_im(s, 1 << ot);
5596        /* load the segment first to handle exceptions properly */
5597        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5598        gen_movl_seg_T0(s, op);
5599        /* then put the data */
5600        gen_op_mov_reg_v(ot, reg, cpu_T1);
5601        if (s->is_jmp) {
5602            gen_jmp_im(s->pc - s->cs_base);
5603            gen_eob(s);
5604        }
5605        break;
5606
5607        /************************/
5608        /* shifts */
5609    case 0xc0:
5610    case 0xc1:
5611        /* shift Ev,Ib */
5612        shift = 2;
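        /* Common GRP2 shift decoder; 'shift' selects the count:
           0 = CL (0xd2/0xd3), 1 = the constant 1 (0xd0/0xd1), 2 = an
           immediate byte (0xc0/0xc1, fetched below).  The operation
           comes from the ModRM reg field.  */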
5613    grp2:
5614        {
5615            ot = mo_b_d(b, dflag);
5616            modrm = cpu_ldub_code(env, s->pc++);
5617            mod = (modrm >> 6) & 3;
5618            op = (modrm >> 3) & 7;
5619
5620            if (mod != 3) {
5621                if (shift == 2) {
5622                    s->rip_offset = 1;
5623                }
5624                gen_lea_modrm(env, s, modrm);
5625                opreg = OR_TMP0;
5626            } else {
5627                opreg = (modrm & 7) | REX_B(s);
5628            }
5629
5630            /* simpler op */
5631            if (shift == 0) {
5632                gen_shift(s, op, ot, opreg, OR_ECX);
5633            } else {
5634                if (shift == 2) {
5635                    shift = cpu_ldub_code(env, s->pc++);
5636                }
5637                gen_shifti(s, op, ot, opreg, shift);
5638            }
5639        }
5640        break;
5641    case 0xd0:
5642    case 0xd1:
5643        /* shift Ev,1 */
5644        shift = 1;
5645        goto grp2;
5646    case 0xd2:
5647    case 0xd3:
5648        /* shift Ev,cl */
5649        shift = 0;
5650        goto grp2;
5651
5652    case 0x1a4: /* shld imm */
5653        op = 0;
5654        shift = 1;
5655        goto do_shiftd;
5656    case 0x1a5: /* shld cl */
5657        op = 0;
5658        shift = 0;
5659        goto do_shiftd;
5660    case 0x1ac: /* shrd imm */
5661        op = 1;
5662        shift = 1;
5663        goto do_shiftd;
5664    case 0x1ad: /* shrd cl */
5665        op = 1;
5666        shift = 0;
5667    do_shiftd:
5668        ot = dflag;
5669        modrm = cpu_ldub_code(env, s->pc++);
5670        mod = (modrm >> 6) & 3;
5671        rm = (modrm & 7) | REX_B(s);
5672        reg = ((modrm >> 3) & 7) | rex_r;
5673        if (mod != 3) {
5674            gen_lea_modrm(env, s, modrm);
5675            opreg = OR_TMP0;
5676        } else {
5677            opreg = rm;
5678        }
5679        gen_op_mov_v_reg(ot, cpu_T1, reg);
5680
5681        if (shift) {
5682            TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
5683            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5684            tcg_temp_free(imm);
5685        } else {
5686            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5687        }
5688        break;
5689
5690        /************************/
5691        /* floats */
5692    case 0xd8 ... 0xdf:
5693        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5694            /* if CR0.EM or CR0.TS is set, generate a #NM exception */
5695            /* XXX: what to do if illegal op? */
5696            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5697            break;
5698        }
5699        modrm = cpu_ldub_code(env, s->pc++);
5700        mod = (modrm >> 6) & 3;
5701        rm = modrm & 7;
5702        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
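            /* 'op' packs the low three bits of the escape byte
               (d8..df) with the ModRM reg field into one of 64 cases;
               e.g. d9 /5 gives op 0x0d, the constant-load group
               below.  */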
5703        if (mod != 3) {
5704            /* memory op */
5705            gen_lea_modrm(env, s, modrm);
5706            switch(op) {
5707            case 0x00 ... 0x07: /* fxxxs */
5708            case 0x10 ... 0x17: /* fixxxl */
5709            case 0x20 ... 0x27: /* fxxxl */
5710            case 0x30 ... 0x37: /* fixxx */
5711                {
5712                    int op1;
5713                    op1 = op & 7;
5714
5715                    switch(op >> 4) {
5716                    case 0:
5717                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5718                                            s->mem_index, MO_LEUL);
5719                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5720                        break;
5721                    case 1:
5722                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5723                                            s->mem_index, MO_LEUL);
5724                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5725                        break;
5726                    case 2:
5727                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5728                                            s->mem_index, MO_LEQ);
5729                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5730                        break;
5731                    case 3:
5732                    default:
5733                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5734                                            s->mem_index, MO_LESW);
5735                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5736                        break;
5737                    }
5738
5739                    gen_helper_fp_arith_ST0_FT0(op1);
5740                    if (op1 == 3) {
5741                        /* fcomp needs pop */
5742                        gen_helper_fpop(cpu_env);
5743                    }
5744                }
5745                break;
5746            case 0x08: /* flds */
5747            case 0x0a: /* fsts */
5748            case 0x0b: /* fstps */
5749            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5750            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5751            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5752                switch(op & 7) {
5753                case 0:
5754                    switch(op >> 4) {
5755                    case 0:
5756                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5757                                            s->mem_index, MO_LEUL);
5758                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5759                        break;
5760                    case 1:
5761                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5762                                            s->mem_index, MO_LEUL);
5763                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5764                        break;
5765                    case 2:
5766                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5767                                            s->mem_index, MO_LEQ);
5768                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5769                        break;
5770                    case 3:
5771                    default:
5772                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5773                                            s->mem_index, MO_LESW);
5774                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5775                        break;
5776                    }
5777                    break;
5778                case 1:
5779                    /* XXX: the SSE3 CPUID bit (FISTTP) must be tested! */
5780                    switch(op >> 4) {
5781                    case 1:
5782                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5783                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5784                                            s->mem_index, MO_LEUL);
5785                        break;
5786                    case 2:
5787                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5788                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5789                                            s->mem_index, MO_LEQ);
5790                        break;
5791                    case 3:
5792                    default:
5793                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5794                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5795                                            s->mem_index, MO_LEUW);
5796                        break;
5797                    }
5798                    gen_helper_fpop(cpu_env);
5799                    break;
5800                default:
5801                    switch(op >> 4) {
5802                    case 0:
5803                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5804                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5805                                            s->mem_index, MO_LEUL);
5806                        break;
5807                    case 1:
5808                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5809                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5810                                            s->mem_index, MO_LEUL);
5811                        break;
5812                    case 2:
5813                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5814                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5815                                            s->mem_index, MO_LEQ);
5816                        break;
5817                    case 3:
5818                    default:
5819                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5820                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5821                                            s->mem_index, MO_LEUW);
5822                        break;
5823                    }
5824                    if ((op & 7) == 3)
5825                        gen_helper_fpop(cpu_env);
5826                    break;
5827                }
5828                break;
5829            case 0x0c: /* fldenv mem */
5830                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5831                break;
5832            case 0x0d: /* fldcw mem */
5833                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5834                                    s->mem_index, MO_LEUW);
5835                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5836                break;
5837            case 0x0e: /* fnstenv mem */
5838                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5839                break;
5840            case 0x0f: /* fnstcw mem */
5841                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5842                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5843                                    s->mem_index, MO_LEUW);
5844                break;
5845            case 0x1d: /* fldt mem */
5846                gen_helper_fldt_ST0(cpu_env, cpu_A0);
5847                break;
5848            case 0x1f: /* fstpt mem */
5849                gen_helper_fstt_ST0(cpu_env, cpu_A0);
5850                gen_helper_fpop(cpu_env);
5851                break;
5852            case 0x2c: /* frstor mem */
5853                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5854                break;
5855            case 0x2e: /* fnsave mem */
5856                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5857                break;
5858            case 0x2f: /* fnstsw mem */
5859                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5860                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5861                                    s->mem_index, MO_LEUW);
5862                break;
5863            case 0x3c: /* fbld */
5864                gen_helper_fbld_ST0(cpu_env, cpu_A0);
5865                break;
5866            case 0x3e: /* fbstp */
5867                gen_helper_fbst_ST0(cpu_env, cpu_A0);
5868                gen_helper_fpop(cpu_env);
5869                break;
5870            case 0x3d: /* fildll */
5871                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5872                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5873                break;
5874            case 0x3f: /* fistpll */
5875                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5876                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5877                gen_helper_fpop(cpu_env);
5878                break;
5879            default:
5880                goto unknown_op;
5881            }
5882        } else {
5883            /* register float ops */
5884            opreg = rm;
5885
5886            switch(op) {
5887            case 0x08: /* fld sti */
5888                gen_helper_fpush(cpu_env);
5889                gen_helper_fmov_ST0_STN(cpu_env,
5890                                        tcg_const_i32((opreg + 1) & 7));
5891                break;
5892            case 0x09: /* fxchg sti */
5893            case 0x29: /* fxchg4 sti, undocumented op */
5894            case 0x39: /* fxchg7 sti, undocumented op */
5895                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5896                break;
5897            case 0x0a: /* grp d9/2 */
5898                switch(rm) {
5899                case 0: /* fnop */
5900                    /* check exceptions (FreeBSD FPU probe) */
5901                    gen_helper_fwait(cpu_env);
5902                    break;
5903                default:
5904                    goto unknown_op;
5905                }
5906                break;
5907            case 0x0c: /* grp d9/4 */
5908                switch(rm) {
5909                case 0: /* fchs */
5910                    gen_helper_fchs_ST0(cpu_env);
5911                    break;
5912                case 1: /* fabs */
5913                    gen_helper_fabs_ST0(cpu_env);
5914                    break;
5915                case 4: /* ftst */
5916                    gen_helper_fldz_FT0(cpu_env);
5917                    gen_helper_fcom_ST0_FT0(cpu_env);
5918                    break;
5919                case 5: /* fxam */
5920                    gen_helper_fxam_ST0(cpu_env);
5921                    break;
5922                default:
5923                    goto unknown_op;
5924                }
5925                break;
5926            case 0x0d: /* grp d9/5 */
5927                {
5928                    switch(rm) {
5929                    case 0:
5930                        gen_helper_fpush(cpu_env);
5931                        gen_helper_fld1_ST0(cpu_env);
5932                        break;
5933                    case 1:
5934                        gen_helper_fpush(cpu_env);
5935                        gen_helper_fldl2t_ST0(cpu_env);
5936                        break;
5937                    case 2:
5938                        gen_helper_fpush(cpu_env);
5939                        gen_helper_fldl2e_ST0(cpu_env);
5940                        break;
5941                    case 3:
5942                        gen_helper_fpush(cpu_env);
5943                        gen_helper_fldpi_ST0(cpu_env);
5944                        break;
5945                    case 4:
5946                        gen_helper_fpush(cpu_env);
5947                        gen_helper_fldlg2_ST0(cpu_env);
5948                        break;
5949                    case 5:
5950                        gen_helper_fpush(cpu_env);
5951                        gen_helper_fldln2_ST0(cpu_env);
5952                        break;
5953                    case 6:
5954                        gen_helper_fpush(cpu_env);
5955                        gen_helper_fldz_ST0(cpu_env);
5956                        break;
5957                    default:
5958                        goto unknown_op;
5959                    }
5960                }
5961                break;
5962            case 0x0e: /* grp d9/6 */
5963                switch(rm) {
5964                case 0: /* f2xm1 */
5965                    gen_helper_f2xm1(cpu_env);
5966                    break;
5967                case 1: /* fyl2x */
5968                    gen_helper_fyl2x(cpu_env);
5969                    break;
5970                case 2: /* fptan */
5971                    gen_helper_fptan(cpu_env);
5972                    break;
5973                case 3: /* fpatan */
5974                    gen_helper_fpatan(cpu_env);
5975                    break;
5976                case 4: /* fxtract */
5977                    gen_helper_fxtract(cpu_env);
5978                    break;
5979                case 5: /* fprem1 */
5980                    gen_helper_fprem1(cpu_env);
5981                    break;
5982                case 6: /* fdecstp */
5983                    gen_helper_fdecstp(cpu_env);
5984                    break;
5985                default:
5986                case 7: /* fincstp */
5987                    gen_helper_fincstp(cpu_env);
5988                    break;
5989                }
5990                break;
5991            case 0x0f: /* grp d9/7 */
5992                switch(rm) {
5993                case 0: /* fprem */
5994                    gen_helper_fprem(cpu_env);
5995                    break;
5996                case 1: /* fyl2xp1 */
5997                    gen_helper_fyl2xp1(cpu_env);
5998                    break;
5999                case 2: /* fsqrt */
6000                    gen_helper_fsqrt(cpu_env);
6001                    break;
6002                case 3: /* fsincos */
6003                    gen_helper_fsincos(cpu_env);
6004                    break;
6005                case 4: /* frndint */
6006                    gen_helper_frndint(cpu_env);
6007                    break;
6008                case 5: /* fscale */
6009                    gen_helper_fscale(cpu_env);
6010                    break;
6011                case 6: /* fsin */
6012                    gen_helper_fsin(cpu_env);
6013                    break;
6014                default:
6015                case 7: /* fcos */
6016                    gen_helper_fcos(cpu_env);
6017                    break;
6018                }
6019                break;
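                /* D8/DC/DE arithmetic, register forms: op & 7 (the
                   modrm reg field) selects fadd..fdivr; the DC forms
                   (op >= 0x20) write ST(i) instead of ST0, and the DE
                   forms (op >= 0x30) also pop the stack.  */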
6020            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6021            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6022            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6023                {
6024                    int op1;
6025
6026                    op1 = op & 7;
6027                    if (op >= 0x20) {
6028                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6029                        if (op >= 0x30)
6030                            gen_helper_fpop(cpu_env);
6031                    } else {
6032                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6033                        gen_helper_fp_arith_ST0_FT0(op1);
6034                    }
6035                }
6036                break;
6037            case 0x02: /* fcom */
6038            case 0x22: /* fcom2, undocumented op */
6039                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6040                gen_helper_fcom_ST0_FT0(cpu_env);
6041                break;
6042            case 0x03: /* fcomp */
6043            case 0x23: /* fcomp3, undocumented op */
6044            case 0x32: /* fcomp5, undocumented op */
6045                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6046                gen_helper_fcom_ST0_FT0(cpu_env);
6047                gen_helper_fpop(cpu_env);
6048                break;
6049            case 0x15: /* da/5 */
6050                switch(rm) {
6051                case 1: /* fucompp */
6052                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6053                    gen_helper_fucom_ST0_FT0(cpu_env);
6054                    gen_helper_fpop(cpu_env);
6055                    gen_helper_fpop(cpu_env);
6056                    break;
6057                default:
6058                    goto unknown_op;
6059                }
6060                break;
6061            case 0x1c:
6062                switch(rm) {
6063                case 0: /* feni (287 only, just do nop here) */
6064                    break;
6065                case 1: /* fdisi (287 only, just do nop here) */
6066                    break;
6067                case 2: /* fclex */
6068                    gen_helper_fclex(cpu_env);
6069                    break;
6070                case 3: /* fninit */
6071                    gen_helper_fninit(cpu_env);
6072                    break;
6073                case 4: /* fsetpm (287 only, just do nop here) */
6074                    break;
6075                default:
6076                    goto unknown_op;
6077                }
6078                break;
6079            case 0x1d: /* fucomi */
6080                if (!(s->cpuid_features & CPUID_CMOV)) {
6081                    goto illegal_op;
6082                }
6083                gen_update_cc_op(s);
6084                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6085                gen_helper_fucomi_ST0_FT0(cpu_env);
6086                set_cc_op(s, CC_OP_EFLAGS);
6087                break;
6088            case 0x1e: /* fcomi */
6089                if (!(s->cpuid_features & CPUID_CMOV)) {
6090                    goto illegal_op;
6091                }
6092                gen_update_cc_op(s);
6093                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6094                gen_helper_fcomi_ST0_FT0(cpu_env);
6095                set_cc_op(s, CC_OP_EFLAGS);
6096                break;
6097            case 0x28: /* ffree sti */
6098                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6099                break;
6100            case 0x2a: /* fst sti */
6101                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6102                break;
6103            case 0x2b: /* fstp sti */
6104            case 0x0b: /* fstp1 sti, undocumented op */
6105            case 0x3a: /* fstp8 sti, undocumented op */
6106            case 0x3b: /* fstp9 sti, undocumented op */
6107                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6108                gen_helper_fpop(cpu_env);
6109                break;
6110            case 0x2c: /* fucom st(i) */
6111                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6112                gen_helper_fucom_ST0_FT0(cpu_env);
6113                break;
6114            case 0x2d: /* fucomp st(i) */
6115                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6116                gen_helper_fucom_ST0_FT0(cpu_env);
6117                gen_helper_fpop(cpu_env);
6118                break;
6119            case 0x33: /* de/3 */
6120                switch(rm) {
6121                case 1: /* fcompp */
6122                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6123                    gen_helper_fcom_ST0_FT0(cpu_env);
6124                    gen_helper_fpop(cpu_env);
6125                    gen_helper_fpop(cpu_env);
6126                    break;
6127                default:
6128                    goto unknown_op;
6129                }
6130                break;
6131            case 0x38: /* ffreep sti, undocumented op */
6132                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6133                gen_helper_fpop(cpu_env);
6134                break;
6135            case 0x3c: /* df/4 */
6136                switch(rm) {
6137                case 0:
6138                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
6139                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
6140                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
6141                    break;
6142                default:
6143                    goto unknown_op;
6144                }
6145                break;
6146            case 0x3d: /* fucomip */
6147                if (!(s->cpuid_features & CPUID_CMOV)) {
6148                    goto illegal_op;
6149                }
6150                gen_update_cc_op(s);
6151                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6152                gen_helper_fucomi_ST0_FT0(cpu_env);
6153                gen_helper_fpop(cpu_env);
6154                set_cc_op(s, CC_OP_EFLAGS);
6155                break;
6156            case 0x3e: /* fcomip */
6157                if (!(s->cpuid_features & CPUID_CMOV)) {
6158                    goto illegal_op;
6159                }
6160                gen_update_cc_op(s);
6161                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6162                gen_helper_fcomi_ST0_FT0(cpu_env);
6163                gen_helper_fpop(cpu_env);
6164                set_cc_op(s, CC_OP_EFLAGS);
6165                break;
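                /* DA/DB with mod==3: FCMOVcc ST0, ST(i).  Bits 0-1 of
                   op pick the base condition (B, Z, BE, P) and bit 3
                   the negated form; the move is skipped by branching
                   on the inverse condition.  */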
6166            case 0x10 ... 0x13: /* fcmovxx */
6167            case 0x18 ... 0x1b:
6168                {
6169                    int op1;
6170                    TCGLabel *l1;
6171                    static const uint8_t fcmov_cc[8] = {
6172                        (JCC_B << 1),
6173                        (JCC_Z << 1),
6174                        (JCC_BE << 1),
6175                        (JCC_P << 1),
6176                    };
6177
6178                    if (!(s->cpuid_features & CPUID_CMOV)) {
6179                        goto illegal_op;
6180                    }
6181                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6182                    l1 = gen_new_label();
6183                    gen_jcc1_noeob(s, op1, l1);
6184                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6185                    gen_set_label(l1);
6186                }
6187                break;
6188            default:
6189                goto unknown_op;
6190            }
6191        }
6192        break;
6193        /************************/
6194        /* string ops */
6195
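    /* The REP-prefixed forms receive both the current and the next
       EIP so the generated loop can restart the insn after an
       exception or an icount break.  */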
6196    case 0xa4: /* movsS */
6197    case 0xa5:
6198        ot = mo_b_d(b, dflag);
6199        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6200            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6201        } else {
6202            gen_movs(s, ot);
6203        }
6204        break;
6205
6206    case 0xaa: /* stosS */
6207    case 0xab:
6208        ot = mo_b_d(b, dflag);
6209        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6210            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6211        } else {
6212            gen_stos(s, ot);
6213        }
6214        break;
6215    case 0xac: /* lodsS */
6216    case 0xad:
6217        ot = mo_b_d(b, dflag);
6218        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6219            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6220        } else {
6221            gen_lods(s, ot);
6222        }
6223        break;
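    /* For SCAS/CMPS the prefix type matters: the final 1/0 argument
       of gen_repz_scas/gen_repz_cmps distinguishes REPNZ from REPZ,
       which terminate the loop on opposite ZF values.  */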
6224    case 0xae: /* scasS */
6225    case 0xaf:
6226        ot = mo_b_d(b, dflag);
6227        if (prefixes & PREFIX_REPNZ) {
6228            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6229        } else if (prefixes & PREFIX_REPZ) {
6230            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6231        } else {
6232            gen_scas(s, ot);
6233        }
6234        break;
6235
6236    case 0xa6: /* cmpsS */
6237    case 0xa7:
6238        ot = mo_b_d(b, dflag);
6239        if (prefixes & PREFIX_REPNZ) {
6240            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6241        } else if (prefixes & PREFIX_REPZ) {
6242            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6243        } else {
6244            gen_cmps(s, ot);
6245        }
6246        break;
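    /* INS/OUTS check I/O permission and SVM intercepts up front via
       gen_check_io; with icount enabled the non-REP forms end the TB
       after the access so device I/O stays deterministic.  */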
6247    case 0x6c: /* insS */
6248    case 0x6d:
6249        ot = mo_b_d32(b, dflag);
6250        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6251        gen_check_io(s, ot, pc_start - s->cs_base, 
6252                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6253        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6254            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6255        } else {
6256            gen_ins(s, ot);
6257            if (s->tb->cflags & CF_USE_ICOUNT) {
6258                gen_jmp(s, s->pc - s->cs_base);
6259            }
6260        }
6261        break;
6262    case 0x6e: /* outsS */
6263    case 0x6f:
6264        ot = mo_b_d32(b, dflag);
6265        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6266        gen_check_io(s, ot, pc_start - s->cs_base,
6267                     svm_is_rep(prefixes) | 4);
6268        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6269            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6270        } else {
6271            gen_outs(s, ot);
6272            if (s->tb->cflags & CF_USE_ICOUNT) {
6273                gen_jmp(s, s->pc - s->cs_base);
6274            }
6275        }
6276        break;
6277
6278        /************************/
6279        /* port I/O */
6280
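    /* IN/OUT with an imm8 port (E4-E7) or DX (EC-EF).  Under icount
       the access is bracketed by gen_io_start/gen_io_end and the TB
       ends, since I/O must be the last op in an icount slice.  */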
6281    case 0xe4:
6282    case 0xe5:
6283        ot = mo_b_d32(b, dflag);
6284        val = cpu_ldub_code(env, s->pc++);
6285        tcg_gen_movi_tl(cpu_T0, val);
6286        gen_check_io(s, ot, pc_start - s->cs_base,
6287                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6288        if (s->tb->cflags & CF_USE_ICOUNT) {
6289            gen_io_start();
6290        }
6291        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6292        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6293        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6294        gen_bpt_io(s, cpu_tmp2_i32, ot);
6295        if (s->tb->cflags & CF_USE_ICOUNT) {
6296            gen_io_end();
6297            gen_jmp(s, s->pc - s->cs_base);
6298        }
6299        break;
6300    case 0xe6:
6301    case 0xe7:
6302        ot = mo_b_d32(b, dflag);
6303        val = cpu_ldub_code(env, s->pc++);
6304        tcg_gen_movi_tl(cpu_T0, val);
6305        gen_check_io(s, ot, pc_start - s->cs_base,
6306                     svm_is_rep(prefixes));
6307        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6308
6309        if (s->tb->cflags & CF_USE_ICOUNT) {
6310            gen_io_start();
6311        }
6312        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6313        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6314        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6315        gen_bpt_io(s, cpu_tmp2_i32, ot);
6316        if (s->tb->cflags & CF_USE_ICOUNT) {
6317            gen_io_end();
6318            gen_jmp(s, s->pc - s->cs_base);
6319        }
6320        break;
6321    case 0xec:
6322    case 0xed:
6323        ot = mo_b_d32(b, dflag);
6324        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6325        gen_check_io(s, ot, pc_start - s->cs_base,
6326                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6327        if (s->tb->cflags & CF_USE_ICOUNT) {
6328            gen_io_start();
6329        }
6330        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6331        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6332        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6333        gen_bpt_io(s, cpu_tmp2_i32, ot);
6334        if (s->tb->cflags & CF_USE_ICOUNT) {
6335            gen_io_end();
6336            gen_jmp(s, s->pc - s->cs_base);
6337        }
6338        break;
6339    case 0xee:
6340    case 0xef:
6341        ot = mo_b_d32(b, dflag);
6342        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6343        gen_check_io(s, ot, pc_start - s->cs_base,
6344                     svm_is_rep(prefixes));
6345        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6346
6347        if (s->tb->cflags & CF_USE_ICOUNT) {
6348            gen_io_start();
6349        }
6350        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6351        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6352        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6353        gen_bpt_io(s, cpu_tmp2_i32, ot);
6354        if (s->tb->cflags & CF_USE_ICOUNT) {
6355            gen_io_end();
6356            gen_jmp(s, s->pc - s->cs_base);
6357        }
6358        break;
6359
6360        /************************/
6361        /* control */
6362    case 0xc2: /* ret im */
6363        val = cpu_ldsw_code(env, s->pc);
6364        s->pc += 2;
6365        ot = gen_pop_T0(s);
6366        gen_stack_update(s, val + (1 << ot));
6367        /* Note that gen_pop_T0 uses a zero-extending load.  */
6368        gen_op_jmp_v(cpu_T0);
6369        gen_bnd_jmp(s);
6370        gen_eob(s);
6371        break;
6372    case 0xc3: /* ret */
6373        ot = gen_pop_T0(s);
6374        gen_pop_update(s, ot);
6375        /* Note that gen_pop_T0 uses a zero-extending load.  */
6376        gen_op_jmp_v(cpu_T0);
6377        gen_bnd_jmp(s);
6378        gen_eob(s);
6379        break;
6380    case 0xca: /* lret im */
6381        val = cpu_ldsw_code(env, s->pc);
6382        s->pc += 2;
6383    do_lret:
6384        if (s->pe && !s->vm86) {
6385            gen_update_cc_op(s);
6386            gen_jmp_im(pc_start - s->cs_base);
6387            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6388                                      tcg_const_i32(val));
6389        } else {
6390            gen_stack_A0(s);
6391            /* pop offset */
6392            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6393            /* NOTE: keeping EIP updated is not a problem in case of
6394               exception */
6395            gen_op_jmp_v(cpu_T0);
6396            /* pop selector */
6397            gen_add_A0_im(s, 1 << dflag);
6398            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6399            gen_op_movl_seg_T0_vm(R_CS);
6400            /* add stack offset */
6401            gen_stack_update(s, val + (2 << dflag));
6402        }
6403        gen_eob(s);
6404        break;
6405    case 0xcb: /* lret */
6406        val = 0;
6407        goto do_lret;
6408    case 0xcf: /* iret */
6409        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6410        if (!s->pe) {
6411            /* real mode */
6412            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6413            set_cc_op(s, CC_OP_EFLAGS);
6414        } else if (s->vm86) {
6415            if (s->iopl != 3) {
6416                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6417            } else {
6418                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6419                set_cc_op(s, CC_OP_EFLAGS);
6420            }
6421        } else {
6422            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6423                                      tcg_const_i32(s->pc - s->cs_base));
6424            set_cc_op(s, CC_OP_EFLAGS);
6425        }
6426        gen_eob(s);
6427        break;
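    /* Relative CALL/JMP: the displacement is sign-extended, added to
       the next insn's EIP, then truncated to the operand size (16
       bits with a 66 prefix, 32 bits outside long mode).  */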
6428    case 0xe8: /* call im */
6429        {
6430            if (dflag != MO_16) {
6431                tval = (int32_t)insn_get(env, s, MO_32);
6432            } else {
6433                tval = (int16_t)insn_get(env, s, MO_16);
6434            }
6435            next_eip = s->pc - s->cs_base;
6436            tval += next_eip;
6437            if (dflag == MO_16) {
6438                tval &= 0xffff;
6439            } else if (!CODE64(s)) {
6440                tval &= 0xffffffff;
6441            }
6442            tcg_gen_movi_tl(cpu_T0, next_eip);
6443            gen_push_v(s, cpu_T0);
6444            gen_bnd_jmp(s);
6445            gen_jmp(s, tval);
6446        }
6447        break;
6448    case 0x9a: /* lcall im */
6449        {
6450            unsigned int selector, offset;
6451
6452            if (CODE64(s))
6453                goto illegal_op;
6454            ot = dflag;
6455            offset = insn_get(env, s, ot);
6456            selector = insn_get(env, s, MO_16);
6457
6458            tcg_gen_movi_tl(cpu_T0, selector);
6459            tcg_gen_movi_tl(cpu_T1, offset);
6460        }
6461        goto do_lcall;
6462    case 0xe9: /* jmp im */
6463        if (dflag != MO_16) {
6464            tval = (int32_t)insn_get(env, s, MO_32);
6465        } else {
6466            tval = (int16_t)insn_get(env, s, MO_16);
6467        }
6468        tval += s->pc - s->cs_base;
6469        if (dflag == MO_16) {
6470            tval &= 0xffff;
6471        } else if (!CODE64(s)) {
6472            tval &= 0xffffffff;
6473        }
6474        gen_bnd_jmp(s);
6475        gen_jmp(s, tval);
6476        break;
6477    case 0xea: /* ljmp im */
6478        {
6479            unsigned int selector, offset;
6480
6481            if (CODE64(s))
6482                goto illegal_op;
6483            ot = dflag;
6484            offset = insn_get(env, s, ot);
6485            selector = insn_get(env, s, MO_16);
6486
6487            tcg_gen_movi_tl(cpu_T0, selector);
6488            tcg_gen_movi_tl(cpu_T1, offset);
6489        }
6490        goto do_ljmp;
6491    case 0xeb: /* jmp Jb */
6492        tval = (int8_t)insn_get(env, s, MO_8);
6493        tval += s->pc - s->cs_base;
6494        if (dflag == MO_16) {
6495            tval &= 0xffff;
6496        }
6497        gen_jmp(s, tval);
6498        break;
6499    case 0x70 ... 0x7f: /* jcc Jb */
6500        tval = (int8_t)insn_get(env, s, MO_8);
6501        goto do_jcc;
6502    case 0x180 ... 0x18f: /* jcc Jv */
6503        if (dflag != MO_16) {
6504            tval = (int32_t)insn_get(env, s, MO_32);
6505        } else {
6506            tval = (int16_t)insn_get(env, s, MO_16);
6507        }
6508    do_jcc:
6509        next_eip = s->pc - s->cs_base;
6510        tval += next_eip;
6511        if (dflag == MO_16) {
6512            tval &= 0xffff;
6513        }
6514        gen_bnd_jmp(s);
6515        gen_jcc(s, b, tval, next_eip);
6516        break;
6517
6518    case 0x190 ... 0x19f: /* setcc Gv */
6519        modrm = cpu_ldub_code(env, s->pc++);
6520        gen_setcc1(s, b, cpu_T0);
6521        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6522        break;
6523    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6524        if (!(s->cpuid_features & CPUID_CMOV)) {
6525            goto illegal_op;
6526        }
6527        ot = dflag;
6528        modrm = cpu_ldub_code(env, s->pc++);
6529        reg = ((modrm >> 3) & 7) | rex_r;
6530        gen_cmovcc1(env, s, ot, b, modrm, reg);
6531        break;
6532
6533        /************************/
6534        /* flags */
6535    case 0x9c: /* pushf */
6536        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6537        if (s->vm86 && s->iopl != 3) {
6538            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6539        } else {
6540            gen_update_cc_op(s);
6541            gen_helper_read_eflags(cpu_T0, cpu_env);
6542            gen_push_v(s, cpu_T0);
6543        }
6544        break;
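    /* POPF: the writable-flags mask depends on privilege: IOPL can
       change only at CPL 0, and IF only when CPL <= IOPL; the TB
       ends afterwards because TF/AC may have changed.  */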
6545    case 0x9d: /* popf */
6546        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6547        if (s->vm86 && s->iopl != 3) {
6548            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6549        } else {
6550            ot = gen_pop_T0(s);
6551            if (s->cpl == 0) {
6552                if (dflag != MO_16) {
6553                    gen_helper_write_eflags(cpu_env, cpu_T0,
6554                                            tcg_const_i32((TF_MASK | AC_MASK |
6555                                                           ID_MASK | NT_MASK |
6556                                                           IF_MASK |
6557                                                           IOPL_MASK)));
6558                } else {
6559                    gen_helper_write_eflags(cpu_env, cpu_T0,
6560                                            tcg_const_i32((TF_MASK | AC_MASK |
6561                                                           ID_MASK | NT_MASK |
6562                                                           IF_MASK | IOPL_MASK)
6563                                                          & 0xffff));
6564                }
6565            } else {
6566                if (s->cpl <= s->iopl) {
6567                    if (dflag != MO_16) {
6568                        gen_helper_write_eflags(cpu_env, cpu_T0,
6569                                                tcg_const_i32((TF_MASK |
6570                                                               AC_MASK |
6571                                                               ID_MASK |
6572                                                               NT_MASK |
6573                                                               IF_MASK)));
6574                    } else {
6575                        gen_helper_write_eflags(cpu_env, cpu_T0,
6576                                                tcg_const_i32((TF_MASK |
6577                                                               AC_MASK |
6578                                                               ID_MASK |
6579                                                               NT_MASK |
6580                                                               IF_MASK)
6581                                                              & 0xffff));
6582                    }
6583                } else {
6584                    if (dflag != MO_16) {
6585                        gen_helper_write_eflags(cpu_env, cpu_T0,
6586                                           tcg_const_i32((TF_MASK | AC_MASK |
6587                                                          ID_MASK | NT_MASK)));
6588                    } else {
6589                        gen_helper_write_eflags(cpu_env, cpu_T0,
6590                                           tcg_const_i32((TF_MASK | AC_MASK |
6591                                                          ID_MASK | NT_MASK)
6592                                                         & 0xffff));
6593                    }
6594                }
6595            }
6596            gen_pop_update(s, ot);
6597            set_cc_op(s, CC_OP_EFLAGS);
6598            /* abort translation because the TF/AC flags may change */
6599            gen_jmp_im(s->pc - s->cs_base);
6600            gen_eob(s);
6601        }
6602        break;
6603    case 0x9e: /* sahf */
6604        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6605            goto illegal_op;
6606        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
6607        gen_compute_eflags(s);
6608        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6609        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6610        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
6611        break;
6612    case 0x9f: /* lahf */
6613        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6614            goto illegal_op;
6615        gen_compute_eflags(s);
6616        /* Note: gen_compute_eflags() only gives the condition codes */
6617        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
6618        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
6619        break;
6620    case 0xf5: /* cmc */
6621        gen_compute_eflags(s);
6622        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6623        break;
6624    case 0xf8: /* clc */
6625        gen_compute_eflags(s);
6626        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6627        break;
6628    case 0xf9: /* stc */
6629        gen_compute_eflags(s);
6630        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6631        break;
6632    case 0xfc: /* cld */
6633        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
6634        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6635        break;
6636    case 0xfd: /* std */
6637        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
6638        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6639        break;
6640
6641        /************************/
6642        /* bit operations */
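    /* BT/BTS/BTR/BTC.  BT only tests, so even under LOCK it needs
       just a plain load; the modifying forms use atomic
       fetch-or/and/xor and recover CF from the returned old value.  */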
6643    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6644        ot = dflag;
6645        modrm = cpu_ldub_code(env, s->pc++);
6646        op = (modrm >> 3) & 7;
6647        mod = (modrm >> 6) & 3;
6648        rm = (modrm & 7) | REX_B(s);
6649        if (mod != 3) {
6650            s->rip_offset = 1;
6651            gen_lea_modrm(env, s, modrm);
6652            if (!(s->prefix & PREFIX_LOCK)) {
6653                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6654            }
6655        } else {
6656            gen_op_mov_v_reg(ot, cpu_T0, rm);
6657        }
6658        /* load shift */
6659        val = cpu_ldub_code(env, s->pc++);
6660        tcg_gen_movi_tl(cpu_T1, val);
6661        if (op < 4)
6662            goto unknown_op;
6663        op -= 4;
6664        goto bt_op;
6665    case 0x1a3: /* bt Gv, Ev */
6666        op = 0;
6667        goto do_btx;
6668    case 0x1ab: /* bts */
6669        op = 1;
6670        goto do_btx;
6671    case 0x1b3: /* btr */
6672        op = 2;
6673        goto do_btx;
6674    case 0x1bb: /* btc */
6675        op = 3;
6676    do_btx:
6677        ot = dflag;
6678        modrm = cpu_ldub_code(env, s->pc++);
6679        reg = ((modrm >> 3) & 7) | rex_r;
6680        mod = (modrm >> 6) & 3;
6681        rm = (modrm & 7) | REX_B(s);
6682        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
6683        if (mod != 3) {
6684            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6685            /* The bit index may lie outside the operand, so fold its
                   word index into the address as a byte displacement.  */
6686            gen_exts(ot, cpu_T1);
6687            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
6688            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
6689            tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
6690            gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
6691            if (!(s->prefix & PREFIX_LOCK)) {
6692                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6693            }
6694        } else {
6695            gen_op_mov_v_reg(ot, cpu_T0, rm);
6696        }
6697    bt_op:
6698        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
6699        tcg_gen_movi_tl(cpu_tmp0, 1);
6700        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
6701        if (s->prefix & PREFIX_LOCK) {
6702            switch (op) {
6703            case 0: /* bt */
6704                /* Needs no atomic ops; we suppressed the normal
6705                   memory load for LOCK above so do it now.  */
6706                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6707                break;
6708            case 1: /* bts */
6709                tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
6710                                           s->mem_index, ot | MO_LE);
6711                break;
6712            case 2: /* btr */
6713                tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
6714                tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
6715                                            s->mem_index, ot | MO_LE);
6716                break;
6717            default:
6718            case 3: /* btc */
6719                tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
6720                                            s->mem_index, ot | MO_LE);
6721                break;
6722            }
6723            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6724        } else {
6725            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6726            switch (op) {
6727            case 0: /* bt */
6728                /* Data already loaded; nothing to do.  */
6729                break;
6730            case 1: /* bts */
6731                tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
6732                break;
6733            case 2: /* btr */
6734                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
6735                break;
6736            default:
6737            case 3: /* btc */
6738                tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
6739                break;
6740            }
6741            if (op != 0) {
6742                if (mod != 3) {
6743                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
6744                } else {
6745                    gen_op_mov_reg_v(ot, rm, cpu_T0);
6746                }
6747            }
6748        }
6749
6750        /* Delay all CC updates until after the store above.  Note that
6751           C is the result of the test, Z is unchanged, and the others
6752           are all undefined.  */
6753        switch (s->cc_op) {
6754        case CC_OP_MULB ... CC_OP_MULQ:
6755        case CC_OP_ADDB ... CC_OP_ADDQ:
6756        case CC_OP_ADCB ... CC_OP_ADCQ:
6757        case CC_OP_SUBB ... CC_OP_SUBQ:
6758        case CC_OP_SBBB ... CC_OP_SBBQ:
6759        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6760        case CC_OP_INCB ... CC_OP_INCQ:
6761        case CC_OP_DECB ... CC_OP_DECQ:
6762        case CC_OP_SHLB ... CC_OP_SHLQ:
6763        case CC_OP_SARB ... CC_OP_SARQ:
6764        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6765            /* Z was going to be computed from the non-zero status of CC_DST.
6766               We can get that same Z value (and the new C value) by leaving
6767               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6768               same width.  */
6769            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
6770            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6771            break;
6772        default:
6773            /* Otherwise, generate EFLAGS and replace the C bit.  */
6774            gen_compute_eflags(s);
6775            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
6776                               ctz32(CC_C), 1);
6777            break;
6778        }
6779        break;
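    /* 0F BC/BD decode as BSF/BSR, or as TZCNT/LZCNT when the F3
       (REPZ) prefix is present and BMI1/ABM is advertised; the two
       pairs set Z and C differently, as handled below.  */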
6780    case 0x1bc: /* bsf / tzcnt */
6781    case 0x1bd: /* bsr / lzcnt */
6782        ot = dflag;
6783        modrm = cpu_ldub_code(env, s->pc++);
6784        reg = ((modrm >> 3) & 7) | rex_r;
6785        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6786        gen_extu(ot, cpu_T0);
6787
6788        /* Note that lzcnt and tzcnt are in different extensions.  */
6789        if ((prefixes & PREFIX_REPZ)
6790            && (b & 1
6791                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6792                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6793            int size = 8 << ot;
6794            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
6795            if (b & 1) {
6796                /* For lzcnt, reduce the target_ulong result by the
6797                   number of zeros that we expect to find at the top.  */
6798                gen_helper_clz(cpu_T0, cpu_T0);
6799                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
6800            } else {
6801                /* For tzcnt, a zero input must return the operand size:
6802                   force all bits outside the operand size to 1.  */
6803                target_ulong mask = (target_ulong)-2 << (size - 1);
6804                tcg_gen_ori_tl(cpu_T0, cpu_T0, mask);
6805                gen_helper_ctz(cpu_T0, cpu_T0);
6806            }
6807            /* For lzcnt/tzcnt, C and Z bits are defined and are
6808               related to the result.  */
6809            gen_op_update1_cc();
6810            set_cc_op(s, CC_OP_BMILGB + ot);
6811        } else {
6812            /* For bsr/bsf, only the Z bit is defined and it is related
6813               to the input and not the result.  */
6814            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
6815            set_cc_op(s, CC_OP_LOGICB + ot);
6816            if (b & 1) {
6817                /* For bsr, return the bit index of the first 1 bit,
6818                   not the count of leading zeros.  */
6819                gen_helper_clz(cpu_T0, cpu_T0);
6820                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
6821            } else {
6822                gen_helper_ctz(cpu_T0, cpu_T0);
6823            }
6824            /* ??? The manual says that the output is undefined when the
6825               input is zero, but real hardware leaves it unchanged, and
6826               real programs appear to depend on that.  */
6827            tcg_gen_movi_tl(cpu_tmp0, 0);
6828            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T0, cpu_cc_dst, cpu_tmp0,
6829                               cpu_regs[reg], cpu_T0);
6830        }
6831        gen_op_mov_reg_v(ot, reg, cpu_T0);
6832        break;
6833        /************************/
6834        /* bcd */
6835    case 0x27: /* daa */
6836        if (CODE64(s))
6837            goto illegal_op;
6838        gen_update_cc_op(s);
6839        gen_helper_daa(cpu_env);
6840        set_cc_op(s, CC_OP_EFLAGS);
6841        break;
6842    case 0x2f: /* das */
6843        if (CODE64(s))
6844            goto illegal_op;
6845        gen_update_cc_op(s);
6846        gen_helper_das(cpu_env);
6847        set_cc_op(s, CC_OP_EFLAGS);
6848        break;
6849    case 0x37: /* aaa */
6850        if (CODE64(s))
6851            goto illegal_op;
6852        gen_update_cc_op(s);
6853        gen_helper_aaa(cpu_env);
6854        set_cc_op(s, CC_OP_EFLAGS);
6855        break;
6856    case 0x3f: /* aas */
6857        if (CODE64(s))
6858            goto illegal_op;
6859        gen_update_cc_op(s);
6860        gen_helper_aas(cpu_env);
6861        set_cc_op(s, CC_OP_EFLAGS);
6862        break;
6863    case 0xd4: /* aam */
6864        if (CODE64(s))
6865            goto illegal_op;
6866        val = cpu_ldub_code(env, s->pc++);
6867        if (val == 0) {
6868            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6869        } else {
6870            gen_helper_aam(cpu_env, tcg_const_i32(val));
6871            set_cc_op(s, CC_OP_LOGICB);
6872        }
6873        break;
6874    case 0xd5: /* aad */
6875        if (CODE64(s))
6876            goto illegal_op;
6877        val = cpu_ldub_code(env, s->pc++);
6878        gen_helper_aad(cpu_env, tcg_const_i32(val));
6879        set_cc_op(s, CC_OP_LOGICB);
6880        break;
6881        /************************/
6882        /* misc */
6883    case 0x90: /* nop */
6884        /* XXX: correct lock test for all insns */
6885        if (prefixes & PREFIX_LOCK) {
6886            goto illegal_op;
6887        }
6888        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
6889        if (REX_B(s)) {
6890            goto do_xchg_reg_eax;
6891        }
6892        if (prefixes & PREFIX_REPZ) {
6893            gen_update_cc_op(s);
6894            gen_jmp_im(pc_start - s->cs_base);
6895            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
6896            s->is_jmp = DISAS_TB_JUMP;
6897        }
6898        break;
6899    case 0x9b: /* fwait */
6900        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
6901            (HF_MP_MASK | HF_TS_MASK)) {
6902            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6903        } else {
6904            gen_helper_fwait(cpu_env);
6905        }
6906        break;
6907    case 0xcc: /* int3 */
6908        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
6909        break;
6910    case 0xcd: /* int N */
6911        val = cpu_ldub_code(env, s->pc++);
6912        if (s->vm86 && s->iopl != 3) {
6913            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6914        } else {
6915            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
6916        }
6917        break;
6918    case 0xce: /* into */
6919        if (CODE64(s))
6920            goto illegal_op;
6921        gen_update_cc_op(s);
6922        gen_jmp_im(pc_start - s->cs_base);
6923        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
6924        break;
6925#ifdef WANT_ICEBP
6926    case 0xf1: /* icebp (undocumented, exits to external debugger) */
6927        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
6928#if 1
6929        gen_debug(s, pc_start - s->cs_base);
6930#else
6931        /* start debug */
6932        tb_flush(CPU(x86_env_get_cpu(env)));
6933        qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
6934#endif
6935        break;
6936#endif
6937    case 0xfa: /* cli */
6938        if (!s->vm86) {
6939            if (s->cpl <= s->iopl) {
6940                gen_helper_cli(cpu_env);
6941            } else {
6942                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6943            }
6944        } else {
6945            if (s->iopl == 3) {
6946                gen_helper_cli(cpu_env);
6947            } else {
6948                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6949            }
6950        }
6951        break;
6952    case 0xfb: /* sti */
6953        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
6954            gen_helper_sti(cpu_env);
6955            /* interrupts are recognized only after the insn following sti */
6956            gen_jmp_im(s->pc - s->cs_base);
6957            gen_eob_inhibit_irq(s, true);
6958        } else {
6959            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6960        }
6961        break;
6962    case 0x62: /* bound */
6963        if (CODE64(s))
6964            goto illegal_op;
6965        ot = dflag;
6966        modrm = cpu_ldub_code(env, s->pc++);
6967        reg = (modrm >> 3) & 7;
6968        mod = (modrm >> 6) & 3;
6969        if (mod == 3)
6970            goto illegal_op;
6971        gen_op_mov_v_reg(ot, cpu_T0, reg);
6972        gen_lea_modrm(env, s, modrm);
6973        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6974        if (ot == MO_16) {
6975            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
6976        } else {
6977            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
6978        }
6979        break;
6980    case 0x1c8 ... 0x1cf: /* bswap reg */
6981        reg = (b & 7) | REX_B(s);
6982#ifdef TARGET_X86_64
6983        if (dflag == MO_64) {
6984            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
6985            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
6986            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
6987        } else
6988#endif
6989        {
6990            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
6991            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
6992            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
6993            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
6994        }
6995        break;
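    /* Undocumented SALC: AL = CF ? 0xff : 0x00, computed by negating
       the carry bit.  */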
6996    case 0xd6: /* salc */
6997        if (CODE64(s))
6998            goto illegal_op;
6999        gen_compute_eflags_c(s, cpu_T0);
7000        tcg_gen_neg_tl(cpu_T0, cpu_T0);
7001        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
7002        break;
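    /* LOOP/LOOPZ/LOOPNZ/JECXZ: l1 is the taken-branch target, l3 the
       not-taken path (ECX reached zero or the condition failed), and
       l2 joins both paths before the single gen_eob.  */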
7003    case 0xe0: /* loopnz */
7004    case 0xe1: /* loopz */
7005    case 0xe2: /* loop */
7006    case 0xe3: /* jecxz */
7007        {
7008            TCGLabel *l1, *l2, *l3;
7009
7010            tval = (int8_t)insn_get(env, s, MO_8);
7011            next_eip = s->pc - s->cs_base;
7012            tval += next_eip;
7013            if (dflag == MO_16) {
7014                tval &= 0xffff;
7015            }
7016
7017            l1 = gen_new_label();
7018            l2 = gen_new_label();
7019            l3 = gen_new_label();
7020            b &= 3;
7021            switch(b) {
7022            case 0: /* loopnz */
7023            case 1: /* loopz */
7024                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7025                gen_op_jz_ecx(s->aflag, l3);
7026                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7027                break;
7028            case 2: /* loop */
7029                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7030                gen_op_jnz_ecx(s->aflag, l1);
7031                break;
7032            default:
7033            case 3: /* jcxz */
7034                gen_op_jz_ecx(s->aflag, l1);
7035                break;
7036            }
7037
7038            gen_set_label(l3);
7039            gen_jmp_im(next_eip);
7040            tcg_gen_br(l2);
7041
7042            gen_set_label(l1);
7043            gen_jmp_im(tval);
7044            gen_set_label(l2);
7045            gen_eob(s);
7046        }
7047        break;
7048    case 0x130: /* wrmsr */
7049    case 0x132: /* rdmsr */
7050        if (s->cpl != 0) {
7051            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7052        } else {
7053            gen_update_cc_op(s);
7054            gen_jmp_im(pc_start - s->cs_base);
7055            if (b & 2) {
7056                gen_helper_rdmsr(cpu_env);
7057            } else {
7058                gen_helper_wrmsr(cpu_env);
7059            }
7060        }
7061        break;
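    /* Under icount the TSC read is bracketed by gen_io_start/
       gen_io_end and the TB ends, keeping the virtual counter
       deterministic.  */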
7062    case 0x131: /* rdtsc */
7063        gen_update_cc_op(s);
7064        gen_jmp_im(pc_start - s->cs_base);
7065        if (s->tb->cflags & CF_USE_ICOUNT) {
7066            gen_io_start();
7067        }
7068        gen_helper_rdtsc(cpu_env);
7069        if (s->tb->cflags & CF_USE_ICOUNT) {
7070            gen_io_end();
7071            gen_jmp(s, s->pc - s->cs_base);
7072        }
7073        break;
7074    case 0x133: /* rdpmc */
7075        gen_update_cc_op(s);
7076        gen_jmp_im(pc_start - s->cs_base);
7077        gen_helper_rdpmc(cpu_env);
7078        break;
7079    case 0x134: /* sysenter */
7080        /* On Intel CPUs, SYSENTER is valid even in 64-bit mode */
7081        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7082            goto illegal_op;
7083        if (!s->pe) {
7084            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7085        } else {
7086            gen_helper_sysenter(cpu_env);
7087            gen_eob(s);
7088        }
7089        break;
7090    case 0x135: /* sysexit */
7091        /* On Intel CPUs, SYSEXIT is valid even in 64-bit mode */
7092        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7093            goto illegal_op;
7094        if (!s->pe) {
7095            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7096        } else {
7097            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7098            gen_eob(s);
7099        }
7100        break;
7101#ifdef TARGET_X86_64
7102    case 0x105: /* syscall */
7103        /* XXX: is it usable in real mode? */
7104        gen_update_cc_op(s);
7105        gen_jmp_im(pc_start - s->cs_base);
7106        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7107        gen_eob(s);
7108        break;
7109    case 0x107: /* sysret */
7110        if (!s->pe) {
7111            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7112        } else {
7113            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7114            /* condition codes are modified only in long mode */
7115            if (s->lma) {
7116                set_cc_op(s, CC_OP_EFLAGS);
7117            }
7118            gen_eob(s);
7119        }
7120        break;
7121#endif
7122    case 0x1a2: /* cpuid */
7123        gen_update_cc_op(s);
7124        gen_jmp_im(pc_start - s->cs_base);
7125        gen_helper_cpuid(cpu_env);
7126        break;
7127    case 0xf4: /* hlt */
7128        if (s->cpl != 0) {
7129            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7130        } else {
7131            gen_update_cc_op(s);
7132            gen_jmp_im(pc_start - s->cs_base);
7133            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7134            s->is_jmp = DISAS_TB_JUMP;
7135        }
7136        break;
7137    case 0x100:
7138        modrm = cpu_ldub_code(env, s->pc++);
7139        mod = (modrm >> 6) & 3;
7140        op = (modrm >> 3) & 7;
7141        switch(op) {
7142        case 0: /* sldt */
7143            if (!s->pe || s->vm86)
7144                goto illegal_op;
7145            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7146            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7147                             offsetof(CPUX86State, ldt.selector));
7148            ot = mod == 3 ? dflag : MO_16;
7149            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7150            break;
7151        case 2: /* lldt */
7152            if (!s->pe || s->vm86)
7153                goto illegal_op;
7154            if (s->cpl != 0) {
7155                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7156            } else {
7157                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7158                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7159                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7160                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
7161            }
7162            break;
7163        case 1: /* str */
7164            if (!s->pe || s->vm86)
7165                goto illegal_op;
7166            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7167            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7168                             offsetof(CPUX86State, tr.selector));
7169            ot = mod == 3 ? dflag : MO_16;
7170            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7171            break;
7172        case 3: /* ltr */
7173            if (!s->pe || s->vm86)
7174                goto illegal_op;
7175            if (s->cpl != 0) {
7176                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7177            } else {
7178                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7179                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7180                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7181                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
7182            }
7183            break;
7184        case 4: /* verr */
7185        case 5: /* verw */
7186            if (!s->pe || s->vm86)
7187                goto illegal_op;
7188            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7189            gen_update_cc_op(s);
7190            if (op == 4) {
7191                gen_helper_verr(cpu_env, cpu_T0);
7192            } else {
7193                gen_helper_verw(cpu_env, cpu_T0);
7194            }
7195            set_cc_op(s, CC_OP_EFLAGS);
7196            break;
7197        default:
7198            goto unknown_op;
7199        }
7200        break;
7201
7202    case 0x101:
7203        modrm = cpu_ldub_code(env, s->pc++);
7204        switch (modrm) {
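        /* 0F 01 dispatches on the whole modrm byte: the reg field
           selects the sgdt/sidt/lgdt/lidt/smsw/lmsw/invlpg group,
           while most mod==3 encodings name individual insns
           (monitor, mwait, xgetbv, the SVM group, swapgs, ...).  */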
7205        CASE_MODRM_MEM_OP(0): /* sgdt */
7206            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7207            gen_lea_modrm(env, s, modrm);
7208            tcg_gen_ld32u_tl(cpu_T0,
7209                             cpu_env, offsetof(CPUX86State, gdt.limit));
7210            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7211            gen_add_A0_im(s, 2);
7212            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7213            if (dflag == MO_16) {
7214                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7215            }
7216            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7217            break;
7218
7219        case 0xc8: /* monitor */
7220            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7221                goto illegal_op;
7222            }
7223            gen_update_cc_op(s);
7224            gen_jmp_im(pc_start - s->cs_base);
7225            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
7226            gen_extu(s->aflag, cpu_A0);
7227            gen_add_A0_ds_seg(s);
7228            gen_helper_monitor(cpu_env, cpu_A0);
7229            break;
7230
7231        case 0xc9: /* mwait */
7232            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7233                goto illegal_op;
7234            }
7235            gen_update_cc_op(s);
7236            gen_jmp_im(pc_start - s->cs_base);
7237            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7238            gen_eob(s);
7239            break;
7240
7241        case 0xca: /* clac */
7242            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7243                || s->cpl != 0) {
7244                goto illegal_op;
7245            }
7246            gen_helper_clac(cpu_env);
7247            gen_jmp_im(s->pc - s->cs_base);
7248            gen_eob(s);
7249            break;
7250
7251        case 0xcb: /* stac */
7252            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7253                || s->cpl != 0) {
7254                goto illegal_op;
7255            }
7256            gen_helper_stac(cpu_env);
7257            gen_jmp_im(s->pc - s->cs_base);
7258            gen_eob(s);
7259            break;
7260
7261        CASE_MODRM_MEM_OP(1): /* sidt */
7262            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7263            gen_lea_modrm(env, s, modrm);
7264            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
7265            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7266            gen_add_A0_im(s, 2);
7267            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7268            if (dflag == MO_16) {
7269                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7270            }
7271            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7272            break;
7273
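        /* XGETBV/XSETBV transfer a 64-bit XCR value through EDX:EAX,
           with the XCR index taken from ECX.  */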
7274        case 0xd0: /* xgetbv */
7275            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7276                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7277                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7278                goto illegal_op;
7279            }
7280            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7281            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7282            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7283            break;
7284
7285        case 0xd1: /* xsetbv */
7286            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7287                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7288                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7289                goto illegal_op;
7290            }
7291            if (s->cpl != 0) {
7292                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7293                break;
7294            }
7295            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7296                                  cpu_regs[R_EDX]);
7297            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7298            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7299            /* End TB because translation flags may change.  */
7300            gen_jmp_im(s->pc - s->cs_base);
7301            gen_eob(s);
7302            break;
7303
7304        case 0xd8: /* VMRUN */
7305            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7306                goto illegal_op;
7307            }
7308            if (s->cpl != 0) {
7309                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7310                break;
7311            }
7312            gen_update_cc_op(s);
7313            gen_jmp_im(pc_start - s->cs_base);
7314            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7315                             tcg_const_i32(s->pc - pc_start));
7316            tcg_gen_exit_tb(0);
7317            s->is_jmp = DISAS_TB_JUMP;
7318            break;
7319
7320        case 0xd9: /* VMMCALL */
7321            if (!(s->flags & HF_SVME_MASK)) {
7322                goto illegal_op;
7323            }
7324            gen_update_cc_op(s);
7325            gen_jmp_im(pc_start - s->cs_base);
7326            gen_helper_vmmcall(cpu_env);
7327            break;
7328
7329        case 0xda: /* VMLOAD */
7330            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7331                goto illegal_op;
7332            }
7333            if (s->cpl != 0) {
7334                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7335                break;
7336            }
7337            gen_update_cc_op(s);
7338            gen_jmp_im(pc_start - s->cs_base);
7339            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7340            break;
7341
7342        case 0xdb: /* VMSAVE */
7343            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7344                goto illegal_op;
7345            }
7346            if (s->cpl != 0) {
7347                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7348                break;
7349            }
7350            gen_update_cc_op(s);
7351            gen_jmp_im(pc_start - s->cs_base);
7352            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7353            break;
7354
7355        case 0xdc: /* STGI */
7356            if ((!(s->flags & HF_SVME_MASK)
7357                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7358                || !s->pe) {
7359                goto illegal_op;
7360            }
7361            if (s->cpl != 0) {
7362                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7363                break;
7364            }
7365            gen_update_cc_op(s);
7366            gen_jmp_im(pc_start - s->cs_base);
7367            gen_helper_stgi(cpu_env);
7368            break;
7369
7370        case 0xdd: /* CLGI */
7371            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7372                goto illegal_op;
7373            }
7374            if (s->cpl != 0) {
7375                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7376                break;
7377            }
7378            gen_update_cc_op(s);
7379            gen_jmp_im(pc_start - s->cs_base);
7380            gen_helper_clgi(cpu_env);
7381            break;
7382
7383        case 0xde: /* SKINIT */
7384            if ((!(s->flags & HF_SVME_MASK)
7385                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7386                || !s->pe) {
7387                goto illegal_op;
7388            }
7389            gen_update_cc_op(s);
7390            gen_jmp_im(pc_start - s->cs_base);
7391            gen_helper_skinit(cpu_env);
7392            break;
7393
7394        case 0xdf: /* INVLPGA */
7395            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7396                goto illegal_op;
7397            }
7398            if (s->cpl != 0) {
7399                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7400                break;
7401            }
7402            gen_update_cc_op(s);
7403            gen_jmp_im(pc_start - s->cs_base);
7404            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7405            break;
7406
7407        CASE_MODRM_MEM_OP(2): /* lgdt */
7408            if (s->cpl != 0) {
7409                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7410                break;
7411            }
7412            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7413            gen_lea_modrm(env, s, modrm);
7414            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7415            gen_add_A0_im(s, 2);
7416            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7417            if (dflag == MO_16) {
7418                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7419            }
7420            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7421            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7422            break;
7423
7424        CASE_MODRM_MEM_OP(3): /* lidt */
7425            if (s->cpl != 0) {
7426                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7427                break;
7428            }
7429            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7430            gen_lea_modrm(env, s, modrm);
7431            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7432            gen_add_A0_im(s, 2);
7433            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7434            if (dflag == MO_16) {
7435                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7436            }
7437            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7438            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
7439            break;
7440
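            /* SMSW exposes the low 16 bits of CR0.  In 64-bit mode a
               register destination may use the full operand size; a
               memory destination is always 16 bits.  */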
7441        CASE_MODRM_OP(4): /* smsw */
7442            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7443            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
7444            if (CODE64(s)) {
7445                mod = (modrm >> 6) & 3;
7446                ot = (mod != 3 ? MO_16 : s->dflag);
7447            } else {
7448                ot = MO_16;
7449            }
7450            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7451            break;
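            /* RDPKRU and WRPKRU transfer PKRU via EDX:EAX; the 64-bit
               helper value is split between (RDPKRU) or assembled from
               (WRPKRU) the two registers, and ECX is passed to the
               helper, which is expected to validate it.  */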
7452        case 0xee: /* rdpkru */
7453            if (prefixes & PREFIX_LOCK) {
7454                goto illegal_op;
7455            }
7456            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7457            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7458            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7459            break;
7460        case 0xef: /* wrpkru */
7461            if (prefixes & PREFIX_LOCK) {
7462                goto illegal_op;
7463            }
7464            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7465                                  cpu_regs[R_EDX]);
7466            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7467            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7468            break;
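            /* LMSW can change CR0 bits (such as PE) that affect how
               code is translated, so the TB is ended afterwards.  */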
7469        CASE_MODRM_OP(6): /* lmsw */
7470            if (s->cpl != 0) {
7471                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7472                break;
7473            }
7474            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7475            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7476            gen_helper_lmsw(cpu_env, cpu_T0);
7477            gen_jmp_im(s->pc - s->cs_base);
7478            gen_eob(s);
7479            break;
7480
7481        CASE_MODRM_MEM_OP(7): /* invlpg */
7482            if (s->cpl != 0) {
7483                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7484                break;
7485            }
7486            gen_update_cc_op(s);
7487            gen_jmp_im(pc_start - s->cs_base);
7488            gen_lea_modrm(env, s, modrm);
7489            gen_helper_invlpg(cpu_env, cpu_A0);
7490            gen_jmp_im(s->pc - s->cs_base);
7491            gen_eob(s);
7492            break;
7493
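            /* SWAPGS (64-bit, CPL 0 only) exchanges the GS base with
               the kernelgsbase field of CPUX86State, i.e. the
               KERNEL_GS_BASE MSR.  */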
7494        case 0xf8: /* swapgs */
7495#ifdef TARGET_X86_64
7496            if (CODE64(s)) {
7497                if (s->cpl != 0) {
7498                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7499                } else {
7500                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
7501                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7502                                  offsetof(CPUX86State, kernelgsbase));
7503                    tcg_gen_st_tl(cpu_T0, cpu_env,
7504                                  offsetof(CPUX86State, kernelgsbase));
7505                }
7506                break;
7507            }
7508#endif
7509            goto illegal_op;
7510
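            /* Reading the time stamp counter interacts with icount, so
               in that mode the helper call is bracketed with
               gen_io_start()/gen_io_end() and the TB is ended to keep
               the instruction accounting exact.  */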
7511        case 0xf9: /* rdtscp */
7512            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7513                goto illegal_op;
7514            }
7515            gen_update_cc_op(s);
7516            gen_jmp_im(pc_start - s->cs_base);
7517            if (s->tb->cflags & CF_USE_ICOUNT) {
7518                gen_io_start();
7519            }
7520            gen_helper_rdtscp(cpu_env);
7521            if (s->tb->cflags & CF_USE_ICOUNT) {
7522                gen_io_end();
7523                gen_jmp(s, s->pc - s->cs_base);
7524            }
7525            break;
7526
7527        default:
7528            goto unknown_op;
7529        }
7530        break;
7531
7532    case 0x108: /* invd */
7533    case 0x109: /* wbinvd */
7534        if (s->cpl != 0) {
7535            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7536        } else {
7537            gen_svm_check_intercept(s, pc_start, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD);
7538            /* nothing to do */
7539        }
7540        break;
7541    case 0x63: /* arpl or movsxd (x86_64) */
7542#ifdef TARGET_X86_64
7543        if (CODE64(s)) {
7544            int d_ot;
7545            /* d_ot is the size of the destination */
7546            d_ot = dflag;
7547
7548            modrm = cpu_ldub_code(env, s->pc++);
7549            reg = ((modrm >> 3) & 7) | rex_r;
7550            mod = (modrm >> 6) & 3;
7551            rm = (modrm & 7) | REX_B(s);
7552
7553            if (mod == 3) {
7554                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
7555                /* sign extend */
7556                if (d_ot == MO_64) {
7557                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
7558                }
7559                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7560            } else {
7561                gen_lea_modrm(env, s, modrm);
7562                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
7563                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7564            }
7565        } else
7566#endif
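            /* ARPL: compare the RPL fields (bits 1:0) of the two
               selectors.  If the destination's RPL is lower than the
               source's, raise it to the source's RPL and set ZF,
               otherwise clear ZF; t2 holds the CC_Z bit that is ORed
               into cc_src below.  */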
7567        {
7568            TCGLabel *label1;
7569            TCGv t0, t1, t2, a0;
7570
7571            if (!s->pe || s->vm86)
7572                goto illegal_op;
7573            t0 = tcg_temp_local_new();
7574            t1 = tcg_temp_local_new();
7575            t2 = tcg_temp_local_new();
7576            ot = MO_16;
7577            modrm = cpu_ldub_code(env, s->pc++);
7578            reg = (modrm >> 3) & 7;
7579            mod = (modrm >> 6) & 3;
7580            rm = modrm & 7;
7581            if (mod != 3) {
7582                gen_lea_modrm(env, s, modrm);
7583                gen_op_ld_v(s, ot, t0, cpu_A0);
7584                a0 = tcg_temp_local_new();
7585                tcg_gen_mov_tl(a0, cpu_A0);
7586            } else {
7587                gen_op_mov_v_reg(ot, t0, rm);
7588                TCGV_UNUSED(a0);
7589            }
7590            gen_op_mov_v_reg(ot, t1, reg);
7591            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
7592            tcg_gen_andi_tl(t1, t1, 3);
7593            tcg_gen_movi_tl(t2, 0);
7594            label1 = gen_new_label();
7595            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
7596            tcg_gen_andi_tl(t0, t0, ~3);
7597            tcg_gen_or_tl(t0, t0, t1);
7598            tcg_gen_movi_tl(t2, CC_Z);
7599            gen_set_label(label1);
7600            if (mod != 3) {
7601                gen_op_st_v(s, ot, t0, a0);
7602                tcg_temp_free(a0);
7603            } else {
7604                gen_op_mov_reg_v(ot, rm, t0);
7605            }
7606            gen_compute_eflags(s);
7607            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7608            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7609            tcg_temp_free(t0);
7610            tcg_temp_free(t1);
7611            tcg_temp_free(t2);
7612        }
7613        break;
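        /* LAR and LSL read a descriptor's access rights or segment
           limit.  The helpers set ZF in cc_src on success, and the
           result is written to the destination register only when ZF
           is set, hence the conditional branch around the move.  */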
7614    case 0x102: /* lar */
7615    case 0x103: /* lsl */
7616        {
7617            TCGLabel *label1;
7618            TCGv t0;
7619            if (!s->pe || s->vm86)
7620                goto illegal_op;
7621            ot = dflag != MO_16 ? MO_32 : MO_16;
7622            modrm = cpu_ldub_code(env, s->pc++);
7623            reg = ((modrm >> 3) & 7) | rex_r;
7624            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7625            t0 = tcg_temp_local_new();
7626            gen_update_cc_op(s);
7627            if (b == 0x102) {
7628                gen_helper_lar(t0, cpu_env, cpu_T0);
7629            } else {
7630                gen_helper_lsl(t0, cpu_env, cpu_T0);
7631            }
7632            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
7633            label1 = gen_new_label();
7634            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
7635            gen_op_mov_reg_v(ot, reg, t0);
7636            gen_set_label(label1);
7637            set_cc_op(s, CC_OP_EFLAGS);
7638            tcg_temp_free(t0);
7639        }
7640        break;
7641    case 0x118:
7642        modrm = cpu_ldub_code(env, s->pc++);
7643        mod = (modrm >> 6) & 3;
7644        op = (modrm >> 3) & 7;
7645        switch(op) {
7646        case 0: /* prefetchnta */
7647        case 1: /* prefetcht0 */
7648        case 2: /* prefetcht1 */
7649        case 3: /* prefetcht2 */
7650            if (mod == 3)
7651                goto illegal_op;
7652            gen_nop_modrm(env, s, modrm);
7653            /* nothing more to do */
7654            break;
7655        default: /* nop (multi byte) */
7656            gen_nop_modrm(env, s, modrm);
7657            break;
7658        }
7659        break;
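        /* 0f 1a: the MPX group.  The mandatory prefix selects BNDCL
           (f3), BNDCU (f2), BNDMOV load (66) or BNDLDX (no prefix,
           memory operand only).  When MPX is not enabled, the
           encodings behave as multi-byte NOPs via gen_nop_modrm().  */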
7660    case 0x11a:
7661        modrm = cpu_ldub_code(env, s->pc++);
7662        if (s->flags & HF_MPX_EN_MASK) {
7663            mod = (modrm >> 6) & 3;
7664            reg = ((modrm >> 3) & 7) | rex_r;
7665            if (prefixes & PREFIX_REPZ) {
7666                /* bndcl */
7667                if (reg >= 4
7668                    || (prefixes & PREFIX_LOCK)
7669                    || s->aflag == MO_16) {
7670                    goto illegal_op;
7671                }
7672                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7673            } else if (prefixes & PREFIX_REPNZ) {
7674                /* bndcu */
7675                if (reg >= 4
7676                    || (prefixes & PREFIX_LOCK)
7677                    || s->aflag == MO_16) {
7678                    goto illegal_op;
7679                }
7680                TCGv_i64 notu = tcg_temp_new_i64();
7681                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7682                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7683                tcg_temp_free_i64(notu);
7684            } else if (prefixes & PREFIX_DATA) {
7685                /* bndmov -- from reg/mem */
7686                if (reg >= 4 || s->aflag == MO_16) {
7687                    goto illegal_op;
7688                }
7689                if (mod == 3) {
7690                    int reg2 = (modrm & 7) | REX_B(s);
7691                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7692                        goto illegal_op;
7693                    }
7694                    if (s->flags & HF_MPX_IU_MASK) {
7695                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7696                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7697                    }
7698                } else {
7699                    gen_lea_modrm(env, s, modrm);
7700                    if (CODE64(s)) {
7701                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7702                                            s->mem_index, MO_LEQ);
7703                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7704                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7705                                            s->mem_index, MO_LEQ);
7706                    } else {
7707                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7708                                            s->mem_index, MO_LEUL);
7709                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7710                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7711                                            s->mem_index, MO_LEUL);
7712                    }
7713                    /* bnd registers are now in-use */
7714                    gen_set_hflag(s, HF_MPX_IU_MASK);
7715                }
7716            } else if (mod != 3) {
7717                /* bndldx */
7718                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7719                if (reg >= 4
7720                    || (prefixes & PREFIX_LOCK)
7721                    || s->aflag == MO_16
7722                    || a.base < -1) {
7723                    goto illegal_op;
7724                }
7725                if (a.base >= 0) {
7726                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7727                } else {
7728                    tcg_gen_movi_tl(cpu_A0, 0);
7729                }
7730                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7731                if (a.index >= 0) {
7732                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7733                } else {
7734                    tcg_gen_movi_tl(cpu_T0, 0);
7735                }
7736                if (CODE64(s)) {
7737                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
7738                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7739                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7740                } else {
7741                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
7742                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7743                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7744                }
7745                gen_set_hflag(s, HF_MPX_IU_MASK);
7746            }
7747        }
7748        gen_nop_modrm(env, s, modrm);
7749        break;
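        /* 0f 1b: the store side of the MPX group: BNDMK (f3, memory
           operand only), BNDCN (f2), BNDMOV store (66) or BNDSTX (no
           prefix).  As with 0f 1a, the encodings are NOPs when MPX is
           not enabled.  */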
7750    case 0x11b:
7751        modrm = cpu_ldub_code(env, s->pc++);
7752        if (s->flags & HF_MPX_EN_MASK) {
7753            mod = (modrm >> 6) & 3;
7754            reg = ((modrm >> 3) & 7) | rex_r;
7755            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7756                /* bndmk */
7757                if (reg >= 4
7758                    || (prefixes & PREFIX_LOCK)
7759                    || s->aflag == MO_16) {
7760                    goto illegal_op;
7761                }
7762                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7763                if (a.base >= 0) {
7764                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7765                    if (!CODE64(s)) {
7766                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7767                    }
7768                } else if (a.base == -1) {
7769                    /* no base register: the lower bound is 0 */
7770                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7771                } else {
7772                    /* rip-relative generates #ud */
7773                    goto illegal_op;
7774                }
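                    /* The upper bound is kept in complemented form, so
                       an all-zero BND register means no restriction
                       (this is also why BNDCU complements the value
                       before comparing).  */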
7775                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
7776                if (!CODE64(s)) {
7777                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
7778                }
7779                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
7780                /* bnd registers are now in-use */
7781                gen_set_hflag(s, HF_MPX_IU_MASK);
7782                break;
7783            } else if (prefixes & PREFIX_REPNZ) {
7784                /* bndcn */
7785                if (reg >= 4
7786                    || (prefixes & PREFIX_LOCK)
7787                    || s->aflag == MO_16) {
7788                    goto illegal_op;
7789                }
7790                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7791            } else if (prefixes & PREFIX_DATA) {
7792                /* bndmov -- to reg/mem */
7793                if (reg >= 4 || s->aflag == MO_16) {
7794                    goto illegal_op;
7795                }
7796                if (mod == 3) {
7797                    int reg2 = (modrm & 7) | REX_B(s);
7798                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7799                        goto illegal_op;
7800                    }
7801                    if (s->flags & HF_MPX_IU_MASK) {
7802                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7803                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7804                    }
7805                } else {
7806                    gen_lea_modrm(env, s, modrm);
7807                    if (CODE64(s)) {
7808                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7809                                            s->mem_index, MO_LEQ);
7810                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7811                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7812                                            s->mem_index, MO_LEQ);
7813                    } else {
7814                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7815                                            s->mem_index, MO_LEUL);
7816                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7817                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7818                                            s->mem_index, MO_LEUL);
7819                    }
7820                }
7821            } else if (mod != 3) {
7822                /* bndstx */
7823                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7824                if (reg >= 4
7825                    || (prefixes & PREFIX_LOCK)
7826                    || s->aflag == MO_16
7827                    || a.base < -1) {
7828                    goto illegal_op;
7829                }
7830                if (a.base >= 0) {
7831                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7832                } else {
7833                    tcg_gen_movi_tl(cpu_A0, 0);
7834                }
7835                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7836                if (a.index >= 0) {
7837                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7838                } else {
7839                    tcg_gen_movi_tl(cpu_T0, 0);
7840                }
7841                if (CODE64(s)) {
7842                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
7843                                        cpu_bndl[reg], cpu_bndu[reg]);
7844                } else {
7845                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
7846                                        cpu_bndl[reg], cpu_bndu[reg]);
7847                }
7848            }
7849        }
7850        gen_nop_modrm(env, s, modrm);
7851        break;
7852    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7853        modrm = cpu_ldub_code(env, s->pc++);
7854        gen_nop_modrm(env, s, modrm);
7855        break;
7856    case 0x120: /* mov reg, crN */
7857    case 0x122: /* mov crN, reg */
7858        if (s->cpl != 0) {
7859            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7860        } else {
7861            modrm = cpu_ldub_code(env, s->pc++);
7862            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7863             * AMD documentation (24594.pdf) and testing of
7864             * Intel 386 and 486 processors all show that the mod bits
7865             * are assumed to be 1's, regardless of actual values.
7866             */
7867            rm = (modrm & 7) | REX_B(s);
7868            reg = ((modrm >> 3) & 7) | rex_r;
7869            if (CODE64(s))
7870                ot = MO_64;
7871            else
7872                ot = MO_32;
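                /* With the LOCK prefix, "mov cr0" is AMD's alternative
                   encoding of "mov cr8" on CPUs that report CR8 legacy
                   support (CPUID_EXT3_CR8LEG).  */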
7873            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7874                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7875                reg = 8;
7876            }
7877            switch(reg) {
7878            case 0:
7879            case 2:
7880            case 3:
7881            case 4:
7882            case 8:
7883                gen_update_cc_op(s);
7884                gen_jmp_im(pc_start - s->cs_base);
7885                if (b & 2) {
7886                    gen_op_mov_v_reg(ot, cpu_T0, rm);
7887                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
7888                                         cpu_T0);
7889                    gen_jmp_im(s->pc - s->cs_base);
7890                    gen_eob(s);
7891                } else {
7892                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
7893                    gen_op_mov_reg_v(ot, rm, cpu_T0);
7894                }
7895                break;
7896            default:
7897                goto unknown_op;
7898            }
7899        }
7900        break;
7901    case 0x121: /* mov reg, drN */
7902    case 0x123: /* mov drN, reg */
7903        if (s->cpl != 0) {
7904            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7905        } else {
7906            modrm = cpu_ldub_code(env, s->pc++);
7907            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7908             * AMD documentation (24594.pdf) and testing of
7909             * Intel 386 and 486 processors all show that the mod bits
7910             * are assumed to be 1's, regardless of actual values.
7911             */
7912            rm = (modrm & 7) | REX_B(s);
7913            reg = ((modrm >> 3) & 7) | rex_r;
7914            if (CODE64(s))
7915                ot = MO_64;
7916            else
7917                ot = MO_32;
7918            if (reg >= 8) {
7919                goto illegal_op;
7920            }
7921            if (b & 2) {
7922                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
7923                gen_op_mov_v_reg(ot, cpu_T0, rm);
7924                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7925                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
7926                gen_jmp_im(s->pc - s->cs_base);
7927                gen_eob(s);
7928            } else {
7929                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
7930                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7931                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
7932                gen_op_mov_reg_v(ot, rm, cpu_T0);
7933            }
7934        }
7935        break;
7936    case 0x106: /* clts */
7937        if (s->cpl != 0) {
7938            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7939        } else {
7940            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7941            gen_helper_clts(cpu_env);
7942            /* abort block because static cpu state changed */
7943            gen_jmp_im(s->pc - s->cs_base);
7944            gen_eob(s);
7945        }
7946        break;
7947    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
7948    case 0x1c3: /* MOVNTI reg, mem */
7949        if (!(s->cpuid_features & CPUID_SSE2))
7950            goto illegal_op;
7951        ot = mo_64_32(dflag);
7952        modrm = cpu_ldub_code(env, s->pc++);
7953        mod = (modrm >> 6) & 3;
7954        if (mod == 3)
7955            goto illegal_op;
7956        reg = ((modrm >> 3) & 7) | rex_r;
7957        /* generate a generic store */
7958        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
7959        break;
7960    case 0x1ae:
7961        modrm = cpu_ldub_code(env, s->pc++);
7962        switch (modrm) {
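            /* 0f ae is a grouped opcode: the memory forms (fxsave
               through clflush) are matched on the whole modrm byte via
               the CASE_MODRM macros, while the mod=3 forms (fsgsbase,
               pcommit and the fences) use explicit byte ranges.  */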
7963        CASE_MODRM_MEM_OP(0): /* fxsave */
7964            if (!(s->cpuid_features & CPUID_FXSR)
7965                || (prefixes & PREFIX_LOCK)) {
7966                goto illegal_op;
7967            }
7968            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7969                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7970                break;
7971            }
7972            gen_lea_modrm(env, s, modrm);
7973            gen_helper_fxsave(cpu_env, cpu_A0);
7974            break;
7975
7976        CASE_MODRM_MEM_OP(1): /* fxrstor */
7977            if (!(s->cpuid_features & CPUID_FXSR)
7978                || (prefixes & PREFIX_LOCK)) {
7979                goto illegal_op;
7980            }
7981            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7982                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7983                break;
7984            }
7985            gen_lea_modrm(env, s, modrm);
7986            gen_helper_fxrstor(cpu_env, cpu_A0);
7987            break;
7988
7989        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
7990            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
7991                goto illegal_op;
7992            }
7993            if (s->flags & HF_TS_MASK) {
7994                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7995                break;
7996            }
7997            gen_lea_modrm(env, s, modrm);
7998            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
7999            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
8000            break;
8001
8002        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8003            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8004                goto illegal_op;
8005            }
8006            if (s->flags & HF_TS_MASK) {
8007                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8008                break;
8009            }
8010            gen_lea_modrm(env, s, modrm);
8011            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
8012            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
8013            break;
8014
8015        CASE_MODRM_MEM_OP(4): /* xsave */
8016            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8017                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8018                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8019                goto illegal_op;
8020            }
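                /* EDX:EAX form the 64-bit requested-feature bitmap
                   that is XSAVE's implicit operand.  */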
8021            gen_lea_modrm(env, s, modrm);
8022            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8023                                  cpu_regs[R_EDX]);
8024            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
8025            break;
8026
8027        CASE_MODRM_MEM_OP(5): /* xrstor */
8028            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8029                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8030                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8031                goto illegal_op;
8032            }
8033            gen_lea_modrm(env, s, modrm);
8034            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8035                                  cpu_regs[R_EDX]);
8036            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
8037            /* XRSTOR is how MPX is enabled, which changes how
8038               we translate.  Thus we need to end the TB.  */
8039            gen_update_cc_op(s);
8040            gen_jmp_im(s->pc - s->cs_base);
8041            gen_eob(s);
8042            break;
8043
8044        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8045            if (prefixes & PREFIX_LOCK) {
8046                goto illegal_op;
8047            }
8048            if (prefixes & PREFIX_DATA) {
8049                /* clwb */
8050                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8051                    goto illegal_op;
8052                }
8053                gen_nop_modrm(env, s, modrm);
8054            } else {
8055                /* xsaveopt */
8056                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8057                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8058                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8059                    goto illegal_op;
8060                }
8061                gen_lea_modrm(env, s, modrm);
8062                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8063                                      cpu_regs[R_EDX]);
8064                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
8065            }
8066            break;
8067
8068        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8069            if (prefixes & PREFIX_LOCK) {
8070                goto illegal_op;
8071            }
8072            if (prefixes & PREFIX_DATA) {
8073                /* clflushopt */
8074                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8075                    goto illegal_op;
8076                }
8077            } else {
8078                /* clflush */
8079                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8080                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8081                    goto illegal_op;
8082                }
8083            }
8084            gen_nop_modrm(env, s, modrm);
8085            break;
8086
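            /* f3 0f ae /0../3 with mod=3: bit 3 of modrm selects GS
               versus FS and bit 4 selects the write forms over the
               read forms, as used below.  */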
8087        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8088        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8089        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8090        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8091            if (CODE64(s)
8092                && (prefixes & PREFIX_REPZ)
8093                && !(prefixes & PREFIX_LOCK)
8094                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8095                TCGv base, treg, src, dst;
8096
8097                /* Preserve hflags bits by testing CR4 at runtime.  */
8098                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
8099                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
8100
8101                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8102                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8103
8104                if (modrm & 0x10) {
8105                    /* wr*base */
8106                    dst = base, src = treg;
8107                } else {
8108                    /* rd*base */
8109                    dst = treg, src = base;
8110                }
8111
8112                if (s->dflag == MO_32) {
8113                    tcg_gen_ext32u_tl(dst, src);
8114                } else {
8115                    tcg_gen_mov_tl(dst, src);
8116                }
8117                break;
8118            }
8119            goto unknown_op;
8120
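            /* The fences map onto TCG barriers: sfence orders stores
               against stores, lfence loads against loads, and mfence
               is a full barrier.  */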
8121        case 0xf8: /* sfence / pcommit */
8122            if (prefixes & PREFIX_DATA) {
8123                /* pcommit */
8124                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8125                    || (prefixes & PREFIX_LOCK)) {
8126                    goto illegal_op;
8127                }
8128                break;
8129            }
8130            /* fallthru */
8131        case 0xf9 ... 0xff: /* sfence */
8132            if (!(s->cpuid_features & CPUID_SSE)
8133                || (prefixes & PREFIX_LOCK)) {
8134                goto illegal_op;
8135            }
8136            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8137            break;
8138        case 0xe8 ... 0xef: /* lfence */
8139            if (!(s->cpuid_features & CPUID_SSE)
8140                || (prefixes & PREFIX_LOCK)) {
8141                goto illegal_op;
8142            }
8143            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8144            break;
8145        case 0xf0 ... 0xf7: /* mfence */
8146            if (!(s->cpuid_features & CPUID_SSE2)
8147                || (prefixes & PREFIX_LOCK)) {
8148                goto illegal_op;
8149            }
8150            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8151            break;
8152
8153        default:
8154            goto unknown_op;
8155        }
8156        break;
8157
8158    case 0x10d: /* 3DNow! prefetch(w) */
8159        modrm = cpu_ldub_code(env, s->pc++);
8160        mod = (modrm >> 6) & 3;
8161        if (mod == 3)
8162            goto illegal_op;
8163        gen_nop_modrm(env, s, modrm);
8164        break;
8165    case 0x1aa: /* rsm */
8166        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8167        if (!(s->flags & HF_SMM_MASK))
8168            goto illegal_op;
8169        gen_update_cc_op(s);
8170        gen_jmp_im(s->pc - s->cs_base);
8171        gen_helper_rsm(cpu_env);
8172        gen_eob(s);
8173        break;
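        /* POPCNT is encoded as f3 0f b8; LOCK and f2 are rejected.
           The helper computes the count and records the flags
           (architecturally, ZF is set iff the source is zero and the
           other flags are cleared) for the CC_OP_EFLAGS state below.  */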
8174    case 0x1b8: /* SSE4.2 popcnt */
8175        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8176             PREFIX_REPZ)
8177            goto illegal_op;
8178        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8179            goto illegal_op;
8180
8181        modrm = cpu_ldub_code(env, s->pc++);
8182        reg = ((modrm >> 3) & 7) | rex_r;
8183
8184        if (s->prefix & PREFIX_DATA) {
8185            ot = MO_16;
8186        } else {
8187            ot = mo_64_32(dflag);
8188        }
8189
8190        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8191        gen_helper_popcnt(cpu_T0, cpu_env, cpu_T0, tcg_const_i32(ot));
8192        gen_op_mov_reg_v(ot, reg, cpu_T0);
8193
8194        set_cc_op(s, CC_OP_EFLAGS);
8195        break;
8196    case 0x10e ... 0x10f:
8197        /* 3DNow! instructions, ignore prefixes */
8198        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
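            /* fallthru */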
8199    case 0x110 ... 0x117:
8200    case 0x128 ... 0x12f:
8201    case 0x138 ... 0x13a:
8202    case 0x150 ... 0x179:
8203    case 0x17c ... 0x17f:
8204    case 0x1c2:
8205    case 0x1c4 ... 0x1c6:
8206    case 0x1d0 ... 0x1fe:
8207        gen_sse(env, s, b, pc_start, rex_r);
8208        break;
8209    default:
8210        goto unknown_op;
8211    }
8212    return s->pc;
8213 illegal_op:
8214    gen_illegal_opcode(s);
8215    return s->pc;
8216 unknown_op:
8217    gen_unknown_opcode(env, s);
8218    return s->pc;
8219}
8220
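    /* Register the fixed TCG globals that back the translator: the env
       pointer, the cc_* condition-code fields, the general-purpose
       registers, the segment bases and the MPX bound registers, each
       mapped onto its CPUX86State field.  Guarded so that it runs only
       once even if called for every CPU.  */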
8221void tcg_x86_init(void)
8222{
8223    static const char reg_names[CPU_NB_REGS][4] = {
8224#ifdef TARGET_X86_64
8225        [R_EAX] = "rax",
8226        [R_EBX] = "rbx",
8227        [R_ECX] = "rcx",
8228        [R_EDX] = "rdx",
8229        [R_ESI] = "rsi",
8230        [R_EDI] = "rdi",
8231        [R_EBP] = "rbp",
8232        [R_ESP] = "rsp",
8233        [8]  = "r8",
8234        [9]  = "r9",
8235        [10] = "r10",
8236        [11] = "r11",
8237        [12] = "r12",
8238        [13] = "r13",
8239        [14] = "r14",
8240        [15] = "r15",
8241#else
8242        [R_EAX] = "eax",
8243        [R_EBX] = "ebx",
8244        [R_ECX] = "ecx",
8245        [R_EDX] = "edx",
8246        [R_ESI] = "esi",
8247        [R_EDI] = "edi",
8248        [R_EBP] = "ebp",
8249        [R_ESP] = "esp",
8250#endif
8251    };
8252    static const char seg_base_names[6][8] = {
8253        [R_CS] = "cs_base",
8254        [R_DS] = "ds_base",
8255        [R_ES] = "es_base",
8256        [R_FS] = "fs_base",
8257        [R_GS] = "gs_base",
8258        [R_SS] = "ss_base",
8259    };
8260    static const char bnd_regl_names[4][8] = {
8261        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8262    };
8263    static const char bnd_regu_names[4][8] = {
8264        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8265    };
8266    int i;
8267    static bool initialized;
8268
8269    if (initialized) {
8270        return;
8271    }
8272    initialized = true;
8273
8274    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
8275    tcg_ctx.tcg_env = cpu_env;
8276    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8277                                       offsetof(CPUX86State, cc_op), "cc_op");
8278    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8279                                    "cc_dst");
8280    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8281                                    "cc_src");
8282    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8283                                     "cc_src2");
8284
8285    for (i = 0; i < CPU_NB_REGS; ++i) {
8286        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8287                                         offsetof(CPUX86State, regs[i]),
8288                                         reg_names[i]);
8289    }
8290
8291    for (i = 0; i < 6; ++i) {
8292        cpu_seg_base[i]
8293            = tcg_global_mem_new(cpu_env,
8294                                 offsetof(CPUX86State, segs[i].base),
8295                                 seg_base_names[i]);
8296    }
8297
8298    for (i = 0; i < 4; ++i) {
8299        cpu_bndl[i]
8300            = tcg_global_mem_new_i64(cpu_env,
8301                                     offsetof(CPUX86State, bnd_regs[i].lb),
8302                                     bnd_regl_names[i]);
8303        cpu_bndu[i]
8304            = tcg_global_mem_new_i64(cpu_env,
8305                                     offsetof(CPUX86State, bnd_regs[i].ub),
8306                                     bnd_regu_names[i]);
8307    }
8308}
8309
8310/* generate intermediate code for basic block 'tb'.  */
8311void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
8312{
8313    X86CPU *cpu = x86_env_get_cpu(env);
8314    CPUState *cs = CPU(cpu);
8315    DisasContext dc1, *dc = &dc1;
8316    target_ulong pc_ptr;
8317    uint32_t flags;
8318    target_ulong pc_start;
8319    target_ulong cs_base;
8320    int num_insns;
8321    int max_insns;
8322
8323    /* generate intermediate code */
8324    pc_start = tb->pc;
8325    cs_base = tb->cs_base;
8326    flags = tb->flags;
8327
8328    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8329    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8330    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8331    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8332    dc->f_st = 0;
8333    dc->vm86 = (flags >> VM_SHIFT) & 1;
8334    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8335    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8336    dc->tf = (flags >> TF_SHIFT) & 1;
8337    dc->singlestep_enabled = cs->singlestep_enabled;
8338    dc->cc_op = CC_OP_DYNAMIC;
8339    dc->cc_op_dirty = false;
8340    dc->cs_base = cs_base;
8341    dc->tb = tb;
8342    dc->popl_esp_hack = 0;
8343    /* select memory access functions */
8344    dc->mem_index = 0;
8345#ifdef CONFIG_SOFTMMU
8346    dc->mem_index = cpu_mmu_index(env, false);
8347#endif
8348    dc->cpuid_features = env->features[FEAT_1_EDX];
8349    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8350    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8351    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8352    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8353    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8354#ifdef TARGET_X86_64
8355    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8356    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8357#endif
8358    dc->flags = flags;
8359    dc->jmp_opt = !(dc->tf || cs->singlestep_enabled ||
8360                    (flags & HF_INHIBIT_IRQ_MASK));
8361    /* Do not optimize repz jumps at all in icount mode, because
8362       rep movsS instructions are executed with different code paths
8363       in the !repz_opt and repz_opt modes.  The former was always
8364       used except in single-step mode.  This setting disables the
8365       jump optimization so that the control paths become equivalent
8366       in normal-run and single-step modes.
8367       As a consequence there is no jump optimization for repz in
8368       record/replay modes, and there is always an additional
8369       step for ecx=0 when icount is enabled.
8370     */
8371    dc->repz_opt = !dc->jmp_opt && !(tb->cflags & CF_USE_ICOUNT);
8372#if 0
8373    /* check addseg logic */
8374    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8375        printf("ERROR addseg\n");
8376#endif
8377
8378    cpu_T0 = tcg_temp_new();
8379    cpu_T1 = tcg_temp_new();
8380    cpu_A0 = tcg_temp_new();
8381
8382    cpu_tmp0 = tcg_temp_new();
8383    cpu_tmp1_i64 = tcg_temp_new_i64();
8384    cpu_tmp2_i32 = tcg_temp_new_i32();
8385    cpu_tmp3_i32 = tcg_temp_new_i32();
8386    cpu_tmp4 = tcg_temp_new();
8387    cpu_ptr0 = tcg_temp_new_ptr();
8388    cpu_ptr1 = tcg_temp_new_ptr();
8389    cpu_cc_srcT = tcg_temp_local_new();
8390
8391    dc->is_jmp = DISAS_NEXT;
8392    pc_ptr = pc_start;
8393    num_insns = 0;
8394    max_insns = tb->cflags & CF_COUNT_MASK;
8395    if (max_insns == 0) {
8396        max_insns = CF_COUNT_MASK;
8397    }
8398    if (max_insns > TCG_MAX_INSNS) {
8399        max_insns = TCG_MAX_INSNS;
8400    }
8401
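        /* Main translation loop: each iteration records the insn start
           (pc and cc_op, later consumed by restore_state_to_opc),
           checks for breakpoints, translates one instruction, and then
           decides whether the TB must end: on a jump or CPU-state
           change, in single-step mode, on an icount page boundary, or
           when the op buffer or instruction budget is exhausted.  */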
8402    gen_tb_start(tb);
8403    for(;;) {
8404        tcg_gen_insn_start(pc_ptr, dc->cc_op);
8405        num_insns++;
8406
8407        /* If RF is set, suppress an internally generated breakpoint.  */
8408        if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
8409                                         tb->flags & HF_RF_MASK
8410                                         ? BP_GDB : BP_ANY))) {
8411            gen_debug(dc, pc_ptr - dc->cs_base);
8412            /* The address covered by the breakpoint must be included in
8413               [tb->pc, tb->pc + tb->size) in order for it to be
8414               properly cleared -- thus we increment the PC here so that
8415               the logic setting tb->size below does the right thing.  */
8416            pc_ptr += 1;
8417            goto done_generating;
8418        }
8419        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
8420            gen_io_start();
8421        }
8422
8423        pc_ptr = disas_insn(env, dc, pc_ptr);
8424        /* stop translation if indicated */
8425        if (dc->is_jmp)
8426            break;
8427        /* In single-step mode, we generate only one instruction and
8428           then raise an exception.  */
8429        /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8430           the flag and abort the translation to give the IRQs a
8431           chance to happen.  */
8432        if (dc->tf || dc->singlestep_enabled ||
8433            (flags & HF_INHIBIT_IRQ_MASK)) {
8434            gen_jmp_im(pc_ptr - dc->cs_base);
8435            gen_eob(dc);
8436            break;
8437        }
8438        /* In icount mode, do not cross a page boundary, since doing
8439           so can raise an exception.  Stop only when the boundary
8440           would be crossed by the first instruction of the next
8441           block.  If the current instruction has already crossed it,
8442           that is fine, because no exception has stopped this code.
8443         */
8444        if ((tb->cflags & CF_USE_ICOUNT)
8445            && ((pc_ptr & TARGET_PAGE_MASK)
8446                != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK)
8447                || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) {
8448            gen_jmp_im(pc_ptr - dc->cs_base);
8449            gen_eob(dc);
8450            break;
8451        }
8452        /* if the translation has grown too long, stop generating as well */
8453        if (tcg_op_buf_full() ||
8454            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
8455            num_insns >= max_insns) {
8456            gen_jmp_im(pc_ptr - dc->cs_base);
8457            gen_eob(dc);
8458            break;
8459        }
8460        if (singlestep) {
8461            gen_jmp_im(pc_ptr - dc->cs_base);
8462            gen_eob(dc);
8463            break;
8464        }
8465    }
8466    if (tb->cflags & CF_LAST_IO)
8467        gen_io_end();
8468done_generating:
8469    gen_tb_end(tb, num_insns);
8470
8471#ifdef DEBUG_DISAS
8472    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
8473        && qemu_log_in_addr_range(pc_start)) {
8474        int disas_flags;
8475        qemu_log_lock();
8476        qemu_log("----------------\n");
8477        qemu_log("IN: %s\n", lookup_symbol(pc_start));
8478#ifdef TARGET_X86_64
8479        if (dc->code64)
8480            disas_flags = 2;
8481        else
8482#endif
8483            disas_flags = !dc->code32;
8484        log_target_disas(cs, pc_start, pc_ptr - pc_start, disas_flags);
8485        qemu_log("\n");
8486        qemu_log_unlock();
8487    }
8488#endif
8489
8490    tb->size = pc_ptr - pc_start;
8491    tb->icount = num_insns;
8492}
8493
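/* data[] holds the values recorded by tcg_gen_insn_start() for the
   faulting instruction: data[0] is the target pc (eip + cs_base) and
   data[1] the cc_op in effect at the start of the instruction;
   CC_OP_DYNAMIC means env->cc_op is already valid and is left alone.  */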
8494void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8495                          target_ulong *data)
8496{
8497    int cc_op = data[1];
8498    env->eip = data[0] - tb->cs_base;
8499    if (cc_op != CC_OP_DYNAMIC) {
8500        env->cc_op = cc_op;
8501    }
8502}
8503