qemu/target/i386/tcg/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "trace-tcg.h"
#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
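/* For example, CASE_MODRM_MEM_OP(0) expands to the three case ranges
   0x00...0x07, 0x40...0x47 and 0x80...0x87, i.e. every modrm byte with
   op field 0 and mod != 3; CASE_MODRM_OP(0) also matches the register
   forms 0xc0...0xc7.  */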

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    MemOp aflag;
    MemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
#ifdef TARGET_X86_64
    bool x86_64_hregs;
#endif
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

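/* Record the CC_OP that the insn being translated will leave behind,
   discarding any CC globals that the new setting makes dead.  */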
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

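/* Flush a dirty, statically-known CC_OP value out to the cpu_cc_op
   global in the CPU state.  */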
static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || s->x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

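/* Move T0 into general register REG with operand size OT, handling the
   AH/CH/DH/BH encodings for 8-bit operands and the implicit zeroing of
   the high half for 32-bit operands on x86_64.  */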
static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

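/* Extend SRC into DST according to SIZE, signed or unsigned.  Returns
   the TCGv that holds the extended value, which is SRC itself when no
   extension was necessary.  */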
static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

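/* Generate the permission checks for an I/O port access: the TSS I/O
   bitmap check when CPL > IOPL or in vm86 mode, plus the SVM I/O
   intercept check when running inside a guest.  */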
static void gen_check_io(DisasContext *s, MemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, s->tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, s->tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, s->tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_GUEST_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

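/* String MOVS: copy one element from DS:ESI (or the override segment)
   to ES:EDI, then advance both index registers by the direction-flag
   delta.  */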
static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

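/* Describes how to evaluate a condition code: compare REG against REG2
   (if USE_REG2) or against the immediate IMM using COND, optionally
   masking REG with MASK first (-1 means no masking).  NO_SETCOND means
   REG already holds the boolean value and no setcond is needed.  */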
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

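/* If I/O breakpoints are enabled, call the helper that checks the
   debug registers against this port access.  */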
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}


static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* same method as Valgrind: we generate jumps to the current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

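/* As GEN_REPZ, but for SCAS and CMPS, which also terminate on a ZF
   test after each iteration; NZ selects between the REPZ and REPNZ
   sense.  */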
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

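/* FPU "ST0 op FT0" arithmetic, dispatched on the 3-bit op field.
   Cases 2 (FCOM) and 3 (FCOMP) both use the compare helper; the stack
   pop for FCOMP is handled by the caller.  */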
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

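/* Update the flags after a variable-count shift.  RESULT is the shifted
   value and SHM1 the value shifted by COUNT-1; when COUNT is zero the
   existing flags and CC_OP must be preserved, which is done with
   conditional moves.  */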
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

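/* SHL/SHR/SAR with the count in T1.  The value shifted by COUNT-1 is
   also computed so that the last bit shifted out can become CF.  */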
static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

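/* ROL/ROR with the count in T1.  C becomes the bit that was rotated
   across the word boundary and O is computed from it; CC_OP is chosen
   at run time because a zero count must leave the flags untouched.  */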
static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

1619static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1620                          int is_right)
1621{
1622    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1623    int shift;
1624
1625    /* load */
1626    if (op1 == OR_TMP0) {
1627        gen_op_ld_v(s, ot, s->T0, s->A0);
1628    } else {
1629        gen_op_mov_v_reg(s, ot, s->T0, op1);
1630    }
1631
1632    op2 &= mask;
1633    if (op2 != 0) {
1634        switch (ot) {
1635#ifdef TARGET_X86_64
1636        case MO_32:
1637            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1638            if (is_right) {
1639                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1640            } else {
1641                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1642            }
1643            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1644            break;
1645#endif
1646        default:
1647            if (is_right) {
1648                tcg_gen_rotri_tl(s->T0, s->T0, op2);
1649            } else {
1650                tcg_gen_rotli_tl(s->T0, s->T0, op2);
1651            }
1652            break;
1653        case MO_8:
1654            mask = 7;
1655            goto do_shifts;
1656        case MO_16:
1657            mask = 15;
1658        do_shifts:
1659            shift = op2 & mask;
1660            if (is_right) {
1661                shift = mask + 1 - shift;
1662            }
1663            gen_extu(ot, s->T0);
1664            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1665            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1666            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1667            break;
1668        }
1669    }
1670
1671    /* store */
1672    gen_op_st_rm_T0_A0(s, ot, op1);
1673
1674    if (op2 != 0) {
1675        /* Compute the flags into CC_SRC.  */
1676        gen_compute_eflags(s);
1677
1678        /* The value that was "rotated out" is now present at the other end
1679           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1680           since we've computed the flags into CC_SRC, these variables are
1681           currently dead.  */
1682        if (is_right) {
1683            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1684            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1685            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1686        } else {
1687            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1688            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1689        }
1690        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1691        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1692        set_cc_op(s, CC_OP_ADCOX);
1693    }
1694}
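
/* Worked example of the flag extraction above (illustrative): for a
 * 32-bit ROL by a non-zero count, CF is bit 0 of the result and OF is
 * CF XOR bit 31 of the result, e.g.
 *
 *     0x80000000 ROL 1 -> 0x00000001    CF = 1, OF = 0 ^ 1 = 1
 *
 * while for ROR, CF is bit 31 of the result and OF is bit 31 XOR
 * bit 30 of the result, e.g.
 *
 *     0x00000001 ROR 1 -> 0x80000000    CF = 1, OF = 1 ^ 0 = 1
 *
 * matching the architected definition of OF for rotates by 1.
 */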
1695
1696/* XXX: add faster immediate = 1 case */
1697static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1698                           int is_right)
1699{
1700    gen_compute_eflags(s);
1701    assert(s->cc_op == CC_OP_EFLAGS);
1702
1703    /* load */
1704    if (op1 == OR_TMP0)
1705        gen_op_ld_v(s, ot, s->T0, s->A0);
1706    else
1707        gen_op_mov_v_reg(s, ot, s->T0, op1);
1708
1709    if (is_right) {
1710        switch (ot) {
1711        case MO_8:
1712            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1713            break;
1714        case MO_16:
1715            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1716            break;
1717        case MO_32:
1718            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1719            break;
1720#ifdef TARGET_X86_64
1721        case MO_64:
1722            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1723            break;
1724#endif
1725        default:
1726            tcg_abort();
1727        }
1728    } else {
1729        switch (ot) {
1730        case MO_8:
1731            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1732            break;
1733        case MO_16:
1734            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1735            break;
1736        case MO_32:
1737            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1738            break;
1739#ifdef TARGET_X86_64
1740        case MO_64:
1741            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1742            break;
1743#endif
1744        default:
1745            tcg_abort();
1746        }
1747    }
1748    /* store */
1749    gen_op_st_rm_T0_A0(s, ot, op1);
1750}
1751
1752/* XXX: add faster immediate case */
1753static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1754                             bool is_right, TCGv count_in)
1755{
1756    target_ulong mask = (ot == MO_64 ? 63 : 31);
1757    TCGv count;
1758
1759    /* load */
1760    if (op1 == OR_TMP0) {
1761        gen_op_ld_v(s, ot, s->T0, s->A0);
1762    } else {
1763        gen_op_mov_v_reg(s, ot, s->T0, op1);
1764    }
1765
1766    count = tcg_temp_new();
1767    tcg_gen_andi_tl(count, count_in, mask);
1768
1769    switch (ot) {
1770    case MO_16:
1771        /* Note: we implement the Intel behaviour for shift count > 16.
1772           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1773           portion by constructing it as a 32-bit value.  */
1774        if (is_right) {
1775            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1776            tcg_gen_mov_tl(s->T1, s->T0);
1777            tcg_gen_mov_tl(s->T0, s->tmp0);
1778        } else {
1779            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1780        }
1781        /*
1782         * If TARGET_X86_64 is defined then fall through into the MO_32
1783         * case, otherwise fall through into the default case.
1784         */
1785    case MO_32:
1786#ifdef TARGET_X86_64
1787        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1788        tcg_gen_subi_tl(s->tmp0, count, 1);
1789        if (is_right) {
1790            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1791            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1792            tcg_gen_shr_i64(s->T0, s->T0, count);
1793        } else {
1794            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1795            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1796            tcg_gen_shl_i64(s->T0, s->T0, count);
1797            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1798            tcg_gen_shri_i64(s->T0, s->T0, 32);
1799        }
1800        break;
1801#endif
1802    default:
1803        tcg_gen_subi_tl(s->tmp0, count, 1);
1804        if (is_right) {
1805            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1806
1807            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1808            tcg_gen_shr_tl(s->T0, s->T0, count);
1809            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1810        } else {
1811            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1812            if (ot == MO_16) {
1813                /* Only needed if count > 16, for Intel behaviour.  */
1814                tcg_gen_subfi_tl(s->tmp4, 33, count);
1815                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1816                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1817            }
1818
1819            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1820            tcg_gen_shl_tl(s->T0, s->T0, count);
1821            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1822        }
1823        tcg_gen_movi_tl(s->tmp4, 0);
1824        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1825                           s->tmp4, s->T1);
1826        tcg_gen_or_tl(s->T0, s->T0, s->T1);
1827        break;
1828    }
1829
1830    /* store */
1831    gen_op_st_rm_T0_A0(s, ot, op1);
1832
1833    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1834    tcg_temp_free(count);
1835}
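
/* Worked example of the MO_16 construction above (illustrative):
 * "shrdw $4, %bx, %ax" with AX = 0x1234 and BX = 0x5678 builds the
 * 32-bit value B:A = 0x56781234, shifts it right by 4 to 0x05678123,
 * and writes the low 16 bits, 0x8123, back to AX.  Counts larger
 * than 16 pull bits of A back in from the third 16-bit slot of
 * A:B:A, which is the Intel behaviour noted above.
 */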
1836
1837static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1838{
1839    if (s != OR_TMP1)
1840        gen_op_mov_v_reg(s1, ot, s1->T1, s);
1841    switch(op) {
1842    case OP_ROL:
1843        gen_rot_rm_T1(s1, ot, d, 0);
1844        break;
1845    case OP_ROR:
1846        gen_rot_rm_T1(s1, ot, d, 1);
1847        break;
1848    case OP_SHL:
1849    case OP_SHL1:
1850        gen_shift_rm_T1(s1, ot, d, 0, 0);
1851        break;
1852    case OP_SHR:
1853        gen_shift_rm_T1(s1, ot, d, 1, 0);
1854        break;
1855    case OP_SAR:
1856        gen_shift_rm_T1(s1, ot, d, 1, 1);
1857        break;
1858    case OP_RCL:
1859        gen_rotc_rm_T1(s1, ot, d, 0);
1860        break;
1861    case OP_RCR:
1862        gen_rotc_rm_T1(s1, ot, d, 1);
1863        break;
1864    }
1865}
1866
1867static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1868{
1869    switch(op) {
1870    case OP_ROL:
1871        gen_rot_rm_im(s1, ot, d, c, 0);
1872        break;
1873    case OP_ROR:
1874        gen_rot_rm_im(s1, ot, d, c, 1);
1875        break;
1876    case OP_SHL:
1877    case OP_SHL1:
1878        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1879        break;
1880    case OP_SHR:
1881        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1882        break;
1883    case OP_SAR:
1884        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1885        break;
1886    default:
1887        /* currently not optimized */
1888        tcg_gen_movi_tl(s1->T1, c);
1889        gen_shift(s1, op, ot, d, OR_TMP1);
1890        break;
1891    }
1892}
1893
1894#define X86_MAX_INSN_LENGTH 15
1895
1896static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1897{
1898    uint64_t pc = s->pc;
1899
1900    s->pc += num_bytes;
1901    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1902        /* If the instruction's 16th byte is on a different page than the 1st, a
1903         * page fault on the second page wins over the general protection fault
1904         * caused by the instruction being too long.
1905         * This can happen even if the operand is only one byte long!
1906         */
1907        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1908            volatile uint8_t unused =
1909                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1910            (void) unused;
1911        }
1912        siglongjmp(s->jmpbuf, 1);
1913    }
1914
1915    return pc;
1916}
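
/* Illustration of the probe above: consider an instruction that
 * starts two bytes before a page boundary and is padded with
 * redundant prefixes so that its 16th byte lands on the following
 * page.  The dummy cpu_ldub_code() touches that second page, so a
 * #PF is raised there first if the page is inaccessible; only if the
 * load succeeds does the siglongjmp abandon decoding so that the
 * over-long instruction can be reported with #GP instead.
 */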
1917
1918static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1919{
1920    return translator_ldub(env, advance_pc(env, s, 1));
1921}
1922
1923static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1924{
1925    return translator_ldsw(env, advance_pc(env, s, 2));
1926}
1927
1928static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1929{
1930    return translator_lduw(env, advance_pc(env, s, 2));
1931}
1932
1933static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1934{
1935    return translator_ldl(env, advance_pc(env, s, 4));
1936}
1937
1938#ifdef TARGET_X86_64
1939static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1940{
1941    return translator_ldq(env, advance_pc(env, s, 8));
1942}
1943#endif
1944
1945/* Decompose an address.  */
1946
1947typedef struct AddressParts {
1948    int def_seg;
1949    int base;
1950    int index;
1951    int scale;
1952    target_long disp;
1953} AddressParts;
1954
1955static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1956                                    int modrm)
1957{
1958    int def_seg, base, index, scale, mod, rm;
1959    target_long disp;
1960    bool havesib;
1961
1962    def_seg = R_DS;
1963    index = -1;
1964    scale = 0;
1965    disp = 0;
1966
1967    mod = (modrm >> 6) & 3;
1968    rm = modrm & 7;
1969    base = rm | REX_B(s);
1970
1971    if (mod == 3) {
1972        /* Normally filtered out earlier, but including this path
1973           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1974        goto done;
1975    }
1976
1977    switch (s->aflag) {
1978    case MO_64:
1979    case MO_32:
1980        havesib = 0;
1981        if (rm == 4) {
1982            int code = x86_ldub_code(env, s);
1983            scale = (code >> 6) & 3;
1984            index = ((code >> 3) & 7) | REX_X(s);
1985            if (index == 4) {
1986                index = -1;  /* no index */
1987            }
1988            base = (code & 7) | REX_B(s);
1989            havesib = 1;
1990        }
1991
1992        switch (mod) {
1993        case 0:
1994            if ((base & 7) == 5) {
1995                base = -1;
1996                disp = (int32_t)x86_ldl_code(env, s);
1997                if (CODE64(s) && !havesib) {
1998                    base = -2;
1999                    disp += s->pc + s->rip_offset;
2000                }
2001            }
2002            break;
2003        case 1:
2004            disp = (int8_t)x86_ldub_code(env, s);
2005            break;
2006        default:
2007        case 2:
2008            disp = (int32_t)x86_ldl_code(env, s);
2009            break;
2010        }
2011
2012        /* For correct popl handling with esp.  */
2013        if (base == R_ESP && s->popl_esp_hack) {
2014            disp += s->popl_esp_hack;
2015        }
2016        if (base == R_EBP || base == R_ESP) {
2017            def_seg = R_SS;
2018        }
2019        break;
2020
2021    case MO_16:
2022        if (mod == 0) {
2023            if (rm == 6) {
2024                base = -1;
2025                disp = x86_lduw_code(env, s);
2026                break;
2027            }
2028        } else if (mod == 1) {
2029            disp = (int8_t)x86_ldub_code(env, s);
2030        } else {
2031            disp = (int16_t)x86_lduw_code(env, s);
2032        }
2033
2034        switch (rm) {
2035        case 0:
2036            base = R_EBX;
2037            index = R_ESI;
2038            break;
2039        case 1:
2040            base = R_EBX;
2041            index = R_EDI;
2042            break;
2043        case 2:
2044            base = R_EBP;
2045            index = R_ESI;
2046            def_seg = R_SS;
2047            break;
2048        case 3:
2049            base = R_EBP;
2050            index = R_EDI;
2051            def_seg = R_SS;
2052            break;
2053        case 4:
2054            base = R_ESI;
2055            break;
2056        case 5:
2057            base = R_EDI;
2058            break;
2059        case 6:
2060            base = R_EBP;
2061            def_seg = R_SS;
2062            break;
2063        default:
2064        case 7:
2065            base = R_EBX;
2066            break;
2067        }
2068        break;
2069
2070    default:
2071        tcg_abort();
2072    }
2073
2074 done:
2075    return (AddressParts){ def_seg, base, index, scale, disp };
2076}
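
/* Worked decode (illustrative): in 32-bit mode, modrm = 0x44 gives
 * mod = 1 and rm = 4, so a SIB byte follows; SIB = 0x58 gives
 * scale = 1, index = 3 (EBX) and base = 0 (EAX); mod = 1 then reads
 * a sign-extended disp8, say 0x10.  The result is
 *
 *     (AddressParts){ R_DS, R_EAX, R_EBX, 1, 0x10 }
 *
 * i.e. the operand DS:[EAX + EBX*2 + 0x10].
 */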
2077
2078/* Compute the address, with a minimum number of TCG ops.  */
2079static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2080{
2081    TCGv ea = NULL;
2082
2083    if (a.index >= 0) {
2084        if (a.scale == 0) {
2085            ea = cpu_regs[a.index];
2086        } else {
2087            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2088            ea = s->A0;
2089        }
2090        if (a.base >= 0) {
2091            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2092            ea = s->A0;
2093        }
2094    } else if (a.base >= 0) {
2095        ea = cpu_regs[a.base];
2096    }
2097    if (!ea) {
2098        tcg_gen_movi_tl(s->A0, a.disp);
2099        ea = s->A0;
2100    } else if (a.disp != 0) {
2101        tcg_gen_addi_tl(s->A0, ea, a.disp);
2102        ea = s->A0;
2103    }
2104
2105    return ea;
2106}
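
/* For illustration of the cost: a base-only address returns
 * cpu_regs[base] directly and emits no TCG ops at all; a
 * displacement-only address costs a single movi; the full
 * base + index*scale + disp form costs at most one shift and two
 * adds, all accumulated into s->A0.
 */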
2107
2108static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2109{
2110    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2111    TCGv ea = gen_lea_modrm_1(s, a);
2112    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2113}
2114
2115static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2116{
2117    (void)gen_lea_modrm_0(env, s, modrm);
2118}
2119
2120/* Used for BNDCL, BNDCU, BNDCN.  */
2121static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2122                      TCGCond cond, TCGv_i64 bndv)
2123{
2124    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2125
2126    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2127    if (!CODE64(s)) {
2128        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2129    }
2130    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2131    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2132    gen_helper_bndck(cpu_env, s->tmp2_i32);
2133}
2134
2135/* used for LEA and MOV AX, mem */
2136static void gen_add_A0_ds_seg(DisasContext *s)
2137{
2138    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2139}
2140
2141/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2142   OR_TMP0 */
2143static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2144                           MemOp ot, int reg, int is_store)
2145{
2146    int mod, rm;
2147
2148    mod = (modrm >> 6) & 3;
2149    rm = (modrm & 7) | REX_B(s);
2150    if (mod == 3) {
2151        if (is_store) {
2152            if (reg != OR_TMP0)
2153                gen_op_mov_v_reg(s, ot, s->T0, reg);
2154            gen_op_mov_reg_v(s, ot, rm, s->T0);
2155        } else {
2156            gen_op_mov_v_reg(s, ot, s->T0, rm);
2157            if (reg != OR_TMP0)
2158                gen_op_mov_reg_v(s, ot, reg, s->T0);
2159        }
2160    } else {
2161        gen_lea_modrm(env, s, modrm);
2162        if (is_store) {
2163            if (reg != OR_TMP0)
2164                gen_op_mov_v_reg(s, ot, s->T0, reg);
2165            gen_op_st_v(s, ot, s->T0, s->A0);
2166        } else {
2167            gen_op_ld_v(s, ot, s->T0, s->A0);
2168            if (reg != OR_TMP0)
2169                gen_op_mov_reg_v(s, ot, reg, s->T0);
2170        }
2171    }
2172}
2173
2174static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2175{
2176    uint32_t ret;
2177
2178    switch (ot) {
2179    case MO_8:
2180        ret = x86_ldub_code(env, s);
2181        break;
2182    case MO_16:
2183        ret = x86_lduw_code(env, s);
2184        break;
2185    case MO_32:
2186#ifdef TARGET_X86_64
2187    case MO_64:
2188#endif
2189        ret = x86_ldl_code(env, s);
2190        break;
2191    default:
2192        tcg_abort();
2193    }
2194    return ret;
2195}
2196
2197static inline int insn_const_size(MemOp ot)
2198{
2199    if (ot <= MO_32) {
2200        return 1 << ot;
2201    } else {
2202        return 4;
2203    }
2204}
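
/* For example, a MO_16 immediate occupies two bytes, while MO_64
 * operations still fetch only four bytes: x86-64 encodes 64-bit ALU
 * immediates as sign-extended imm32 (insn_get() above makes the same
 * assumption for MO_64); the one true imm64 form, movabs, is decoded
 * separately.
 */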
2205
2206static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2207{
2208#ifndef CONFIG_USER_ONLY
2209    return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2210           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2211#else
2212    return true;
2213#endif
2214}
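
/* Roughly speaking, direct chaining is only safe to a target on a
 * page this TB is already tied to -- the page of the TB's first
 * instruction or the page currently being decoded -- since remapping
 * any other page would not invalidate this TB.  For user-only builds
 * there is no MMU remapping concern, so the test is always true.
 */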
2215
2216static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2217{
2218    target_ulong pc = s->cs_base + eip;
2219
2220    if (use_goto_tb(s, pc)) {
2221        /* jump to same page: we can use a direct jump */
2222        tcg_gen_goto_tb(tb_num);
2223        gen_jmp_im(s, eip);
2224        tcg_gen_exit_tb(s->base.tb, tb_num);
2225        s->base.is_jmp = DISAS_NORETURN;
2226    } else {
2227        /* jump to another page */
2228        gen_jmp_im(s, eip);
2229        gen_jr(s, s->tmp0);
2230    }
2231}
2232
2233static inline void gen_jcc(DisasContext *s, int b,
2234                           target_ulong val, target_ulong next_eip)
2235{
2236    TCGLabel *l1, *l2;
2237
2238    if (s->jmp_opt) {
2239        l1 = gen_new_label();
2240        gen_jcc1(s, b, l1);
2241
2242        gen_goto_tb(s, 0, next_eip);
2243
2244        gen_set_label(l1);
2245        gen_goto_tb(s, 1, val);
2246    } else {
2247        l1 = gen_new_label();
2248        l2 = gen_new_label();
2249        gen_jcc1(s, b, l1);
2250
2251        gen_jmp_im(s, next_eip);
2252        tcg_gen_br(l2);
2253
2254        gen_set_label(l1);
2255        gen_jmp_im(s, val);
2256        gen_set_label(l2);
2257        gen_eob(s);
2258    }
2259}
2260
2261static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2262                        int modrm, int reg)
2263{
2264    CCPrepare cc;
2265
2266    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2267
2268    cc = gen_prepare_cc(s, b, s->T1);
2269    if (cc.mask != -1) {
2270        TCGv t0 = tcg_temp_new();
2271        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2272        cc.reg = t0;
2273    }
2274    if (!cc.use_reg2) {
2275        cc.reg2 = tcg_const_tl(cc.imm);
2276    }
2277
2278    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2279                       s->T0, cpu_regs[reg]);
2280    gen_op_mov_reg_v(s, ot, reg, s->T0);
2281
2282    if (cc.mask != -1) {
2283        tcg_temp_free(cc.reg);
2284    }
2285    if (!cc.use_reg2) {
2286        tcg_temp_free(cc.reg2);
2287    }
2288}
2289
2290static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2291{
2292    tcg_gen_ld32u_tl(s->T0, cpu_env,
2293                     offsetof(CPUX86State,segs[seg_reg].selector));
2294}
2295
2296static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2297{
2298    tcg_gen_ext16u_tl(s->T0, s->T0);
2299    tcg_gen_st32_tl(s->T0, cpu_env,
2300                    offsetof(CPUX86State,segs[seg_reg].selector));
2301    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2302}
2303
2304/* move T0 to seg_reg and compute if the CPU state may change. Never
2305   call this function with seg_reg == R_CS */
2306static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2307{
2308    if (s->pe && !s->vm86) {
2309        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2310        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2311        /* Abort translation because the addseg value may change or
2312           because ss32 may change.  For R_SS, translation must always
2313           stop as special handling must be done to disable hardware
2314           interrupts for the next instruction.  */
2315        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2316            s->base.is_jmp = DISAS_TOO_MANY;
2317        }
2318    } else {
2319        gen_op_movl_seg_T0_vm(s, seg_reg);
2320        if (seg_reg == R_SS) {
2321            s->base.is_jmp = DISAS_TOO_MANY;
2322        }
2323    }
2324}
2325
2326static inline int svm_is_rep(int prefixes)
2327{
2328    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2329}
2330
2331static inline void
2332gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2333                              uint32_t type, uint64_t param)
2334{
2335    /* no SVM activated; fast case */
2336    if (likely(!(s->flags & HF_GUEST_MASK)))
2337        return;
2338    gen_update_cc_op(s);
2339    gen_jmp_im(s, pc_start - s->cs_base);
2340    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2341                                         tcg_const_i64(param));
2342}
2343
2344static inline void
2345gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2346{
2347    gen_svm_check_intercept_param(s, pc_start, type, 0);
2348}
2349
2350static inline void gen_stack_update(DisasContext *s, int addend)
2351{
2352    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2353}
2354
2355/* Generate a push. It depends on ss32, addseg and dflag.  */
2356static void gen_push_v(DisasContext *s, TCGv val)
2357{
2358    MemOp d_ot = mo_pushpop(s, s->dflag);
2359    MemOp a_ot = mo_stacksize(s);
2360    int size = 1 << d_ot;
2361    TCGv new_esp = s->A0;
2362
2363    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2364
2365    if (!CODE64(s)) {
2366        if (s->addseg) {
2367            new_esp = s->tmp4;
2368            tcg_gen_mov_tl(new_esp, s->A0);
2369        }
2370        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2371    }
2372
2373    gen_op_st_v(s, d_ot, val, s->A0);
2374    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2375}
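
/* Note on new_esp: when addseg is set, gen_lea_v_seg() folds the SS
 * segment base into s->A0, so the unsegmented ESP - size value must
 * be saved in s->tmp4 first; otherwise the segmented linear address
 * would be written back into ESP.  E.g. a 32-bit push on a 16-bit
 * stack stores with d_ot = MO_32 but updates ESP with a_ot = MO_16,
 * leaving the upper half of ESP untouched.
 */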
2376
2377/* two step pop is necessary for precise exceptions */
2378static MemOp gen_pop_T0(DisasContext *s)
2379{
2380    MemOp d_ot = mo_pushpop(s, s->dflag);
2381
2382    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2383    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2384
2385    return d_ot;
2386}
2387
2388static inline void gen_pop_update(DisasContext *s, MemOp ot)
2389{
2390    gen_stack_update(s, 1 << ot);
2391}
2392
2393static inline void gen_stack_A0(DisasContext *s)
2394{
2395    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2396}
2397
2398static void gen_pusha(DisasContext *s)
2399{
2400    MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2401    MemOp d_ot = s->dflag;
2402    int size = 1 << d_ot;
2403    int i;
2404
2405    for (i = 0; i < 8; i++) {
2406        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2407        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2408        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2409    }
2410
2411    gen_stack_update(s, -8 * size);
2412}
2413
2414static void gen_popa(DisasContext *s)
2415{
2416    MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2417    MemOp d_ot = s->dflag;
2418    int size = 1 << d_ot;
2419    int i;
2420
2421    for (i = 0; i < 8; i++) {
2422        /* ESP is not reloaded */
2423        if (7 - i == R_ESP) {
2424            continue;
2425        }
2426        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2427        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2428        gen_op_ld_v(s, d_ot, s->T0, s->A0);
2429        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2430    }
2431
2432    gen_stack_update(s, 8 * size);
2433}
2434
2435static void gen_enter(DisasContext *s, int esp_addend, int level)
2436{
2437    MemOp d_ot = mo_pushpop(s, s->dflag);
2438    MemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2439    int size = 1 << d_ot;
2440
2441    /* Push BP; compute FrameTemp into T1.  */
2442    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2443    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2444    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2445
2446    level &= 31;
2447    if (level != 0) {
2448        int i;
2449
2450        /* Copy level-1 pointers from the previous frame.  */
2451        for (i = 1; i < level; ++i) {
2452            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2453            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2454            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2455
2456            tcg_gen_subi_tl(s->A0, s->T1, size * i);
2457            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2458            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2459        }
2460
2461        /* Push the current FrameTemp as the last level.  */
2462        tcg_gen_subi_tl(s->A0, s->T1, size * level);
2463        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2464        gen_op_st_v(s, d_ot, s->T1, s->A0);
2465    }
2466
2467    /* Copy the FrameTemp value to EBP.  */
2468    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2469
2470    /* Compute the final value of ESP.  */
2471    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2472    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2473}
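
/* Worked example (illustrative): "enter $8, $2" with a 32-bit stack
 * and ESP = 0x1000 pushes EBP at 0x0ffc (FrameTemp = 0x0ffc), copies
 * one frame pointer from [EBP - 4] to 0x0ff8, pushes FrameTemp
 * itself at 0x0ff4 as the last level, sets EBP = 0x0ffc and finally
 * ESP = FrameTemp - 8 - 2 * 4 = 0x0fec.
 */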
2474
2475static void gen_leave(DisasContext *s)
2476{
2477    MemOp d_ot = mo_pushpop(s, s->dflag);
2478    MemOp a_ot = mo_stacksize(s);
2479
2480    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2481    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2482
2483    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2484
2485    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2486    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2487}
2488
2489/* Similarly, except that the assumption here is that we don't decode
2490   the instruction at all -- either a missing opcode, an unimplemented
2491   feature, or just a bogus instruction stream.  */
2492static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2493{
2494    gen_illegal_opcode(s);
2495
2496    if (qemu_loglevel_mask(LOG_UNIMP)) {
2497        FILE *logfile = qemu_log_lock();
2498        target_ulong pc = s->pc_start, end = s->pc;
2499
2500        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2501        for (; pc < end; ++pc) {
2502            qemu_log(" %02x", cpu_ldub_code(env, pc));
2503        }
2504        qemu_log("\n");
2505        qemu_log_unlock(logfile);
2506    }
2507}
2508
2509/* an interrupt is different from an exception because of the
2510   privilege checks */
2511static void gen_interrupt(DisasContext *s, int intno,
2512                          target_ulong cur_eip, target_ulong next_eip)
2513{
2514    gen_update_cc_op(s);
2515    gen_jmp_im(s, cur_eip);
2516    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2517                               tcg_const_i32(next_eip - cur_eip));
2518    s->base.is_jmp = DISAS_NORETURN;
2519}
2520
2521static void gen_debug(DisasContext *s, target_ulong cur_eip)
2522{
2523    gen_update_cc_op(s);
2524    gen_jmp_im(s, cur_eip);
2525    gen_helper_debug(cpu_env);
2526    s->base.is_jmp = DISAS_NORETURN;
2527}
2528
2529static void gen_set_hflag(DisasContext *s, uint32_t mask)
2530{
2531    if ((s->flags & mask) == 0) {
2532        TCGv_i32 t = tcg_temp_new_i32();
2533        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2534        tcg_gen_ori_i32(t, t, mask);
2535        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2536        tcg_temp_free_i32(t);
2537        s->flags |= mask;
2538    }
2539}
2540
2541static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2542{
2543    if (s->flags & mask) {
2544        TCGv_i32 t = tcg_temp_new_i32();
2545        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2546        tcg_gen_andi_i32(t, t, ~mask);
2547        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2548        tcg_temp_free_i32(t);
2549        s->flags &= ~mask;
2550    }
2551}
2552
2553/* Clear BND registers during legacy branches.  */
2554static void gen_bnd_jmp(DisasContext *s)
2555{
2556    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2557       and if the BNDREGs are known to be in use (non-zero) already.
2558       The helper itself will check BNDPRESERVE at runtime.  */
2559    if ((s->prefix & PREFIX_REPNZ) == 0
2560        && (s->flags & HF_MPX_EN_MASK) != 0
2561        && (s->flags & HF_MPX_IU_MASK) != 0) {
2562        gen_helper_bnd_jmp(cpu_env);
2563    }
2564}
2565
2566/* Generate an end of block. Trace exception is also generated if needed.
2567   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2568   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2569   S->TF.  This is used by the syscall/sysret insns.  */
2570static void
2571do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2572{
2573    gen_update_cc_op(s);
2574
2575    /* If several instructions disable interrupts, only the first does it.  */
2576    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2577        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2578    } else {
2579        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2580    }
2581
2582    if (s->base.tb->flags & HF_RF_MASK) {
2583        gen_helper_reset_rf(cpu_env);
2584    }
2585    if (s->base.singlestep_enabled) {
2586        gen_helper_debug(cpu_env);
2587    } else if (recheck_tf) {
2588        gen_helper_rechecking_single_step(cpu_env);
2589        tcg_gen_exit_tb(NULL, 0);
2590    } else if (s->tf) {
2591        gen_helper_single_step(cpu_env);
2592    } else if (jr) {
2593        tcg_gen_lookup_and_goto_ptr();
2594    } else {
2595        tcg_gen_exit_tb(NULL, 0);
2596    }
2597    s->base.is_jmp = DISAS_NORETURN;
2598}
2599
2600static inline void
2601gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2602{
2603    do_gen_eob_worker(s, inhibit, recheck_tf, false);
2604}
2605
2606/* End of block.
2607   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2608static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2609{
2610    gen_eob_worker(s, inhibit, false);
2611}
2612
2613/* End of block, resetting the inhibit irq flag.  */
2614static void gen_eob(DisasContext *s)
2615{
2616    gen_eob_worker(s, false, false);
2617}
2618
2619/* Jump to register */
2620static void gen_jr(DisasContext *s, TCGv dest)
2621{
2622    do_gen_eob_worker(s, false, false, true);
2623}
2624
2625/* Generate a jump to eip.  No segment change may happen before this,
2626   as a direct call to the next block may occur.  */
2627static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2628{
2629    gen_update_cc_op(s);
2630    set_cc_op(s, CC_OP_DYNAMIC);
2631    if (s->jmp_opt) {
2632        gen_goto_tb(s, tb_num, eip);
2633    } else {
2634        gen_jmp_im(s, eip);
2635        gen_eob(s);
2636    }
2637}
2638
2639static void gen_jmp(DisasContext *s, target_ulong eip)
2640{
2641    gen_jmp_tb(s, eip, 0);
2642}
2643
2644static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2645{
2646    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2647    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2648}
2649
2650static inline void gen_stq_env_A0(DisasContext *s, int offset)
2651{
2652    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2653    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2654}
2655
2656static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2657{
2658    int mem_index = s->mem_index;
2659    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2660    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2661    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2662    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2663    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2664}
2665
2666static inline void gen_sto_env_A0(DisasContext *s, int offset)
2667{
2668    int mem_index = s->mem_index;
2669    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2670    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2671    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2672    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2673    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2674}
2675
2676static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2677{
2678    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2679    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2680    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2681    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2682}
2683
2684static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2685{
2686    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2687    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2688}
2689
2690static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2691{
2692    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2693    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2694}
2695
2696static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2697{
2698    tcg_gen_movi_i64(s->tmp1_i64, 0);
2699    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2700}
2701
2702typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2703typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2704typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2705typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2706typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2707typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2708                               TCGv_i32 val);
2709typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2710typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2711                               TCGv val);
2712
2713#define SSE_SPECIAL ((void *)1)
2714#define SSE_DUMMY ((void *)2)
2715
2716#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2717#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2718                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
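
/* In the tables below the second index selects the mandatory prefix
 * of the instruction: 0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2 (see
 * the computation of b1 in gen_sse() below).  SSE_FOP(add), for
 * example, expands to { addps, addpd, addss, addsd } in that prefix
 * order.
 */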
2719
2720static const SSEFunc_0_epp sse_op_table1[256][4] = {
2721    /* 3DNow! extensions */
2722    [0x0e] = { SSE_DUMMY }, /* femms */
2723    [0x0f] = { SSE_DUMMY }, /* pf... */
2724    /* pure SSE operations */
2725    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2726    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2727    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2728    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2729    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2730    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2731    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2732    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2733
2734    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2735    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2736    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2737    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2738    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2739    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2740    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2741    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2742    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2743    [0x51] = SSE_FOP(sqrt),
2744    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2745    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2746    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2747    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2748    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2749    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2750    [0x58] = SSE_FOP(add),
2751    [0x59] = SSE_FOP(mul),
2752    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2753               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2754    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2755    [0x5c] = SSE_FOP(sub),
2756    [0x5d] = SSE_FOP(min),
2757    [0x5e] = SSE_FOP(div),
2758    [0x5f] = SSE_FOP(max),
2759
2760    [0xc2] = SSE_FOP(cmpeq),
2761    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2762               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2763
2764    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2765    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2766    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2767
2768    /* MMX ops and their SSE extensions */
2769    [0x60] = MMX_OP2(punpcklbw),
2770    [0x61] = MMX_OP2(punpcklwd),
2771    [0x62] = MMX_OP2(punpckldq),
2772    [0x63] = MMX_OP2(packsswb),
2773    [0x64] = MMX_OP2(pcmpgtb),
2774    [0x65] = MMX_OP2(pcmpgtw),
2775    [0x66] = MMX_OP2(pcmpgtl),
2776    [0x67] = MMX_OP2(packuswb),
2777    [0x68] = MMX_OP2(punpckhbw),
2778    [0x69] = MMX_OP2(punpckhwd),
2779    [0x6a] = MMX_OP2(punpckhdq),
2780    [0x6b] = MMX_OP2(packssdw),
2781    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2782    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2783    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2784    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2785    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2786               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2787               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2788               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2789    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2790    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2791    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2792    [0x74] = MMX_OP2(pcmpeqb),
2793    [0x75] = MMX_OP2(pcmpeqw),
2794    [0x76] = MMX_OP2(pcmpeql),
2795    [0x77] = { SSE_DUMMY }, /* emms */
2796    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2797    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2798    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2799    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2800    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2801    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2802    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2803    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2804    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2805    [0xd1] = MMX_OP2(psrlw),
2806    [0xd2] = MMX_OP2(psrld),
2807    [0xd3] = MMX_OP2(psrlq),
2808    [0xd4] = MMX_OP2(paddq),
2809    [0xd5] = MMX_OP2(pmullw),
2810    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2811    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2812    [0xd8] = MMX_OP2(psubusb),
2813    [0xd9] = MMX_OP2(psubusw),
2814    [0xda] = MMX_OP2(pminub),
2815    [0xdb] = MMX_OP2(pand),
2816    [0xdc] = MMX_OP2(paddusb),
2817    [0xdd] = MMX_OP2(paddusw),
2818    [0xde] = MMX_OP2(pmaxub),
2819    [0xdf] = MMX_OP2(pandn),
2820    [0xe0] = MMX_OP2(pavgb),
2821    [0xe1] = MMX_OP2(psraw),
2822    [0xe2] = MMX_OP2(psrad),
2823    [0xe3] = MMX_OP2(pavgw),
2824    [0xe4] = MMX_OP2(pmulhuw),
2825    [0xe5] = MMX_OP2(pmulhw),
2826    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2827    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2828    [0xe8] = MMX_OP2(psubsb),
2829    [0xe9] = MMX_OP2(psubsw),
2830    [0xea] = MMX_OP2(pminsw),
2831    [0xeb] = MMX_OP2(por),
2832    [0xec] = MMX_OP2(paddsb),
2833    [0xed] = MMX_OP2(paddsw),
2834    [0xee] = MMX_OP2(pmaxsw),
2835    [0xef] = MMX_OP2(pxor),
2836    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2837    [0xf1] = MMX_OP2(psllw),
2838    [0xf2] = MMX_OP2(pslld),
2839    [0xf3] = MMX_OP2(psllq),
2840    [0xf4] = MMX_OP2(pmuludq),
2841    [0xf5] = MMX_OP2(pmaddwd),
2842    [0xf6] = MMX_OP2(psadbw),
2843    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2844               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2845    [0xf8] = MMX_OP2(psubb),
2846    [0xf9] = MMX_OP2(psubw),
2847    [0xfa] = MMX_OP2(psubl),
2848    [0xfb] = MMX_OP2(psubq),
2849    [0xfc] = MMX_OP2(paddb),
2850    [0xfd] = MMX_OP2(paddw),
2851    [0xfe] = MMX_OP2(paddl),
2852};
2853
2854static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2855    [0 + 2] = MMX_OP2(psrlw),
2856    [0 + 4] = MMX_OP2(psraw),
2857    [0 + 6] = MMX_OP2(psllw),
2858    [8 + 2] = MMX_OP2(psrld),
2859    [8 + 4] = MMX_OP2(psrad),
2860    [8 + 6] = MMX_OP2(pslld),
2861    [16 + 2] = MMX_OP2(psrlq),
2862    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2863    [16 + 6] = MMX_OP2(psllq),
2864    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2865};
2866
2867static const SSEFunc_0_epi sse_op_table3ai[] = {
2868    gen_helper_cvtsi2ss,
2869    gen_helper_cvtsi2sd
2870};
2871
2872#ifdef TARGET_X86_64
2873static const SSEFunc_0_epl sse_op_table3aq[] = {
2874    gen_helper_cvtsq2ss,
2875    gen_helper_cvtsq2sd
2876};
2877#endif
2878
2879static const SSEFunc_i_ep sse_op_table3bi[] = {
2880    gen_helper_cvttss2si,
2881    gen_helper_cvtss2si,
2882    gen_helper_cvttsd2si,
2883    gen_helper_cvtsd2si
2884};
2885
2886#ifdef TARGET_X86_64
2887static const SSEFunc_l_ep sse_op_table3bq[] = {
2888    gen_helper_cvttss2sq,
2889    gen_helper_cvtss2sq,
2890    gen_helper_cvttsd2sq,
2891    gen_helper_cvtsd2sq
2892};
2893#endif
2894
2895static const SSEFunc_0_epp sse_op_table4[8][4] = {
2896    SSE_FOP(cmpeq),
2897    SSE_FOP(cmplt),
2898    SSE_FOP(cmple),
2899    SSE_FOP(cmpunord),
2900    SSE_FOP(cmpneq),
2901    SSE_FOP(cmpnlt),
2902    SSE_FOP(cmpnle),
2903    SSE_FOP(cmpord),
2904};
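
/* The eight rows above correspond to the 3-bit comparison predicate
 * encoded in the imm8 of CMPPS/CMPPD/CMPSS/CMPSD (0x0f 0xc2):
 * 0 = eq, 1 = lt, 2 = le, 3 = unord, 4 = neq, 5 = nlt, 6 = nle,
 * 7 = ord.
 */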
2905
2906static const SSEFunc_0_epp sse_op_table5[256] = {
2907    [0x0c] = gen_helper_pi2fw,
2908    [0x0d] = gen_helper_pi2fd,
2909    [0x1c] = gen_helper_pf2iw,
2910    [0x1d] = gen_helper_pf2id,
2911    [0x8a] = gen_helper_pfnacc,
2912    [0x8e] = gen_helper_pfpnacc,
2913    [0x90] = gen_helper_pfcmpge,
2914    [0x94] = gen_helper_pfmin,
2915    [0x96] = gen_helper_pfrcp,
2916    [0x97] = gen_helper_pfrsqrt,
2917    [0x9a] = gen_helper_pfsub,
2918    [0x9e] = gen_helper_pfadd,
2919    [0xa0] = gen_helper_pfcmpgt,
2920    [0xa4] = gen_helper_pfmax,
2921    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2922    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2923    [0xaa] = gen_helper_pfsubr,
2924    [0xae] = gen_helper_pfacc,
2925    [0xb0] = gen_helper_pfcmpeq,
2926    [0xb4] = gen_helper_pfmul,
2927    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2928    [0xb7] = gen_helper_pmulhrw_mmx,
2929    [0xbb] = gen_helper_pswapd,
2930    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2931};
2932
2933struct SSEOpHelper_epp {
2934    SSEFunc_0_epp op[2];
2935    uint32_t ext_mask;
2936};
2937
2938struct SSEOpHelper_eppi {
2939    SSEFunc_0_eppi op[2];
2940    uint32_t ext_mask;
2941};
2942
2943#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2944#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2945#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2946#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2947#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2948        CPUID_EXT_PCLMULQDQ }
2949#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2950
2951static const struct SSEOpHelper_epp sse_op_table6[256] = {
2952    [0x00] = SSSE3_OP(pshufb),
2953    [0x01] = SSSE3_OP(phaddw),
2954    [0x02] = SSSE3_OP(phaddd),
2955    [0x03] = SSSE3_OP(phaddsw),
2956    [0x04] = SSSE3_OP(pmaddubsw),
2957    [0x05] = SSSE3_OP(phsubw),
2958    [0x06] = SSSE3_OP(phsubd),
2959    [0x07] = SSSE3_OP(phsubsw),
2960    [0x08] = SSSE3_OP(psignb),
2961    [0x09] = SSSE3_OP(psignw),
2962    [0x0a] = SSSE3_OP(psignd),
2963    [0x0b] = SSSE3_OP(pmulhrsw),
2964    [0x10] = SSE41_OP(pblendvb),
2965    [0x14] = SSE41_OP(blendvps),
2966    [0x15] = SSE41_OP(blendvpd),
2967    [0x17] = SSE41_OP(ptest),
2968    [0x1c] = SSSE3_OP(pabsb),
2969    [0x1d] = SSSE3_OP(pabsw),
2970    [0x1e] = SSSE3_OP(pabsd),
2971    [0x20] = SSE41_OP(pmovsxbw),
2972    [0x21] = SSE41_OP(pmovsxbd),
2973    [0x22] = SSE41_OP(pmovsxbq),
2974    [0x23] = SSE41_OP(pmovsxwd),
2975    [0x24] = SSE41_OP(pmovsxwq),
2976    [0x25] = SSE41_OP(pmovsxdq),
2977    [0x28] = SSE41_OP(pmuldq),
2978    [0x29] = SSE41_OP(pcmpeqq),
2979    [0x2a] = SSE41_SPECIAL, /* movntqda */
2980    [0x2b] = SSE41_OP(packusdw),
2981    [0x30] = SSE41_OP(pmovzxbw),
2982    [0x31] = SSE41_OP(pmovzxbd),
2983    [0x32] = SSE41_OP(pmovzxbq),
2984    [0x33] = SSE41_OP(pmovzxwd),
2985    [0x34] = SSE41_OP(pmovzxwq),
2986    [0x35] = SSE41_OP(pmovzxdq),
2987    [0x37] = SSE42_OP(pcmpgtq),
2988    [0x38] = SSE41_OP(pminsb),
2989    [0x39] = SSE41_OP(pminsd),
2990    [0x3a] = SSE41_OP(pminuw),
2991    [0x3b] = SSE41_OP(pminud),
2992    [0x3c] = SSE41_OP(pmaxsb),
2993    [0x3d] = SSE41_OP(pmaxsd),
2994    [0x3e] = SSE41_OP(pmaxuw),
2995    [0x3f] = SSE41_OP(pmaxud),
2996    [0x40] = SSE41_OP(pmulld),
2997    [0x41] = SSE41_OP(phminposuw),
2998    [0xdb] = AESNI_OP(aesimc),
2999    [0xdc] = AESNI_OP(aesenc),
3000    [0xdd] = AESNI_OP(aesenclast),
3001    [0xde] = AESNI_OP(aesdec),
3002    [0xdf] = AESNI_OP(aesdeclast),
3003};
3004
3005static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3006    [0x08] = SSE41_OP(roundps),
3007    [0x09] = SSE41_OP(roundpd),
3008    [0x0a] = SSE41_OP(roundss),
3009    [0x0b] = SSE41_OP(roundsd),
3010    [0x0c] = SSE41_OP(blendps),
3011    [0x0d] = SSE41_OP(blendpd),
3012    [0x0e] = SSE41_OP(pblendw),
3013    [0x0f] = SSSE3_OP(palignr),
3014    [0x14] = SSE41_SPECIAL, /* pextrb */
3015    [0x15] = SSE41_SPECIAL, /* pextrw */
3016    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3017    [0x17] = SSE41_SPECIAL, /* extractps */
3018    [0x20] = SSE41_SPECIAL, /* pinsrb */
3019    [0x21] = SSE41_SPECIAL, /* insertps */
3020    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3021    [0x40] = SSE41_OP(dpps),
3022    [0x41] = SSE41_OP(dppd),
3023    [0x42] = SSE41_OP(mpsadbw),
3024    [0x44] = PCLMULQDQ_OP(pclmulqdq),
3025    [0x60] = SSE42_OP(pcmpestrm),
3026    [0x61] = SSE42_OP(pcmpestri),
3027    [0x62] = SSE42_OP(pcmpistrm),
3028    [0x63] = SSE42_OP(pcmpistri),
3029    [0xdf] = AESNI_OP(aeskeygenassist),
3030};
3031
3032static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3033                    target_ulong pc_start, int rex_r)
3034{
3035    int b1, op1_offset, op2_offset, is_xmm, val;
3036    int modrm, mod, rm, reg;
3037    SSEFunc_0_epp sse_fn_epp;
3038    SSEFunc_0_eppi sse_fn_eppi;
3039    SSEFunc_0_ppi sse_fn_ppi;
3040    SSEFunc_0_eppt sse_fn_eppt;
3041    MemOp ot;
3042
3043    b &= 0xff;
3044    if (s->prefix & PREFIX_DATA)
3045        b1 = 1;
3046    else if (s->prefix & PREFIX_REPZ)
3047        b1 = 2;
3048    else if (s->prefix & PREFIX_REPNZ)
3049        b1 = 3;
3050    else
3051        b1 = 0;
3052    sse_fn_epp = sse_op_table1[b][b1];
3053    if (!sse_fn_epp) {
3054        goto unknown_op;
3055    }
3056    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3057        is_xmm = 1;
3058    } else {
3059        if (b1 == 0) {
3060            /* MMX case */
3061            is_xmm = 0;
3062        } else {
3063            is_xmm = 1;
3064        }
3065    }
3066    /* simple MMX/SSE operation */
3067    if (s->flags & HF_TS_MASK) {
3068        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3069        return;
3070    }
3071    if (s->flags & HF_EM_MASK) {
3072    illegal_op:
3073        gen_illegal_opcode(s);
3074        return;
3075    }
3076    if (is_xmm
3077        && !(s->flags & HF_OSFXSR_MASK)
3078        && (b != 0x38 && b != 0x3a)) {
3079        goto unknown_op;
3080    }
3081    if (b == 0x0e) {
3082        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3083            /* If we were fully decoding this we might use illegal_op.  */
3084            goto unknown_op;
3085        }
3086        /* femms */
3087        gen_helper_emms(cpu_env);
3088        return;
3089    }
3090    if (b == 0x77) {
3091        /* emms */
3092        gen_helper_emms(cpu_env);
3093        return;
3094    }
3095    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3096       the static cpu state) */
3097    if (!is_xmm) {
3098        gen_helper_enter_mmx(cpu_env);
3099    }
3100
3101    modrm = x86_ldub_code(env, s);
3102    reg = ((modrm >> 3) & 7);
3103    if (is_xmm)
3104        reg |= rex_r;
3105    mod = (modrm >> 6) & 3;
3106    if (sse_fn_epp == SSE_SPECIAL) {
3107        b |= (b1 << 8);
3108        switch(b) {
3109        case 0x0e7: /* movntq */
3110            if (mod == 3) {
3111                goto illegal_op;
3112            }
3113            gen_lea_modrm(env, s, modrm);
3114            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3115            break;
3116        case 0x1e7: /* movntdq */
3117        case 0x02b: /* movntps */
3118        case 0x12b: /* movntpd */
3119            if (mod == 3)
3120                goto illegal_op;
3121            gen_lea_modrm(env, s, modrm);
3122            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3123            break;
3124        case 0x3f0: /* lddqu */
3125            if (mod == 3)
3126                goto illegal_op;
3127            gen_lea_modrm(env, s, modrm);
3128            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3129            break;
3130        case 0x22b: /* movntss */
3131        case 0x32b: /* movntsd */
3132            if (mod == 3)
3133                goto illegal_op;
3134            gen_lea_modrm(env, s, modrm);
3135            if (b1 & 1) {
3136                gen_stq_env_A0(s, offsetof(CPUX86State,
3137                                           xmm_regs[reg].ZMM_Q(0)));
3138            } else {
3139                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3140                    xmm_regs[reg].ZMM_L(0)));
3141                gen_op_st_v(s, MO_32, s->T0, s->A0);
3142            }
3143            break;
3144        case 0x6e: /* movd mm, ea */
3145#ifdef TARGET_X86_64
3146            if (s->dflag == MO_64) {
3147                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3148                tcg_gen_st_tl(s->T0, cpu_env,
3149                              offsetof(CPUX86State, fpregs[reg].mmx));
3150            } else
3151#endif
3152            {
3153                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3154                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3155                                 offsetof(CPUX86State,fpregs[reg].mmx));
3156                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3157                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3158            }
3159            break;
3160        case 0x16e: /* movd xmm, ea */
3161#ifdef TARGET_X86_64
3162            if (s->dflag == MO_64) {
3163                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3164                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3165                                 offsetof(CPUX86State,xmm_regs[reg]));
3166                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3167            } else
3168#endif
3169            {
3170                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3171                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3172                                 offsetof(CPUX86State,xmm_regs[reg]));
3173                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3174                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3175            }
3176            break;
3177        case 0x6f: /* movq mm, ea */
3178            if (mod != 3) {
3179                gen_lea_modrm(env, s, modrm);
3180                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3181            } else {
3182                rm = (modrm & 7);
3183                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3184                               offsetof(CPUX86State,fpregs[rm].mmx));
3185                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3186                               offsetof(CPUX86State,fpregs[reg].mmx));
3187            }
3188            break;
3189        case 0x010: /* movups */
3190        case 0x110: /* movupd */
3191        case 0x028: /* movaps */
3192        case 0x128: /* movapd */
3193        case 0x16f: /* movdqa xmm, ea */
3194        case 0x26f: /* movdqu xmm, ea */
3195            if (mod != 3) {
3196                gen_lea_modrm(env, s, modrm);
3197                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3198            } else {
3199                rm = (modrm & 7) | REX_B(s);
3200                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3201                            offsetof(CPUX86State,xmm_regs[rm]));
3202            }
3203            break;
3204        case 0x210: /* movss xmm, ea */
3205            if (mod != 3) {
3206                gen_lea_modrm(env, s, modrm);
3207                gen_op_ld_v(s, MO_32, s->T0, s->A0);
3208                tcg_gen_st32_tl(s->T0, cpu_env,
3209                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3210                tcg_gen_movi_tl(s->T0, 0);
3211                tcg_gen_st32_tl(s->T0, cpu_env,
3212                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3213                tcg_gen_st32_tl(s->T0, cpu_env,
3214                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3215                tcg_gen_st32_tl(s->T0, cpu_env,
3216                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3217            } else {
3218                rm = (modrm & 7) | REX_B(s);
3219                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3220                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3221            }
3222            break;
3223        case 0x310: /* movsd xmm, ea */
3224            if (mod != 3) {
3225                gen_lea_modrm(env, s, modrm);
3226                gen_ldq_env_A0(s, offsetof(CPUX86State,
3227                                           xmm_regs[reg].ZMM_Q(0)));
3228                tcg_gen_movi_tl(s->T0, 0);
3229                tcg_gen_st32_tl(s->T0, cpu_env,
3230                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3231                tcg_gen_st32_tl(s->T0, cpu_env,
3232                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3233            } else {
3234                rm = (modrm & 7) | REX_B(s);
3235                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3236                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3237            }
3238            break;
3239        case 0x012: /* movlps */
3240        case 0x112: /* movlpd */
3241            if (mod != 3) {
3242                gen_lea_modrm(env, s, modrm);
3243                gen_ldq_env_A0(s, offsetof(CPUX86State,
3244                                           xmm_regs[reg].ZMM_Q(0)));
3245            } else {
3246                /* movhlps */
3247                rm = (modrm & 7) | REX_B(s);
3248                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3249                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3250            }
3251            break;
3252        case 0x212: /* movsldup */
3253            if (mod != 3) {
3254                gen_lea_modrm(env, s, modrm);
3255                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3256            } else {
3257                rm = (modrm & 7) | REX_B(s);
3258                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3259                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3260                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3261                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3262            }
3263            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3264                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3265            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3266                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3267            break;
3268        case 0x312: /* movddup */
3269            if (mod != 3) {
3270                gen_lea_modrm(env, s, modrm);
3271                gen_ldq_env_A0(s, offsetof(CPUX86State,
3272                                           xmm_regs[reg].ZMM_Q(0)));
3273            } else {
3274                rm = (modrm & 7) | REX_B(s);
3275                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3276                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3277            }
3278            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3279                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3280            break;
3281        case 0x016: /* movhps */
3282        case 0x116: /* movhpd */
3283            if (mod != 3) {
3284                gen_lea_modrm(env, s, modrm);
3285                gen_ldq_env_A0(s, offsetof(CPUX86State,
3286                                           xmm_regs[reg].ZMM_Q(1)));
3287            } else {
3288                /* movlhps */
3289                rm = (modrm & 7) | REX_B(s);
3290                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3291                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3292            }
3293            break;
3294        case 0x216: /* movshdup */
3295            if (mod != 3) {
3296                gen_lea_modrm(env, s, modrm);
3297                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3298            } else {
3299                rm = (modrm & 7) | REX_B(s);
3300                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3301                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3302                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3303                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3304            }
3305            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3306                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3307            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3308                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3309            break;
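        /* The 0x178/0x378 forms are the immediate variants of the AMD
           SSE4a EXTRQ (66 prefix) and INSERTQ (f2 prefix) instructions:
           the first immediate byte is the field length and the second
           the bit index, exactly as read below.  */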
3310        case 0x178:
3311        case 0x378:
3312            {
3313                int bit_index, field_length;
3314
                if (b1 == 1 && reg != 0) {
                    goto illegal_op;
                }
                field_length = x86_ldub_code(env, s) & 0x3F;
                bit_index = x86_ldub_code(env, s) & 0x3F;
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, xmm_regs[reg]));
                if (b1 == 1) {
                    gen_helper_extrq_i(cpu_env, s->ptr0,
                                       tcg_const_i32(bit_index),
                                       tcg_const_i32(field_length));
                } else {
                    gen_helper_insertq_i(cpu_env, s->ptr0,
                                         tcg_const_i32(bit_index),
                                         tcg_const_i32(field_length));
                }
3329            }
3330            break;
3331        case 0x7e: /* movd ea, mm */
3332#ifdef TARGET_X86_64
3333            if (s->dflag == MO_64) {
3334                tcg_gen_ld_i64(s->T0, cpu_env,
3335                               offsetof(CPUX86State,fpregs[reg].mmx));
3336                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3337            } else
3338#endif
3339            {
3340                tcg_gen_ld32u_tl(s->T0, cpu_env,
3341                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3342                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3343            }
3344            break;
3345        case 0x17e: /* movd ea, xmm */
3346#ifdef TARGET_X86_64
3347            if (s->dflag == MO_64) {
3348                tcg_gen_ld_i64(s->T0, cpu_env,
3349                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3350                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3351            } else
3352#endif
3353            {
3354                tcg_gen_ld32u_tl(s->T0, cpu_env,
3355                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3356                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3357            }
3358            break;
3359        case 0x27e: /* movq xmm, ea */
3360            if (mod != 3) {
3361                gen_lea_modrm(env, s, modrm);
3362                gen_ldq_env_A0(s, offsetof(CPUX86State,
3363                                           xmm_regs[reg].ZMM_Q(0)));
3364            } else {
3365                rm = (modrm & 7) | REX_B(s);
3366                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3367                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3368            }
3369            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3370            break;
3371        case 0x7f: /* movq ea, mm */
3372            if (mod != 3) {
3373                gen_lea_modrm(env, s, modrm);
3374                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3375            } else {
3376                rm = (modrm & 7);
3377                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3378                            offsetof(CPUX86State,fpregs[reg].mmx));
3379            }
3380            break;
3381        case 0x011: /* movups */
3382        case 0x111: /* movupd */
3383        case 0x029: /* movaps */
3384        case 0x129: /* movapd */
3385        case 0x17f: /* movdqa ea, xmm */
3386        case 0x27f: /* movdqu ea, xmm */
3387            if (mod != 3) {
3388                gen_lea_modrm(env, s, modrm);
3389                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3390            } else {
3391                rm = (modrm & 7) | REX_B(s);
3392                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3393                            offsetof(CPUX86State,xmm_regs[reg]));
3394            }
3395            break;
3396        case 0x211: /* movss ea, xmm */
3397            if (mod != 3) {
3398                gen_lea_modrm(env, s, modrm);
3399                tcg_gen_ld32u_tl(s->T0, cpu_env,
3400                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3401                gen_op_st_v(s, MO_32, s->T0, s->A0);
3402            } else {
3403                rm = (modrm & 7) | REX_B(s);
3404                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3405                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3406            }
3407            break;
3408        case 0x311: /* movsd ea, xmm */
3409            if (mod != 3) {
3410                gen_lea_modrm(env, s, modrm);
3411                gen_stq_env_A0(s, offsetof(CPUX86State,
3412                                           xmm_regs[reg].ZMM_Q(0)));
3413            } else {
3414                rm = (modrm & 7) | REX_B(s);
3415                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3416                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3417            }
3418            break;
3419        case 0x013: /* movlps */
3420        case 0x113: /* movlpd */
3421            if (mod != 3) {
3422                gen_lea_modrm(env, s, modrm);
3423                gen_stq_env_A0(s, offsetof(CPUX86State,
3424                                           xmm_regs[reg].ZMM_Q(0)));
3425            } else {
3426                goto illegal_op;
3427            }
3428            break;
3429        case 0x017: /* movhps */
3430        case 0x117: /* movhpd */
3431            if (mod != 3) {
3432                gen_lea_modrm(env, s, modrm);
3433                gen_stq_env_A0(s, offsetof(CPUX86State,
3434                                           xmm_regs[reg].ZMM_Q(1)));
3435            } else {
3436                goto illegal_op;
3437            }
3438            break;
3439        case 0x71: /* shift mm, im */
3440        case 0x72:
3441        case 0x73:
3442        case 0x171: /* shift xmm, im */
3443        case 0x172:
3444        case 0x173:
3445            if (b1 >= 2) {
3446                goto unknown_op;
3447            }
3448            val = x86_ldub_code(env, s);
3449            if (is_xmm) {
3450                tcg_gen_movi_tl(s->T0, val);
3451                tcg_gen_st32_tl(s->T0, cpu_env,
3452                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3453                tcg_gen_movi_tl(s->T0, 0);
3454                tcg_gen_st32_tl(s->T0, cpu_env,
3455                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3456                op1_offset = offsetof(CPUX86State,xmm_t0);
3457            } else {
3458                tcg_gen_movi_tl(s->T0, val);
3459                tcg_gen_st32_tl(s->T0, cpu_env,
3460                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3461                tcg_gen_movi_tl(s->T0, 0);
3462                tcg_gen_st32_tl(s->T0, cpu_env,
3463                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3464                op1_offset = offsetof(CPUX86State,mmx_t0);
3465            }
            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
                                       ((modrm >> 3) & 7)][b1];
3468            if (!sse_fn_epp) {
3469                goto unknown_op;
3470            }
3471            if (is_xmm) {
3472                rm = (modrm & 7) | REX_B(s);
3473                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3474            } else {
3475                rm = (modrm & 7);
3476                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3477            }
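            /* Note the operand order here: ptr0 points at the
               destination register, ptr1 at the temporary holding the
               zero-extended immediate count.  */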
3478            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3479            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3480            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3481            break;
3482        case 0x050: /* movmskps */
3483            rm = (modrm & 7) | REX_B(s);
3484            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3485                             offsetof(CPUX86State,xmm_regs[rm]));
3486            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3487            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3488            break;
3489        case 0x150: /* movmskpd */
3490            rm = (modrm & 7) | REX_B(s);
3491            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3492                             offsetof(CPUX86State,xmm_regs[rm]));
3493            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3494            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3495            break;
3496        case 0x02a: /* cvtpi2ps */
3497        case 0x12a: /* cvtpi2pd */
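            /* enter_mmx switches the x87 unit into MMX mode (TOP and
               the tag word are reset) before fpregs are accessed as
               MMX registers below.  */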
3498            gen_helper_enter_mmx(cpu_env);
3499            if (mod != 3) {
3500                gen_lea_modrm(env, s, modrm);
3501                op2_offset = offsetof(CPUX86State,mmx_t0);
3502                gen_ldq_env_A0(s, op2_offset);
3503            } else {
3504                rm = (modrm & 7);
3505                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3506            }
3507            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3508            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3509            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3510            switch(b >> 8) {
3511            case 0x0:
3512                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3513                break;
3514            default:
3515            case 0x1:
3516                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3517                break;
3518            }
3519            break;
3520        case 0x22a: /* cvtsi2ss */
3521        case 0x32a: /* cvtsi2sd */
3522            ot = mo_64_32(s->dflag);
3523            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3524            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3525            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3526            if (ot == MO_32) {
3527                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3528                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3529                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3530            } else {
3531#ifdef TARGET_X86_64
3532                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3533                sse_fn_epl(cpu_env, s->ptr0, s->T0);
3534#else
3535                goto illegal_op;
3536#endif
3537            }
3538            break;
3539        case 0x02c: /* cvttps2pi */
3540        case 0x12c: /* cvttpd2pi */
3541        case 0x02d: /* cvtps2pi */
3542        case 0x12d: /* cvtpd2pi */
3543            gen_helper_enter_mmx(cpu_env);
3544            if (mod != 3) {
3545                gen_lea_modrm(env, s, modrm);
3546                op2_offset = offsetof(CPUX86State,xmm_t0);
3547                gen_ldo_env_A0(s, op2_offset);
3548            } else {
3549                rm = (modrm & 7) | REX_B(s);
3550                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3551            }
3552            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3553            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3554            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3555            switch(b) {
3556            case 0x02c:
3557                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3558                break;
3559            case 0x12c:
3560                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3561                break;
3562            case 0x02d:
3563                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3564                break;
3565            case 0x12d:
3566                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3567                break;
3568            }
3569            break;
3570        case 0x22c: /* cvttss2si */
3571        case 0x32c: /* cvttsd2si */
3572        case 0x22d: /* cvtss2si */
3573        case 0x32d: /* cvtsd2si */
3574            ot = mo_64_32(s->dflag);
3575            if (mod != 3) {
3576                gen_lea_modrm(env, s, modrm);
3577                if ((b >> 8) & 1) {
3578                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3579                } else {
3580                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
3581                    tcg_gen_st32_tl(s->T0, cpu_env,
3582                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3583                }
3584                op2_offset = offsetof(CPUX86State,xmm_t0);
3585            } else {
3586                rm = (modrm & 7) | REX_B(s);
3587                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3588            }
3589            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3590            if (ot == MO_32) {
3591                SSEFunc_i_ep sse_fn_i_ep =
3592                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3593                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3594                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3595            } else {
3596#ifdef TARGET_X86_64
3597                SSEFunc_l_ep sse_fn_l_ep =
3598                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3599                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3600#else
3601                goto illegal_op;
3602#endif
3603            }
3604            gen_op_mov_reg_v(s, ot, reg, s->T0);
3605            break;
3606        case 0xc4: /* pinsrw */
3607        case 0x1c4:
3608            s->rip_offset = 1;
3609            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3610            val = x86_ldub_code(env, s);
3611            if (b1) {
3612                val &= 7;
3613                tcg_gen_st16_tl(s->T0, cpu_env,
3614                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3615            } else {
3616                val &= 3;
3617                tcg_gen_st16_tl(s->T0, cpu_env,
3618                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3619            }
3620            break;
3621        case 0xc5: /* pextrw */
3622        case 0x1c5:
            if (mod != 3) {
                goto illegal_op;
            }
3625            ot = mo_64_32(s->dflag);
3626            val = x86_ldub_code(env, s);
3627            if (b1) {
3628                val &= 7;
3629                rm = (modrm & 7) | REX_B(s);
3630                tcg_gen_ld16u_tl(s->T0, cpu_env,
3631                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3632            } else {
3633                val &= 3;
3634                rm = (modrm & 7);
3635                tcg_gen_ld16u_tl(s->T0, cpu_env,
3636                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3637            }
3638            reg = ((modrm >> 3) & 7) | rex_r;
3639            gen_op_mov_reg_v(s, ot, reg, s->T0);
3640            break;
3641        case 0x1d6: /* movq ea, xmm */
3642            if (mod != 3) {
3643                gen_lea_modrm(env, s, modrm);
3644                gen_stq_env_A0(s, offsetof(CPUX86State,
3645                                           xmm_regs[reg].ZMM_Q(0)));
3646            } else {
3647                rm = (modrm & 7) | REX_B(s);
3648                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3649                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3650                gen_op_movq_env_0(s,
3651                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3652            }
3653            break;
3654        case 0x2d6: /* movq2dq */
3655            gen_helper_enter_mmx(cpu_env);
3656            rm = (modrm & 7);
3657            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3658                        offsetof(CPUX86State,fpregs[rm].mmx));
3659            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3660            break;
3661        case 0x3d6: /* movdq2q */
3662            gen_helper_enter_mmx(cpu_env);
3663            rm = (modrm & 7) | REX_B(s);
3664            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3665                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3666            break;
3667        case 0xd7: /* pmovmskb */
3668        case 0x1d7:
            if (mod != 3) {
                goto illegal_op;
            }
3671            if (b1) {
3672                rm = (modrm & 7) | REX_B(s);
3673                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3674                                 offsetof(CPUX86State, xmm_regs[rm]));
3675                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3676            } else {
3677                rm = (modrm & 7);
3678                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3679                                 offsetof(CPUX86State, fpregs[rm].mmx));
3680                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3681            }
3682            reg = ((modrm >> 3) & 7) | rex_r;
3683            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3684            break;
3685
3686        case 0x138:
3687        case 0x038:
3688            b = modrm;
3689            if ((b & 0xf0) == 0xf0) {
3690                goto do_0f_38_fx;
3691            }
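            /* Otherwise this is a three-byte 0f 38 opcode: the byte
               just consumed was the opcode and the real modrm byte
               follows.  */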
3692            modrm = x86_ldub_code(env, s);
3693            rm = modrm & 7;
3694            reg = ((modrm >> 3) & 7) | rex_r;
3695            mod = (modrm >> 6) & 3;
3696            if (b1 >= 2) {
3697                goto unknown_op;
3698            }
3699
3700            sse_fn_epp = sse_op_table6[b].op[b1];
3701            if (!sse_fn_epp) {
3702                goto unknown_op;
3703            }
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) {
                goto illegal_op;
            }
3706
3707            if (b1) {
3708                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3709                if (mod == 3) {
3710                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3711                } else {
3712                    op2_offset = offsetof(CPUX86State,xmm_t0);
3713                    gen_lea_modrm(env, s, modrm);
3714                    switch (b) {
3715                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3716                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3717                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3718                        gen_ldq_env_A0(s, op2_offset +
3719                                        offsetof(ZMMReg, ZMM_Q(0)));
3720                        break;
3721                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3722                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3723                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3724                                            s->mem_index, MO_LEUL);
3725                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3726                                        offsetof(ZMMReg, ZMM_L(0)));
3727                        break;
3728                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3729                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3730                                           s->mem_index, MO_LEUW);
3731                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3732                                        offsetof(ZMMReg, ZMM_W(0)));
3733                        break;
                    case 0x2a:            /* movntdqa */
3735                        gen_ldo_env_A0(s, op1_offset);
3736                        return;
3737                    default:
3738                        gen_ldo_env_A0(s, op2_offset);
3739                    }
3740                }
3741            } else {
3742                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3743                if (mod == 3) {
3744                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3745                } else {
3746                    op2_offset = offsetof(CPUX86State,mmx_t0);
3747                    gen_lea_modrm(env, s, modrm);
3748                    gen_ldq_env_A0(s, op2_offset);
3749                }
3750            }
3751            if (sse_fn_epp == SSE_SPECIAL) {
3752                goto unknown_op;
3753            }
3754
3755            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3756            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3757            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3758
3759            if (b == 0x17) {
3760                set_cc_op(s, CC_OP_EFLAGS);
3761            }
3762            break;
3763
3764        case 0x238:
3765        case 0x338:
3766        do_0f_38_fx:
3767            /* Various integer extensions at 0f 38 f[0-f].  */
3768            b = modrm | (b1 << 8);
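            /* b1 encodes the mandatory prefix (0: none, 1: 66, 2: f3,
               3: f2), so e.g. f2 0f 38 f0 arrives here with b1 == 3
               and is dispatched below as b == 0x3f0 (crc32 Gd,Eb).  */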
3769            modrm = x86_ldub_code(env, s);
3770            reg = ((modrm >> 3) & 7) | rex_r;
3771
3772            switch (b) {
3773            case 0x3f0: /* crc32 Gd,Eb */
3774            case 0x3f1: /* crc32 Gd,Ey */
3775            do_crc32:
3776                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3777                    goto illegal_op;
3778                }
3779                if ((b & 0xff) == 0xf0) {
3780                    ot = MO_8;
3781                } else if (s->dflag != MO_64) {
3782                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3783                } else {
3784                    ot = MO_64;
3785                }
3786
3787                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3788                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3789                gen_helper_crc32(s->T0, s->tmp2_i32,
3790                                 s->T0, tcg_const_i32(8 << ot));
3791
3792                ot = mo_64_32(s->dflag);
3793                gen_op_mov_reg_v(s, ot, reg, s->T0);
3794                break;
3795
3796            case 0x1f0: /* crc32 or movbe */
3797            case 0x1f1:
                /* For these insns, the f3 prefix is supposed to take
                   priority over the 66 prefix, but that is not what the
                   b1 computation above implements.  */
3801                if (s->prefix & PREFIX_REPNZ) {
3802                    goto do_crc32;
3803                }
3804                /* FALLTHRU */
3805            case 0x0f0: /* movbe Gy,My */
3806            case 0x0f1: /* movbe My,Gy */
3807                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3808                    goto illegal_op;
3809                }
3810                if (s->dflag != MO_64) {
3811                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3812                } else {
3813                    ot = MO_64;
3814                }
3815
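                /* MOVBE is an ordinary load or store with the MO_BE
                   flag added, so the memory operation itself performs
                   the byte swap.  */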
3816                gen_lea_modrm(env, s, modrm);
3817                if ((b & 1) == 0) {
3818                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
3819                                       s->mem_index, ot | MO_BE);
3820                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3821                } else {
3822                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3823                                       s->mem_index, ot | MO_BE);
3824                }
3825                break;
3826
3827            case 0x0f2: /* andn Gy, By, Ey */
3828                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3829                    || !(s->prefix & PREFIX_VEX)
3830                    || s->vex_l != 0) {
3831                    goto illegal_op;
3832                }
3833                ot = mo_64_32(s->dflag);
3834                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3835                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3836                gen_op_mov_reg_v(s, ot, reg, s->T0);
3837                gen_op_update1_cc(s);
3838                set_cc_op(s, CC_OP_LOGICB + ot);
3839                break;
3840
3841            case 0x0f7: /* bextr Gy, Ey, By */
3842                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3843                    || !(s->prefix & PREFIX_VEX)
3844                    || s->vex_l != 0) {
3845                    goto illegal_op;
3846                }
3847                ot = mo_64_32(s->dflag);
3848                {
3849                    TCGv bound, zero;
3850
3851                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3852                    /* Extract START, and shift the operand.
3853                       Shifts larger than operand size get zeros.  */
3854                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3855                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3856
3857                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3858                    zero = tcg_const_tl(0);
3859                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3860                                       s->T0, zero);
3861                    tcg_temp_free(zero);
3862
3863                    /* Extract the LEN into a mask.  Lengths larger than
3864                       operand size get all ones.  */
3865                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3866                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3867                                       s->A0, bound);
3868                    tcg_temp_free(bound);
3869                    tcg_gen_movi_tl(s->T1, 1);
3870                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3871                    tcg_gen_subi_tl(s->T1, s->T1, 1);
3872                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
3873
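                    /* Worked example: with Ey = 0x00abcdef and
                       By = 0x0c08 (start 8, len 12), the shift yields
                       0x0000abcd and the mask 0xfff, so Gy = 0xbcd.  */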
3874                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3875                    gen_op_update1_cc(s);
3876                    set_cc_op(s, CC_OP_LOGICB + ot);
3877                }
3878                break;
3879
3880            case 0x0f5: /* bzhi Gy, Ey, By */
3881                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3882                    || !(s->prefix & PREFIX_VEX)
3883                    || s->vex_l != 0) {
3884                    goto illegal_op;
3885                }
3886                ot = mo_64_32(s->dflag);
3887                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3888                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3889                {
3890                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3891                    /* Note that since we're using BMILG (in order to get O
3892                       cleared) we need to store the inverse into C.  */
3893                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3894                                       s->T1, bound);
3895                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3896                                       bound, bound, s->T1);
3897                    tcg_temp_free(bound);
3898                }
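                /* Build (-1) << n and clear those bits: e.g. Ey = 0xff
                   with an index of 4 leaves 0x0f in the result.  */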
3899                tcg_gen_movi_tl(s->A0, -1);
3900                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3901                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3902                gen_op_mov_reg_v(s, ot, reg, s->T0);
3903                gen_op_update1_cc(s);
3904                set_cc_op(s, CC_OP_BMILGB + ot);
3905                break;
3906
3907            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3908                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3909                    || !(s->prefix & PREFIX_VEX)
3910                    || s->vex_l != 0) {
3911                    goto illegal_op;
3912                }
3913                ot = mo_64_32(s->dflag);
3914                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3915                switch (ot) {
3916                default:
3917                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3918                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3919                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3920                                      s->tmp2_i32, s->tmp3_i32);
3921                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3922                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3923                    break;
3924#ifdef TARGET_X86_64
3925                case MO_64:
3926                    tcg_gen_mulu2_i64(s->T0, s->T1,
3927                                      s->T0, cpu_regs[R_EDX]);
3928                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
3929                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
3930                    break;
3931#endif
3932                }
3933                break;
3934
3935            case 0x3f5: /* pdep Gy, By, Ey */
3936                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3937                    || !(s->prefix & PREFIX_VEX)
3938                    || s->vex_l != 0) {
3939                    goto illegal_op;
3940                }
3941                ot = mo_64_32(s->dflag);
3942                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3943                /* Note that by zero-extending the source operand, we
3944                   automatically handle zero-extending the result.  */
3945                if (ot == MO_64) {
3946                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3947                } else {
3948                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3949                }
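                /* PDEP scatters the low-order source bits to the set
                   mask bits: e.g. T1 = 0b101 with mask T0 = 0b11010
                   deposits 1, 0, 1 at bits 1, 3 and 4, giving
                   0b10010.  */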
3950                gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
3951                break;
3952
3953            case 0x2f5: /* pext Gy, By, Ey */
3954                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3955                    || !(s->prefix & PREFIX_VEX)
3956                    || s->vex_l != 0) {
3957                    goto illegal_op;
3958                }
3959                ot = mo_64_32(s->dflag);
3960                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3961                /* Note that by zero-extending the source operand, we
3962                   automatically handle zero-extending the result.  */
3963                if (ot == MO_64) {
3964                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3965                } else {
3966                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3967                }
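                /* PEXT is the inverse gather: e.g. T1 = 0b10010 with
                   mask T0 = 0b11010 packs the bits at positions 1, 3
                   and 4 into 0b101.  */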
3968                gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
3969                break;
3970
3971            case 0x1f6: /* adcx Gy, Ey */
3972            case 0x2f6: /* adox Gy, Ey */
3973                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3974                    goto illegal_op;
3975                } else {
3976                    TCGv carry_in, carry_out, zero;
3977                    int end_op;
3978
3979                    ot = mo_64_32(s->dflag);
3980                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3981
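                    /* ADCX adds with CF only and ADOX with OF only, so
                       two independent carry chains (e.g. in a
                       multi-precision multiply) can be interleaved
                       without saving flags in between.  */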
3982                    /* Re-use the carry-out from a previous round.  */
3983                    carry_in = NULL;
3984                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3985                    switch (s->cc_op) {
3986                    case CC_OP_ADCX:
3987                        if (b == 0x1f6) {
3988                            carry_in = cpu_cc_dst;
3989                            end_op = CC_OP_ADCX;
3990                        } else {
3991                            end_op = CC_OP_ADCOX;
3992                        }
3993                        break;
3994                    case CC_OP_ADOX:
3995                        if (b == 0x1f6) {
3996                            end_op = CC_OP_ADCOX;
3997                        } else {
3998                            carry_in = cpu_cc_src2;
3999                            end_op = CC_OP_ADOX;
4000                        }
4001                        break;
4002                    case CC_OP_ADCOX:
4003                        end_op = CC_OP_ADCOX;
4004                        carry_in = carry_out;
4005                        break;
4006                    default:
4007                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4008                        break;
4009                    }
4010                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
4011                    if (!carry_in) {
4012                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4013                            gen_compute_eflags(s);
4014                        }
4015                        carry_in = s->tmp0;
4016                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
4017                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4018                    }
4019
4020                    switch (ot) {
4021#ifdef TARGET_X86_64
4022                    case MO_32:
4023                        /* If we know TL is 64-bit, and we want a 32-bit
4024                           result, just do everything in 64-bit arithmetic.  */
4025                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4026                        tcg_gen_ext32u_i64(s->T0, s->T0);
4027                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4028                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
4029                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4030                        tcg_gen_shri_i64(carry_out, s->T0, 32);
4031                        break;
4032#endif
4033                    default:
4034                        /* Otherwise compute the carry-out in two steps.  */
4035                        zero = tcg_const_tl(0);
4036                        tcg_gen_add2_tl(s->T0, carry_out,
4037                                        s->T0, zero,
4038                                        carry_in, zero);
4039                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4040                                        cpu_regs[reg], carry_out,
4041                                        s->T0, zero);
4042                        tcg_temp_free(zero);
4043                        break;
4044                    }
4045                    set_cc_op(s, end_op);
4046                }
4047                break;
4048
4049            case 0x1f7: /* shlx Gy, Ey, By */
4050            case 0x2f7: /* sarx Gy, Ey, By */
4051            case 0x3f7: /* shrx Gy, Ey, By */
4052                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4053                    || !(s->prefix & PREFIX_VEX)
4054                    || s->vex_l != 0) {
4055                    goto illegal_op;
4056                }
4057                ot = mo_64_32(s->dflag);
4058                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4059                if (ot == MO_64) {
4060                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4061                } else {
4062                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4063                }
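                /* As with ordinary shifts the count is masked to the
                   operand size, so e.g. a 32-bit SHLX by 33 shifts
                   by 1.  */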
4064                if (b == 0x1f7) {
4065                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4066                } else if (b == 0x2f7) {
4067                    if (ot != MO_64) {
4068                        tcg_gen_ext32s_tl(s->T0, s->T0);
4069                    }
4070                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4071                } else {
4072                    if (ot != MO_64) {
4073                        tcg_gen_ext32u_tl(s->T0, s->T0);
4074                    }
4075                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4076                }
4077                gen_op_mov_reg_v(s, ot, reg, s->T0);
4078                break;
4079
4080            case 0x0f3:
4081            case 0x1f3:
4082            case 0x2f3:
4083            case 0x3f3: /* Group 17 */
4084                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4085                    || !(s->prefix & PREFIX_VEX)
4086                    || s->vex_l != 0) {
4087                    goto illegal_op;
4088                }
4089                ot = mo_64_32(s->dflag);
4090                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4091
4092                tcg_gen_mov_tl(cpu_cc_src, s->T0);
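                /* All three isolate information about the lowest set
                   bit: for T0 = 0b10100, BLSR yields 0b10000, BLSMSK
                   0b00111 and BLSI 0b00100.  */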
4093                switch (reg & 7) {
4094                case 1: /* blsr By,Ey */
4095                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4096                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4097                    break;
4098                case 2: /* blsmsk By,Ey */
4099                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4100                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4101                    break;
4102                case 3: /* blsi By, Ey */
4103                    tcg_gen_neg_tl(s->T1, s->T0);
4104                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4105                    break;
4106                default:
4107                    goto unknown_op;
4108                }
4109                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4110                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4111                set_cc_op(s, CC_OP_BMILGB + ot);
4112                break;
4113
4114            default:
4115                goto unknown_op;
4116            }
4117            break;
4118
4119        case 0x03a:
4120        case 0x13a:
4121            b = modrm;
4122            modrm = x86_ldub_code(env, s);
4123            rm = modrm & 7;
4124            reg = ((modrm >> 3) & 7) | rex_r;
4125            mod = (modrm >> 6) & 3;
4126            if (b1 >= 2) {
4127                goto unknown_op;
4128            }
4129
4130            sse_fn_eppi = sse_op_table7[b].op[b1];
4131            if (!sse_fn_eppi) {
4132                goto unknown_op;
4133            }
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) {
                goto illegal_op;
            }
4136
4137            s->rip_offset = 1;
4138
4139            if (sse_fn_eppi == SSE_SPECIAL) {
4140                ot = mo_64_32(s->dflag);
4141                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(env, s, modrm);
                }
4144                reg = ((modrm >> 3) & 7) | rex_r;
4145                val = x86_ldub_code(env, s);
4146                switch (b) {
4147                case 0x14: /* pextrb */
4148                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4149                                            xmm_regs[reg].ZMM_B(val & 15)));
4150                    if (mod == 3) {
4151                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4152                    } else {
4153                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4154                                           s->mem_index, MO_UB);
4155                    }
4156                    break;
4157                case 0x15: /* pextrw */
4158                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4159                                            xmm_regs[reg].ZMM_W(val & 7)));
4160                    if (mod == 3) {
4161                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4162                    } else {
4163                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4164                                           s->mem_index, MO_LEUW);
4165                    }
4166                    break;
4167                case 0x16:
4168                    if (ot == MO_32) { /* pextrd */
4169                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4170                                        offsetof(CPUX86State,
4171                                                xmm_regs[reg].ZMM_L(val & 3)));
4172                        if (mod == 3) {
4173                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4174                        } else {
4175                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4176                                                s->mem_index, MO_LEUL);
4177                        }
4178                    } else { /* pextrq */
4179#ifdef TARGET_X86_64
4180                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4181                                        offsetof(CPUX86State,
4182                                                xmm_regs[reg].ZMM_Q(val & 1)));
4183                        if (mod == 3) {
4184                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4185                        } else {
4186                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4187                                                s->mem_index, MO_LEQ);
4188                        }
4189#else
4190                        goto illegal_op;
4191#endif
4192                    }
4193                    break;
4194                case 0x17: /* extractps */
4195                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4196                                            xmm_regs[reg].ZMM_L(val & 3)));
4197                    if (mod == 3) {
4198                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4199                    } else {
4200                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4201                                           s->mem_index, MO_LEUL);
4202                    }
4203                    break;
4204                case 0x20: /* pinsrb */
4205                    if (mod == 3) {
4206                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4207                    } else {
4208                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
4209                                           s->mem_index, MO_UB);
4210                    }
4211                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4212                                            xmm_regs[reg].ZMM_B(val & 15)));
4213                    break;
4214                case 0x21: /* insertps */
4215                    if (mod == 3) {
4216                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4217                                        offsetof(CPUX86State,xmm_regs[rm]
4218                                                .ZMM_L((val >> 6) & 3)));
4219                    } else {
4220                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4221                                            s->mem_index, MO_LEUL);
4222                    }
4223                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4224                                    offsetof(CPUX86State,xmm_regs[reg]
4225                                            .ZMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                       cpu_env, offsetof(CPUX86State,
                                               xmm_regs[reg].ZMM_L(0)));
                    }
                    if ((val >> 1) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                       cpu_env, offsetof(CPUX86State,
                                               xmm_regs[reg].ZMM_L(1)));
                    }
                    if ((val >> 2) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                       cpu_env, offsetof(CPUX86State,
                                               xmm_regs[reg].ZMM_L(2)));
                    }
                    if ((val >> 3) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                       cpu_env, offsetof(CPUX86State,
                                               xmm_regs[reg].ZMM_L(3)));
                    }
4242                    break;
4243                case 0x22:
4244                    if (ot == MO_32) { /* pinsrd */
4245                        if (mod == 3) {
4246                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4247                        } else {
4248                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4249                                                s->mem_index, MO_LEUL);
4250                        }
4251                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4252                                        offsetof(CPUX86State,
4253                                                xmm_regs[reg].ZMM_L(val & 3)));
4254                    } else { /* pinsrq */
4255#ifdef TARGET_X86_64
4256                        if (mod == 3) {
4257                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4258                        } else {
4259                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4260                                                s->mem_index, MO_LEQ);
4261                        }
4262                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4263                                        offsetof(CPUX86State,
4264                                                xmm_regs[reg].ZMM_Q(val & 1)));
4265#else
4266                        goto illegal_op;
4267#endif
4268                    }
4269                    break;
4270                }
4271                return;
4272            }
4273
4274            if (b1) {
4275                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4276                if (mod == 3) {
4277                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4278                } else {
4279                    op2_offset = offsetof(CPUX86State,xmm_t0);
4280                    gen_lea_modrm(env, s, modrm);
4281                    gen_ldo_env_A0(s, op2_offset);
4282                }
4283            } else {
4284                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4285                if (mod == 3) {
4286                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4287                } else {
4288                    op2_offset = offsetof(CPUX86State,mmx_t0);
4289                    gen_lea_modrm(env, s, modrm);
4290                    gen_ldq_env_A0(s, op2_offset);
4291                }
4292            }
4293            val = x86_ldub_code(env, s);
4294
4295            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4296                set_cc_op(s, CC_OP_EFLAGS);
4297
4298                if (s->dflag == MO_64) {
4299                    /* The helper must use entire 64-bit gp registers */
4300                    val |= 1 << 8;
4301                }
4302            }
4303
4304            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4305            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4306            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4307            break;
4308
4309        case 0x33a:
4310            /* Various integer extensions at 0f 3a f[0-f].  */
4311            b = modrm | (b1 << 8);
4312            modrm = x86_ldub_code(env, s);
4313            reg = ((modrm >> 3) & 7) | rex_r;
4314
4315            switch (b) {
4316            case 0x3f0: /* rorx Gy,Ey, Ib */
4317                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4318                    || !(s->prefix & PREFIX_VEX)
4319                    || s->vex_l != 0) {
4320                    goto illegal_op;
4321                }
4322                ot = mo_64_32(s->dflag);
4323                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4324                b = x86_ldub_code(env, s);
4325                if (ot == MO_64) {
4326                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4327                } else {
4328                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4329                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4330                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4331                }
4332                gen_op_mov_reg_v(s, ot, reg, s->T0);
4333                break;
4334
4335            default:
4336                goto unknown_op;
4337            }
4338            break;
4339
4340        default:
4341        unknown_op:
4342            gen_unknown_opcode(env, s);
4343            return;
4344        }
4345    } else {
4346        /* generic MMX or SSE operation */
4347        switch(b) {
4348        case 0x70: /* pshufx insn */
4349        case 0xc6: /* pshufx insn */
4350        case 0xc2: /* compare insns */
4351            s->rip_offset = 1;
4352            break;
4353        default:
4354            break;
4355        }
4356        if (is_xmm) {
4357            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4358            if (mod != 3) {
4359                int sz = 4;
4360
4361                gen_lea_modrm(env, s, modrm);
4362                op2_offset = offsetof(CPUX86State,xmm_t0);
4363
4364                switch (b) {
4365                case 0x50 ... 0x5a:
4366                case 0x5c ... 0x5f:
4367                case 0xc2:
4368                    /* Most sse scalar operations.  */
4369                    if (b1 == 2) {
4370                        sz = 2;
4371                    } else if (b1 == 3) {
4372                        sz = 3;
4373                    }
4374                    break;
4375
4376                case 0x2e:  /* ucomis[sd] */
4377                case 0x2f:  /* comis[sd] */
4378                    if (b1 == 0) {
4379                        sz = 2;
4380                    } else {
4381                        sz = 3;
4382                    }
4383                    break;
4384                }
4385
4386                switch (sz) {
4387                case 2:
4388                    /* 32 bit access */
4389                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
4390                    tcg_gen_st32_tl(s->T0, cpu_env,
4391                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4392                    break;
4393                case 3:
4394                    /* 64 bit access */
4395                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4396                    break;
4397                default:
4398                    /* 128 bit access */
4399                    gen_ldo_env_A0(s, op2_offset);
4400                    break;
4401                }
4402            } else {
4403                rm = (modrm & 7) | REX_B(s);
4404                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4405            }
4406        } else {
4407            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4408            if (mod != 3) {
4409                gen_lea_modrm(env, s, modrm);
4410                op2_offset = offsetof(CPUX86State,mmx_t0);
4411                gen_ldq_env_A0(s, op2_offset);
4412            } else {
4413                rm = (modrm & 7);
4414                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4415            }
4416        }
4417        switch(b) {
4418        case 0x0f: /* 3DNow! data insns */
4419            val = x86_ldub_code(env, s);
4420            sse_fn_epp = sse_op_table5[val];
4421            if (!sse_fn_epp) {
4422                goto unknown_op;
4423            }
4424            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4425                goto illegal_op;
4426            }
4427            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4428            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4429            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4430            break;
4431        case 0x70: /* pshufx insn */
4432        case 0xc6: /* pshufx insn */
4433            val = x86_ldub_code(env, s);
4434            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4435            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4436            /* XXX: introduce a new table? */
4437            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4438            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4439            break;
4440        case 0xc2:
4441            /* compare insns */
4442            val = x86_ldub_code(env, s);
            if (val >= 8) {
                goto unknown_op;
            }
4445            sse_fn_epp = sse_op_table4[val][b1];
4446
4447            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4448            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4449            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4450            break;
4451        case 0xf7:
4452            /* maskmov : we must prepare A0 */
            if (mod != 3) {
                goto illegal_op;
            }
4455            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4456            gen_extu(s->aflag, s->A0);
4457            gen_add_A0_ds_seg(s);
4458
4459            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4460            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4461            /* XXX: introduce a new table? */
4462            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4463            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4464            break;
4465        default:
4466            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4467            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4468            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4469            break;
4470        }
4471        if (b == 0x2e || b == 0x2f) {
4472            set_cc_op(s, CC_OP_EFLAGS);
4473        }
4474    }
4475}
4476
/* Convert one instruction.  s->base.is_jmp is set if the translation must
   be stopped.  Returns the next pc value.  */
4479static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4480{
4481    CPUX86State *env = cpu->env_ptr;
4482    int b, prefixes;
4483    int shift;
4484    MemOp ot, aflag, dflag;
4485    int modrm, reg, rm, mod, op, opreg, val;
4486    target_ulong next_eip, tval;
4487    int rex_w, rex_r;
4488    target_ulong pc_start = s->base.pc_next;
4489
4490    s->pc_start = s->pc = pc_start;
4491    s->override = -1;
4492#ifdef TARGET_X86_64
4493    s->rex_x = 0;
4494    s->rex_b = 0;
4495    s->x86_64_hregs = false;
4496#endif
4497    s->rip_offset = 0; /* for relative ip address */
4498    s->vex_l = 0;
4499    s->vex_v = 0;
4500    if (sigsetjmp(s->jmpbuf, 0) != 0) {
4501        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4502        return s->pc;
4503    }
4504
4505    prefixes = 0;
4506    rex_w = -1;
4507    rex_r = 0;
4508
4509 next_byte:
4510    b = x86_ldub_code(env, s);
4511    /* Collect prefixes.  */
4512    switch (b) {
4513    case 0xf3:
4514        prefixes |= PREFIX_REPZ;
4515        goto next_byte;
4516    case 0xf2:
4517        prefixes |= PREFIX_REPNZ;
4518        goto next_byte;
4519    case 0xf0:
4520        prefixes |= PREFIX_LOCK;
4521        goto next_byte;
4522    case 0x2e:
4523        s->override = R_CS;
4524        goto next_byte;
4525    case 0x36:
4526        s->override = R_SS;
4527        goto next_byte;
4528    case 0x3e:
4529        s->override = R_DS;
4530        goto next_byte;
4531    case 0x26:
4532        s->override = R_ES;
4533        goto next_byte;
4534    case 0x64:
4535        s->override = R_FS;
4536        goto next_byte;
4537    case 0x65:
4538        s->override = R_GS;
4539        goto next_byte;
4540    case 0x66:
4541        prefixes |= PREFIX_DATA;
4542        goto next_byte;
4543    case 0x67:
4544        prefixes |= PREFIX_ADR;
4545        goto next_byte;
4546#ifdef TARGET_X86_64
4547    case 0x40 ... 0x4f:
4548        if (CODE64(s)) {
4549            /* REX prefix */
4550            rex_w = (b >> 3) & 1;
4551            rex_r = (b & 0x4) << 1;
4552            s->rex_x = (b & 0x2) << 2;
4553            REX_B(s) = (b & 0x1) << 3;
4554            /* select uniform byte register addressing */
4555            s->x86_64_hregs = true;
4556            goto next_byte;
4557        }
4558        break;
4559#endif
4560    case 0xc5: /* 2-byte VEX */
4561    case 0xc4: /* 3-byte VEX */
        /* VEX prefixes are invalid in 16-bit and vm86 mode; there the
           c4/c5 bytes decode as LES or LDS.  (In 64-bit mode s->code32
           is set as well, so the test below covers long mode too.)  */
4564        if (s->code32 && !s->vm86) {
4565            static const int pp_prefix[4] = {
4566                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4567            };
4568            int vex3, vex2 = x86_ldub_code(env, s);
4569
4570            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4571                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4572                   otherwise the instruction is LES or LDS.  */
4573                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4574                break;
4575            }
4576
4577            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4578            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4579                            | PREFIX_LOCK | PREFIX_DATA)) {
4580                goto illegal_op;
4581            }
4582#ifdef TARGET_X86_64
4583            if (s->x86_64_hregs) {
4584                goto illegal_op;
4585            }
4586#endif
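            /* The VEX R/X/B and vvvv fields are stored inverted in the
               encoding, hence the complements below.  */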
4587            rex_r = (~vex2 >> 4) & 8;
4588            if (b == 0xc5) {
4589                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4590                vex3 = vex2;
4591                b = x86_ldub_code(env, s) | 0x100;
4592            } else {
4593                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4594#ifdef TARGET_X86_64
4595                s->rex_x = (~vex2 >> 3) & 8;
4596                s->rex_b = (~vex2 >> 2) & 8;
4597#endif
4598                vex3 = x86_ldub_code(env, s);
4599                rex_w = (vex3 >> 7) & 1;
4600                switch (vex2 & 0x1f) {
4601                case 0x01: /* Implied 0f leading opcode bytes.  */
4602                    b = x86_ldub_code(env, s) | 0x100;
4603                    break;
4604                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4605                    b = 0x138;
4606                    break;
4607                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4608                    b = 0x13a;
4609                    break;
4610                default:   /* Reserved for future use.  */
4611                    goto unknown_op;
4612                }
4613            }
4614            s->vex_v = (~vex3 >> 3) & 0xf;
4615            s->vex_l = (vex3 >> 2) & 1;
4616            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4617        }
4618        break;
4619    }
4620
4621    /* Post-process prefixes.  */
4622    if (CODE64(s)) {
4623        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4624           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4625           over 0x66 if both are present.  */
4626        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4627        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4628        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4629    } else {
4630        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4631        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4632            dflag = MO_32;
4633        } else {
4634            dflag = MO_16;
4635        }
4636        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4637        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4638            aflag = MO_32;
4639        } else {
4640            aflag = MO_16;
4641        }
4642    }
4643
4644    s->prefix = prefixes;
4645    s->aflag = aflag;
4646    s->dflag = dflag;
4647
4648    /* now check op code */
4649 reswitch:
4650    switch(b) {
4651    case 0x0f:
4652        /**************************/
4653        /* extended op code */
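        /* Two-byte 0f xx opcodes are remapped to 0x100 | xx so that the
           switch below can dispatch both opcode maps in one place.  */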
4654        b = x86_ldub_code(env, s) | 0x100;
4655        goto reswitch;
4656
4657        /**************************/
4658        /* arith & logic */
4659    case 0x00 ... 0x05:
4660    case 0x08 ... 0x0d:
4661    case 0x10 ... 0x15:
4662    case 0x18 ... 0x1d:
4663    case 0x20 ... 0x25:
4664    case 0x28 ... 0x2d:
4665    case 0x30 ... 0x35:
4666    case 0x38 ... 0x3d:
4667        {
4668            int op, f, val;
4669            op = (b >> 3) & 7;
4670            f = (b >> 1) & 3;
4671
4672            ot = mo_b_d(b, dflag);
4673
4674            switch(f) {
4675            case 0: /* OP Ev, Gv */
4676                modrm = x86_ldub_code(env, s);
4677                reg = ((modrm >> 3) & 7) | rex_r;
4678                mod = (modrm >> 6) & 3;
4679                rm = (modrm & 7) | REX_B(s);
4680                if (mod != 3) {
4681                    gen_lea_modrm(env, s, modrm);
4682                    opreg = OR_TMP0;
4683                } else if (op == OP_XORL && rm == reg) {
4684                xor_zero:
4685                    /* xor reg, reg optimisation */
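                    /* CC_OP_CLR encodes the fixed flag result of xor r,r
                       (ZF and PF set, all others clear), so no flag
                       computation needs to be generated.  */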
4686                    set_cc_op(s, CC_OP_CLR);
4687                    tcg_gen_movi_tl(s->T0, 0);
4688                    gen_op_mov_reg_v(s, ot, reg, s->T0);
4689                    break;
4690                } else {
4691                    opreg = rm;
4692                }
4693                gen_op_mov_v_reg(s, ot, s->T1, reg);
4694                gen_op(s, op, ot, opreg);
4695                break;
4696            case 1: /* OP Gv, Ev */
4697                modrm = x86_ldub_code(env, s);
4698                mod = (modrm >> 6) & 3;
4699                reg = ((modrm >> 3) & 7) | rex_r;
4700                rm = (modrm & 7) | REX_B(s);
4701                if (mod != 3) {
4702                    gen_lea_modrm(env, s, modrm);
4703                    gen_op_ld_v(s, ot, s->T1, s->A0);
4704                } else if (op == OP_XORL && rm == reg) {
4705                    goto xor_zero;
4706                } else {
4707                    gen_op_mov_v_reg(s, ot, s->T1, rm);
4708                }
4709                gen_op(s, op, ot, reg);
4710                break;
4711            case 2: /* OP A, Iv */
4712                val = insn_get(env, s, ot);
4713                tcg_gen_movi_tl(s->T1, val);
4714                gen_op(s, op, ot, OR_EAX);
4715                break;
4716            }
4717        }
4718        break;
4719
4720    case 0x82:
4721        if (CODE64(s))
4722            goto illegal_op;
4723        /* fall through */
4724    case 0x80: /* GRP1 */
4725    case 0x81:
4726    case 0x83:
4727        {
4728            int val;
4729
4730            ot = mo_b_d(b, dflag);
4731
4732            modrm = x86_ldub_code(env, s);
4733            mod = (modrm >> 6) & 3;
4734            rm = (modrm & 7) | REX_B(s);
4735            op = (modrm >> 3) & 7;
4736
4737            if (mod != 3) {
4738                if (b == 0x83)
4739                    s->rip_offset = 1;
4740                else
4741                    s->rip_offset = insn_const_size(ot);
4742                gen_lea_modrm(env, s, modrm);
4743                opreg = OR_TMP0;
4744            } else {
4745                opreg = rm;
4746            }
4747
4748            switch(b) {
4749            default:
4750            case 0x80:
4751            case 0x81:
4752            case 0x82:
4753                val = insn_get(env, s, ot);
4754                break;
4755            case 0x83:
4756                val = (int8_t)insn_get(env, s, MO_8);
4757                break;
4758            }
4759            tcg_gen_movi_tl(s->T1, val);
4760            gen_op(s, op, ot, opreg);
4761        }
4762        break;
4763
4764        /**************************/
4765        /* inc, dec, and other misc arith */
4766    case 0x40 ... 0x47: /* inc Gv */
4767        ot = dflag;
4768        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4769        break;
4770    case 0x48 ... 0x4f: /* dec Gv */
4771        ot = dflag;
4772        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4773        break;
4774    case 0xf6: /* GRP3 */
4775    case 0xf7:
4776        ot = mo_b_d(b, dflag);
4777
4778        modrm = x86_ldub_code(env, s);
4779        mod = (modrm >> 6) & 3;
4780        rm = (modrm & 7) | REX_B(s);
4781        op = (modrm >> 3) & 7;
4782        if (mod != 3) {
4783            if (op == 0) {
4784                s->rip_offset = insn_const_size(ot);
4785            }
4786            gen_lea_modrm(env, s, modrm);
4787            /* For those below that handle locked memory, don't load here.  */
4788            if (!(s->prefix & PREFIX_LOCK)
4789                || op != 2) {
4790                gen_op_ld_v(s, ot, s->T0, s->A0);
4791            }
4792        } else {
4793            gen_op_mov_v_reg(s, ot, s->T0, rm);
4794        }
4795
4796        switch(op) {
4797        case 0: /* test */
4798            val = insn_get(env, s, ot);
4799            tcg_gen_movi_tl(s->T1, val);
4800            gen_op_testl_T0_T1_cc(s);
4801            set_cc_op(s, CC_OP_LOGICB + ot);
4802            break;
4803        case 2: /* not */
4804            if (s->prefix & PREFIX_LOCK) {
4805                if (mod == 3) {
4806                    goto illegal_op;
4807                }
4808                tcg_gen_movi_tl(s->T0, ~0);
4809                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4810                                            s->mem_index, ot | MO_LE);
4811            } else {
4812                tcg_gen_not_tl(s->T0, s->T0);
4813                if (mod != 3) {
4814                    gen_op_st_v(s, ot, s->T0, s->A0);
4815                } else {
4816                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4817                }
4818            }
4819            break;
4820        case 3: /* neg */
4821            if (s->prefix & PREFIX_LOCK) {
4822                TCGLabel *label1;
4823                TCGv a0, t0, t1, t2;
4824
4825                if (mod == 3) {
4826                    goto illegal_op;
4827                }
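                /* Emulate "lock neg" with a cmpxchg retry loop: negate
                   the last value seen, try to swap it in, and loop if
                   another CPU changed the memory word in the meantime.  */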
4828                a0 = tcg_temp_local_new();
4829                t0 = tcg_temp_local_new();
4830                label1 = gen_new_label();
4831
4832                tcg_gen_mov_tl(a0, s->A0);
4833                tcg_gen_mov_tl(t0, s->T0);
4834
4835                gen_set_label(label1);
4836                t1 = tcg_temp_new();
4837                t2 = tcg_temp_new();
4838                tcg_gen_mov_tl(t2, t0);
4839                tcg_gen_neg_tl(t1, t0);
4840                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4841                                          s->mem_index, ot | MO_LE);
4842                tcg_temp_free(t1);
4843                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4844
4845                tcg_temp_free(t2);
4846                tcg_temp_free(a0);
4847                tcg_gen_mov_tl(s->T0, t0);
4848                tcg_temp_free(t0);
4849            } else {
4850                tcg_gen_neg_tl(s->T0, s->T0);
4851                if (mod != 3) {
4852                    gen_op_st_v(s, ot, s->T0, s->A0);
4853                } else {
4854                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4855                }
4856            }
4857            gen_op_update_neg_cc(s);
4858            set_cc_op(s, CC_OP_SUBB + ot);
4859            break;
4860        case 4: /* mul */
4861            switch(ot) {
4862            case MO_8:
4863                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4864                tcg_gen_ext8u_tl(s->T0, s->T0);
4865                tcg_gen_ext8u_tl(s->T1, s->T1);
4866                /* XXX: use 32 bit mul which could be faster */
4867                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4868                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4869                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4870                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4871                set_cc_op(s, CC_OP_MULB);
4872                break;
4873            case MO_16:
4874                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4875                tcg_gen_ext16u_tl(s->T0, s->T0);
4876                tcg_gen_ext16u_tl(s->T1, s->T1);
4877                /* XXX: use 32 bit mul which could be faster */
4878                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4879                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4880                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4881                tcg_gen_shri_tl(s->T0, s->T0, 16);
4882                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4883                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4884                set_cc_op(s, CC_OP_MULW);
4885                break;
4886            default:
4887            case MO_32:
4888                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4889                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4890                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4891                                  s->tmp2_i32, s->tmp3_i32);
4892                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4893                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4894                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4895                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4896                set_cc_op(s, CC_OP_MULL);
4897                break;
4898#ifdef TARGET_X86_64
4899            case MO_64:
4900                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4901                                  s->T0, cpu_regs[R_EAX]);
4902                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4903                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4904                set_cc_op(s, CC_OP_MULQ);
4905                break;
4906#endif
4907            }
4908            break;
4909        case 5: /* imul */
4910            switch(ot) {
4911            case MO_8:
4912                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4913                tcg_gen_ext8s_tl(s->T0, s->T0);
4914                tcg_gen_ext8s_tl(s->T1, s->T1);
4915                /* XXX: use 32 bit mul which could be faster */
4916                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4917                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4918                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
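                /* For imul, CF/OF are set iff the product overflows the
                   destination size: cc_src = result - sext8(result) is
                   nonzero exactly in that case.  */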
4919                tcg_gen_ext8s_tl(s->tmp0, s->T0);
4920                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4921                set_cc_op(s, CC_OP_MULB);
4922                break;
4923            case MO_16:
4924                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4925                tcg_gen_ext16s_tl(s->T0, s->T0);
4926                tcg_gen_ext16s_tl(s->T1, s->T1);
4927                /* XXX: use 32 bit mul which could be faster */
4928                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4929                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4930                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4931                tcg_gen_ext16s_tl(s->tmp0, s->T0);
4932                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4933                tcg_gen_shri_tl(s->T0, s->T0, 16);
4934                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4935                set_cc_op(s, CC_OP_MULW);
4936                break;
4937            default:
4938            case MO_32:
4939                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4940                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4941                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4942                                  s->tmp2_i32, s->tmp3_i32);
4943                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4944                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4945                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4946                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4947                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4948                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4949                set_cc_op(s, CC_OP_MULL);
4950                break;
4951#ifdef TARGET_X86_64
4952            case MO_64:
4953                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4954                                  s->T0, cpu_regs[R_EAX]);
4955                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4956                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4957                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4958                set_cc_op(s, CC_OP_MULQ);
4959                break;
4960#endif
4961            }
4962            break;
4963        case 6: /* div */
4964            switch(ot) {
4965            case MO_8:
4966                gen_helper_divb_AL(cpu_env, s->T0);
4967                break;
4968            case MO_16:
4969                gen_helper_divw_AX(cpu_env, s->T0);
4970                break;
4971            default:
4972            case MO_32:
4973                gen_helper_divl_EAX(cpu_env, s->T0);
4974                break;
4975#ifdef TARGET_X86_64
4976            case MO_64:
4977                gen_helper_divq_EAX(cpu_env, s->T0);
4978                break;
4979#endif
4980            }
4981            break;
4982        case 7: /* idiv */
4983            switch(ot) {
4984            case MO_8:
4985                gen_helper_idivb_AL(cpu_env, s->T0);
4986                break;
4987            case MO_16:
4988                gen_helper_idivw_AX(cpu_env, s->T0);
4989                break;
4990            default:
4991            case MO_32:
4992                gen_helper_idivl_EAX(cpu_env, s->T0);
4993                break;
4994#ifdef TARGET_X86_64
4995            case MO_64:
4996                gen_helper_idivq_EAX(cpu_env, s->T0);
4997                break;
4998#endif
4999            }
5000            break;
5001        default:
5002            goto unknown_op;
5003        }
5004        break;
5005
5006    case 0xfe: /* GRP4 */
5007    case 0xff: /* GRP5 */
5008        ot = mo_b_d(b, dflag);
5009
5010        modrm = x86_ldub_code(env, s);
5011        mod = (modrm >> 6) & 3;
5012        rm = (modrm & 7) | REX_B(s);
5013        op = (modrm >> 3) & 7;
5014        if (op >= 2 && b == 0xfe) {
5015            goto unknown_op;
5016        }
5017        if (CODE64(s)) {
5018            if (op == 2 || op == 4) {
5019                /* operand size for jumps is 64 bit */
5020                ot = MO_64;
5021            } else if (op == 3 || op == 5) {
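                /* lcall/ljmp Ev: 32-bit operand by default, 64-bit with
                   REX.W, 16-bit with a 66h prefix.  */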
5022                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
5023            } else if (op == 6) {
5024                /* default push size is 64 bit */
5025                ot = mo_pushpop(s, dflag);
5026            }
5027        }
5028        if (mod != 3) {
5029            gen_lea_modrm(env, s, modrm);
5030            if (op >= 2 && op != 3 && op != 5)
5031                gen_op_ld_v(s, ot, s->T0, s->A0);
5032        } else {
5033            gen_op_mov_v_reg(s, ot, s->T0, rm);
5034        }
5035
5036        switch(op) {
5037        case 0: /* inc Ev */
5038            if (mod != 3)
5039                opreg = OR_TMP0;
5040            else
5041                opreg = rm;
5042            gen_inc(s, ot, opreg, 1);
5043            break;
5044        case 1: /* dec Ev */
5045            if (mod != 3)
5046                opreg = OR_TMP0;
5047            else
5048                opreg = rm;
5049            gen_inc(s, ot, opreg, -1);
5050            break;
5051        case 2: /* call Ev */
5052            /* XXX: optimize if memory (no 'and' is necessary) */
5053            if (dflag == MO_16) {
5054                tcg_gen_ext16u_tl(s->T0, s->T0);
5055            }
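            /* s->pc already points past the instruction, so it is the
               return address to push.  */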
5056            next_eip = s->pc - s->cs_base;
5057            tcg_gen_movi_tl(s->T1, next_eip);
5058            gen_push_v(s, s->T1);
5059            gen_op_jmp_v(s->T0);
5060            gen_bnd_jmp(s);
5061            gen_jr(s, s->T0);
5062            break;
5063        case 3: /* lcall Ev */
5064            if (mod == 3) {
5065                goto illegal_op;
5066            }
5067            gen_op_ld_v(s, ot, s->T1, s->A0);
5068            gen_add_A0_im(s, 1 << ot);
5069            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5070        do_lcall:
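            /* The far-call helpers take the operand size as dflag - 1:
               0 for 16-bit, 1 for 32-bit, 2 for 64-bit.  */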
5071            if (s->pe && !s->vm86) {
5072                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5073                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5074                                           tcg_const_i32(dflag - 1),
5075                                           tcg_const_tl(s->pc - s->cs_base));
5076            } else {
5077                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5078                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5079                                      tcg_const_i32(dflag - 1),
5080                                      tcg_const_i32(s->pc - s->cs_base));
5081            }
5082            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5083            gen_jr(s, s->tmp4);
5084            break;
5085        case 4: /* jmp Ev */
5086            if (dflag == MO_16) {
5087                tcg_gen_ext16u_tl(s->T0, s->T0);
5088            }
5089            gen_op_jmp_v(s->T0);
5090            gen_bnd_jmp(s);
5091            gen_jr(s, s->T0);
5092            break;
5093        case 5: /* ljmp Ev */
5094            if (mod == 3) {
5095                goto illegal_op;
5096            }
5097            gen_op_ld_v(s, ot, s->T1, s->A0);
5098            gen_add_A0_im(s, 1 << ot);
5099            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5100        do_ljmp:
5101            if (s->pe && !s->vm86) {
5102                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5103                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5104                                          tcg_const_tl(s->pc - s->cs_base));
5105            } else {
5106                gen_op_movl_seg_T0_vm(s, R_CS);
5107                gen_op_jmp_v(s->T1);
5108            }
5109            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5110            gen_jr(s, s->tmp4);
5111            break;
5112        case 6: /* push Ev */
5113            gen_push_v(s, s->T0);
5114            break;
5115        default:
5116            goto unknown_op;
5117        }
5118        break;
5119
5120    case 0x84: /* test Ev, Gv */
5121    case 0x85:
5122        ot = mo_b_d(b, dflag);
5123
5124        modrm = x86_ldub_code(env, s);
5125        reg = ((modrm >> 3) & 7) | rex_r;
5126
5127        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5128        gen_op_mov_v_reg(s, ot, s->T1, reg);
5129        gen_op_testl_T0_T1_cc(s);
5130        set_cc_op(s, CC_OP_LOGICB + ot);
5131        break;
5132
5133    case 0xa8: /* test eAX, Iv */
5134    case 0xa9:
5135        ot = mo_b_d(b, dflag);
5136        val = insn_get(env, s, ot);
5137
5138        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5139        tcg_gen_movi_tl(s->T1, val);
5140        gen_op_testl_T0_T1_cc(s);
5141        set_cc_op(s, CC_OP_LOGICB + ot);
5142        break;
5143
5144    case 0x98: /* CWDE/CBW */
5145        switch (dflag) {
5146#ifdef TARGET_X86_64
5147        case MO_64:
5148            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5149            tcg_gen_ext32s_tl(s->T0, s->T0);
5150            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5151            break;
5152#endif
5153        case MO_32:
5154            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5155            tcg_gen_ext16s_tl(s->T0, s->T0);
5156            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5157            break;
5158        case MO_16:
5159            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5160            tcg_gen_ext8s_tl(s->T0, s->T0);
5161            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5162            break;
5163        default:
5164            tcg_abort();
5165        }
5166        break;
5167    case 0x99: /* CDQ/CWD */
5168        switch (dflag) {
5169#ifdef TARGET_X86_64
5170        case MO_64:
5171            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5172            tcg_gen_sari_tl(s->T0, s->T0, 63);
5173            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5174            break;
5175#endif
5176        case MO_32:
5177            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5178            tcg_gen_ext32s_tl(s->T0, s->T0);
5179            tcg_gen_sari_tl(s->T0, s->T0, 31);
5180            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5181            break;
5182        case MO_16:
5183            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5184            tcg_gen_ext16s_tl(s->T0, s->T0);
5185            tcg_gen_sari_tl(s->T0, s->T0, 15);
5186            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5187            break;
5188        default:
5189            tcg_abort();
5190        }
5191        break;
5192    case 0x1af: /* imul Gv, Ev */
5193    case 0x69: /* imul Gv, Ev, I */
5194    case 0x6b:
5195        ot = dflag;
5196        modrm = x86_ldub_code(env, s);
5197        reg = ((modrm >> 3) & 7) | rex_r;
5198        if (b == 0x69)
5199            s->rip_offset = insn_const_size(ot);
5200        else if (b == 0x6b)
5201            s->rip_offset = 1;
5202        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5203        if (b == 0x69) {
5204            val = insn_get(env, s, ot);
5205            tcg_gen_movi_tl(s->T1, val);
5206        } else if (b == 0x6b) {
5207            val = (int8_t)insn_get(env, s, MO_8);
5208            tcg_gen_movi_tl(s->T1, val);
5209        } else {
5210            gen_op_mov_v_reg(s, ot, s->T1, reg);
5211        }
5212        switch (ot) {
5213#ifdef TARGET_X86_64
5214        case MO_64:
5215            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5216            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5217            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5218            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5219            break;
5220#endif
5221        case MO_32:
5222            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5223            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5224            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5225                              s->tmp2_i32, s->tmp3_i32);
5226            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5227            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5228            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5229            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5230            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5231            break;
5232        default:
5233            tcg_gen_ext16s_tl(s->T0, s->T0);
5234            tcg_gen_ext16s_tl(s->T1, s->T1);
5235            /* XXX: use 32 bit mul which could be faster */
5236            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5237            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5238            tcg_gen_ext16s_tl(s->tmp0, s->T0);
5239            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5240            gen_op_mov_reg_v(s, ot, reg, s->T0);
5241            break;
5242        }
5243        set_cc_op(s, CC_OP_MULB + ot);
5244        break;
5245    case 0x1c0:
5246    case 0x1c1: /* xadd Ev, Gv */
5247        ot = mo_b_d(b, dflag);
5248        modrm = x86_ldub_code(env, s);
5249        reg = ((modrm >> 3) & 7) | rex_r;
5250        mod = (modrm >> 6) & 3;
5251        gen_op_mov_v_reg(s, ot, s->T0, reg);
5252        if (mod == 3) {
5253            rm = (modrm & 7) | REX_B(s);
5254            gen_op_mov_v_reg(s, ot, s->T1, rm);
5255            tcg_gen_add_tl(s->T0, s->T0, s->T1);
5256            gen_op_mov_reg_v(s, ot, reg, s->T1);
5257            gen_op_mov_reg_v(s, ot, rm, s->T0);
5258        } else {
5259            gen_lea_modrm(env, s, modrm);
5260            if (s->prefix & PREFIX_LOCK) {
5261                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5262                                            s->mem_index, ot | MO_LE);
5263                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5264            } else {
5265                gen_op_ld_v(s, ot, s->T1, s->A0);
5266                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5267                gen_op_st_v(s, ot, s->T0, s->A0);
5268            }
5269            gen_op_mov_reg_v(s, ot, reg, s->T1);
5270        }
5271        gen_op_update2_cc(s);
5272        set_cc_op(s, CC_OP_ADDB + ot);
5273        break;
5274    case 0x1b0:
5275    case 0x1b1: /* cmpxchg Ev, Gv */
5276        {
5277            TCGv oldv, newv, cmpv;
5278
5279            ot = mo_b_d(b, dflag);
5280            modrm = x86_ldub_code(env, s);
5281            reg = ((modrm >> 3) & 7) | rex_r;
5282            mod = (modrm >> 6) & 3;
5283            oldv = tcg_temp_new();
5284            newv = tcg_temp_new();
5285            cmpv = tcg_temp_new();
5286            gen_op_mov_v_reg(s, ot, newv, reg);
5287            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5288
5289            if (s->prefix & PREFIX_LOCK) {
5290                if (mod == 3) {
5291                    goto illegal_op;
5292                }
5293                gen_lea_modrm(env, s, modrm);
5294                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5295                                          s->mem_index, ot | MO_LE);
5296                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5297            } else {
5298                if (mod == 3) {
5299                    rm = (modrm & 7) | REX_B(s);
5300                    gen_op_mov_v_reg(s, ot, oldv, rm);
5301                } else {
5302                    gen_lea_modrm(env, s, modrm);
5303                    gen_op_ld_v(s, ot, oldv, s->A0);
5304                    rm = 0; /* avoid warning */
5305                }
5306                gen_extu(ot, oldv);
5307                gen_extu(ot, cmpv);
5308                /* store value = (old == cmp ? new : old);  */
5309                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5310                if (mod == 3) {
5311                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5312                    gen_op_mov_reg_v(s, ot, rm, newv);
5313                } else {
5314                    /* Perform an unconditional store cycle like physical cpu;
5315                       must be before changing accumulator to ensure
5316                       idempotency if the store faults and the instruction
5317                       is restarted */
5318                    gen_op_st_v(s, ot, newv, s->A0);
5319                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5320                }
5321            }
5322            tcg_gen_mov_tl(cpu_cc_src, oldv);
5323            tcg_gen_mov_tl(s->cc_srcT, cmpv);
5324            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5325            set_cc_op(s, CC_OP_SUBB + ot);
5326            tcg_temp_free(oldv);
5327            tcg_temp_free(newv);
5328            tcg_temp_free(cmpv);
5329        }
5330        break;
5331    case 0x1c7: /* cmpxchg8b */
5332        modrm = x86_ldub_code(env, s);
5333        mod = (modrm >> 6) & 3;
5334        switch ((modrm >> 3) & 7) {
5335        case 1: /* CMPXCHG8B, CMPXCHG16B */
5336            if (mod == 3) {
5337                goto illegal_op;
5338            }
5339#ifdef TARGET_X86_64
5340            if (dflag == MO_64) {
5341                if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5342                    goto illegal_op;
5343                }
5344                gen_lea_modrm(env, s, modrm);
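                /* The truly atomic helper is only needed when other
                   vCPUs may run in parallel; otherwise the cheaper
                   unlocked variant has the same effect.  */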
5345                if ((s->prefix & PREFIX_LOCK) &&
5346                    (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5347                    gen_helper_cmpxchg16b(cpu_env, s->A0);
5348                } else {
5349                    gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5350                }
5351                set_cc_op(s, CC_OP_EFLAGS);
5352                break;
5353            }
5354#endif
5355            if (!(s->cpuid_features & CPUID_CX8)) {
5356                goto illegal_op;
5357            }
5358            gen_lea_modrm(env, s, modrm);
5359            if ((s->prefix & PREFIX_LOCK) &&
5360                (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5361                gen_helper_cmpxchg8b(cpu_env, s->A0);
5362            } else {
5363                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5364            }
5365            set_cc_op(s, CC_OP_EFLAGS);
5366            break;
5367
5368        case 7: /* RDSEED */
5369        case 6: /* RDRAND */
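            /* RDSEED (above) shares this RDRAND implementation and
               backend helper.  */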
5370            if (mod != 3 ||
5371                (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5372                !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5373                goto illegal_op;
5374            }
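            /* Under icount, bracket the nondeterministic helper like an
               I/O access and end the TB afterwards.  */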
5375            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5376                gen_io_start();
5377            }
5378            gen_helper_rdrand(s->T0, cpu_env);
5379            rm = (modrm & 7) | REX_B(s);
5380            gen_op_mov_reg_v(s, dflag, rm, s->T0);
5381            set_cc_op(s, CC_OP_EFLAGS);
5382            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5383                gen_jmp(s, s->pc - s->cs_base);
5384            }
5385            break;
5386
5387        default:
5388            goto illegal_op;
5389        }
5390        break;
5391
5392        /**************************/
5393        /* push/pop */
5394    case 0x50 ... 0x57: /* push */
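        /* gen_op_mov_v_reg copies the whole register for sizes above
           MO_8, so reading with MO_32 also covers 64-bit pushes.  */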
5395        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5396        gen_push_v(s, s->T0);
5397        break;
5398    case 0x58 ... 0x5f: /* pop */
5399        ot = gen_pop_T0(s);
5400        /* NOTE: ESP is updated first so pop %sp keeps the popped value */
5401        gen_pop_update(s, ot);
5402        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5403        break;
5404    case 0x60: /* pusha */
5405        if (CODE64(s))
5406            goto illegal_op;
5407        gen_pusha(s);
5408        break;
5409    case 0x61: /* popa */
5410        if (CODE64(s))
5411            goto illegal_op;
5412        gen_popa(s);
5413        break;
5414    case 0x68: /* push Iv */
5415    case 0x6a:
5416        ot = mo_pushpop(s, dflag);
5417        if (b == 0x68)
5418            val = insn_get(env, s, ot);
5419        else
5420            val = (int8_t)insn_get(env, s, MO_8);
5421        tcg_gen_movi_tl(s->T0, val);
5422        gen_push_v(s, s->T0);
5423        break;
5424    case 0x8f: /* pop Ev */
5425        modrm = x86_ldub_code(env, s);
5426        mod = (modrm >> 6) & 3;
5427        ot = gen_pop_T0(s);
5428        if (mod == 3) {
5429            /* NOTE: ESP is updated first so pop %sp keeps the popped value */
5430            gen_pop_update(s, ot);
5431            rm = (modrm & 7) | REX_B(s);
5432            gen_op_mov_reg_v(s, ot, rm, s->T0);
5433        } else {
5434            /* NOTE: order is important too for MMU exceptions */
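            /* popl_esp_hack makes the modrm address computation see ESP
               as already incremented, matching hardware behaviour for a
               pop with an ESP-relative memory destination.  */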
5435            s->popl_esp_hack = 1 << ot;
5436            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5437            s->popl_esp_hack = 0;
5438            gen_pop_update(s, ot);
5439        }
5440        break;
5441    case 0xc8: /* enter */
5442        {
5443            int level;
5444            val = x86_lduw_code(env, s);
5445            level = x86_ldub_code(env, s);
5446            gen_enter(s, val, level);
5447        }
5448        break;
5449    case 0xc9: /* leave */
5450        gen_leave(s);
5451        break;
5452    case 0x06: /* push es */
5453    case 0x0e: /* push cs */
5454    case 0x16: /* push ss */
5455    case 0x1e: /* push ds */
5456        if (CODE64(s))
5457            goto illegal_op;
5458        gen_op_movl_T0_seg(s, b >> 3);
5459        gen_push_v(s, s->T0);
5460        break;
5461    case 0x1a0: /* push fs */
5462    case 0x1a8: /* push gs */
5463        gen_op_movl_T0_seg(s, (b >> 3) & 7);
5464        gen_push_v(s, s->T0);
5465        break;
5466    case 0x07: /* pop es */
5467    case 0x17: /* pop ss */
5468    case 0x1f: /* pop ds */
5469        if (CODE64(s))
5470            goto illegal_op;
5471        reg = b >> 3;
5472        ot = gen_pop_T0(s);
5473        gen_movl_seg_T0(s, reg);
5474        gen_pop_update(s, ot);
5475        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5476        if (s->base.is_jmp) {
5477            gen_jmp_im(s, s->pc - s->cs_base);
5478            if (reg == R_SS) {
5479                s->tf = 0;
5480                gen_eob_inhibit_irq(s, true);
5481            } else {
5482                gen_eob(s);
5483            }
5484        }
5485        break;
5486    case 0x1a1: /* pop fs */
5487    case 0x1a9: /* pop gs */
5488        ot = gen_pop_T0(s);
5489        gen_movl_seg_T0(s, (b >> 3) & 7);
5490        gen_pop_update(s, ot);
5491        if (s->base.is_jmp) {
5492            gen_jmp_im(s, s->pc - s->cs_base);
5493            gen_eob(s);
5494        }
5495        break;
5496
5497        /**************************/
5498        /* mov */
5499    case 0x88:
5500    case 0x89: /* mov Gv, Ev */
5501        ot = mo_b_d(b, dflag);
5502        modrm = x86_ldub_code(env, s);
5503        reg = ((modrm >> 3) & 7) | rex_r;
5504
5505        /* generate a generic store */
5506        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5507        break;
5508    case 0xc6:
5509    case 0xc7: /* mov Ev, Iv */
5510        ot = mo_b_d(b, dflag);
5511        modrm = x86_ldub_code(env, s);
5512        mod = (modrm >> 6) & 3;
5513        if (mod != 3) {
5514            s->rip_offset = insn_const_size(ot);
5515            gen_lea_modrm(env, s, modrm);
5516        }
5517        val = insn_get(env, s, ot);
5518        tcg_gen_movi_tl(s->T0, val);
5519        if (mod != 3) {
5520            gen_op_st_v(s, ot, s->T0, s->A0);
5521        } else {
5522            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5523        }
5524        break;
5525    case 0x8a:
5526    case 0x8b: /* mov Ev, Gv */
5527        ot = mo_b_d(b, dflag);
5528        modrm = x86_ldub_code(env, s);
5529        reg = ((modrm >> 3) & 7) | rex_r;
5530
5531        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5532        gen_op_mov_reg_v(s, ot, reg, s->T0);
5533        break;
5534    case 0x8e: /* mov seg, Gv */
5535        modrm = x86_ldub_code(env, s);
5536        reg = (modrm >> 3) & 7;
5537        if (reg >= 6 || reg == R_CS)
5538            goto illegal_op;
5539        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5540        gen_movl_seg_T0(s, reg);
5541        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5542        if (s->base.is_jmp) {
5543            gen_jmp_im(s, s->pc - s->cs_base);
5544            if (reg == R_SS) {
5545                s->tf = 0;
5546                gen_eob_inhibit_irq(s, true);
5547            } else {
5548                gen_eob(s);
5549            }
5550        }
5551        break;
5552    case 0x8c: /* mov Gv, seg */
5553        modrm = x86_ldub_code(env, s);
5554        reg = (modrm >> 3) & 7;
5555        mod = (modrm >> 6) & 3;
5556        if (reg >= 6)
5557            goto illegal_op;
5558        gen_op_movl_T0_seg(s, reg);
5559        ot = mod == 3 ? dflag : MO_16;
5560        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5561        break;
5562
5563    case 0x1b6: /* movzbS Gv, Eb */
5564    case 0x1b7: /* movzwS Gv, Ew */
5565    case 0x1be: /* movsbS Gv, Eb */
5566    case 0x1bf: /* movswS Gv, Ew */
5567        {
5568            MemOp d_ot;
5569            MemOp s_ot;
5570
5571            /* d_ot is the size of destination */
5572            d_ot = dflag;
5573            /* ot is the size of source */
5574            ot = (b & 1) + MO_8;
5575            /* s_ot is the sign+size of source */
5576            s_ot = b & 8 ? MO_SIGN | ot : ot;
5577
5578            modrm = x86_ldub_code(env, s);
5579            reg = ((modrm >> 3) & 7) | rex_r;
5580            mod = (modrm >> 6) & 3;
5581            rm = (modrm & 7) | REX_B(s);
5582
5583            if (mod == 3) {
5584                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5585                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5586                } else {
5587                    gen_op_mov_v_reg(s, ot, s->T0, rm);
5588                    switch (s_ot) {
5589                    case MO_UB:
5590                        tcg_gen_ext8u_tl(s->T0, s->T0);
5591                        break;
5592                    case MO_SB:
5593                        tcg_gen_ext8s_tl(s->T0, s->T0);
5594                        break;
5595                    case MO_UW:
5596                        tcg_gen_ext16u_tl(s->T0, s->T0);
5597                        break;
5598                    default:
5599                    case MO_SW:
5600                        tcg_gen_ext16s_tl(s->T0, s->T0);
5601                        break;
5602                    }
5603                }
5604                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5605            } else {
5606                gen_lea_modrm(env, s, modrm);
5607                gen_op_ld_v(s, s_ot, s->T0, s->A0);
5608                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5609            }
5610        }
5611        break;
5612
5613    case 0x8d: /* lea */
5614        modrm = x86_ldub_code(env, s);
5615        mod = (modrm >> 6) & 3;
5616        if (mod == 3)
5617            goto illegal_op;
5618        reg = ((modrm >> 3) & 7) | rex_r;
5619        {
5620            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5621            TCGv ea = gen_lea_modrm_1(s, a);
5622            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5623            gen_op_mov_reg_v(s, dflag, reg, s->A0);
5624        }
5625        break;
5626
5627    case 0xa0: /* mov EAX, Ov */
5628    case 0xa1:
5629    case 0xa2: /* mov Ov, EAX */
5630    case 0xa3:
5631        {
5632            target_ulong offset_addr;
5633
5634            ot = mo_b_d(b, dflag);
5635            switch (s->aflag) {
5636#ifdef TARGET_X86_64
5637            case MO_64:
5638                offset_addr = x86_ldq_code(env, s);
5639                break;
5640#endif
5641            default:
5642                offset_addr = insn_get(env, s, s->aflag);
5643                break;
5644            }
5645            tcg_gen_movi_tl(s->A0, offset_addr);
5646            gen_add_A0_ds_seg(s);
5647            if ((b & 2) == 0) {
5648                gen_op_ld_v(s, ot, s->T0, s->A0);
5649                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5650            } else {
5651                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5652                gen_op_st_v(s, ot, s->T0, s->A0);
5653            }
5654        }
5655        break;
5656    case 0xd7: /* xlat */
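        /* AL is a zero-extended index into the table at DS:[rBX]
           (the segment may be overridden).  */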
5657        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5658        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5659        tcg_gen_add_tl(s->A0, s->A0, s->T0);
5660        gen_extu(s->aflag, s->A0);
5661        gen_add_A0_ds_seg(s);
5662        gen_op_ld_v(s, MO_8, s->T0, s->A0);
5663        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5664        break;
5665    case 0xb0 ... 0xb7: /* mov R, Ib */
5666        val = insn_get(env, s, MO_8);
5667        tcg_gen_movi_tl(s->T0, val);
5668        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5669        break;
5670    case 0xb8 ... 0xbf: /* mov R, Iv */
5671#ifdef TARGET_X86_64
5672        if (dflag == MO_64) {
5673            uint64_t tmp;
5674            /* 64 bit case */
5675            tmp = x86_ldq_code(env, s);
5676            reg = (b & 7) | REX_B(s);
5677            tcg_gen_movi_tl(s->T0, tmp);
5678            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5679        } else
5680#endif
5681        {
5682            ot = dflag;
5683            val = insn_get(env, s, ot);
5684            reg = (b & 7) | REX_B(s);
5685            tcg_gen_movi_tl(s->T0, val);
5686            gen_op_mov_reg_v(s, ot, reg, s->T0);
5687        }
5688        break;
5689
5690    case 0x91 ... 0x97: /* xchg R, EAX */
5691    do_xchg_reg_eax:
5692        ot = dflag;
5693        reg = (b & 7) | REX_B(s);
5694        rm = R_EAX;
5695        goto do_xchg_reg;
5696    case 0x86:
5697    case 0x87: /* xchg Ev, Gv */
5698        ot = mo_b_d(b, dflag);
5699        modrm = x86_ldub_code(env, s);
5700        reg = ((modrm >> 3) & 7) | rex_r;
5701        mod = (modrm >> 6) & 3;
5702        if (mod == 3) {
5703            rm = (modrm & 7) | REX_B(s);
5704        do_xchg_reg:
5705            gen_op_mov_v_reg(s, ot, s->T0, reg);
5706            gen_op_mov_v_reg(s, ot, s->T1, rm);
5707            gen_op_mov_reg_v(s, ot, rm, s->T0);
5708            gen_op_mov_reg_v(s, ot, reg, s->T1);
5709        } else {
5710            gen_lea_modrm(env, s, modrm);
5711            gen_op_mov_v_reg(s, ot, s->T0, reg);
5712            /* for xchg, lock is implicit */
5713            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5714                                   s->mem_index, ot | MO_LE);
5715            gen_op_mov_reg_v(s, ot, reg, s->T1);
5716        }
5717        break;
5718    case 0xc4: /* les Gv */
5719        /* In CODE64 this is VEX3; see above.  */
5720        op = R_ES;
5721        goto do_lxx;
5722    case 0xc5: /* lds Gv */
5723        /* In CODE64 this is VEX2; see above.  */
5724        op = R_DS;
5725        goto do_lxx;
5726    case 0x1b2: /* lss Gv */
5727        op = R_SS;
5728        goto do_lxx;
5729    case 0x1b4: /* lfs Gv */
5730        op = R_FS;
5731        goto do_lxx;
5732    case 0x1b5: /* lgs Gv */
5733        op = R_GS;
5734    do_lxx:
5735        ot = dflag != MO_16 ? MO_32 : MO_16;
5736        modrm = x86_ldub_code(env, s);
5737        reg = ((modrm >> 3) & 7) | rex_r;
5738        mod = (modrm >> 6) & 3;
5739        if (mod == 3)
5740            goto illegal_op;
5741        gen_lea_modrm(env, s, modrm);
5742        gen_op_ld_v(s, ot, s->T1, s->A0);
5743        gen_add_A0_im(s, 1 << ot);
5744        /* load the segment first to handle exceptions properly */
5745        gen_op_ld_v(s, MO_16, s->T0, s->A0);
5746        gen_movl_seg_T0(s, op);
5747        /* then put the data */
5748        gen_op_mov_reg_v(s, ot, reg, s->T1);
5749        if (s->base.is_jmp) {
5750            gen_jmp_im(s, s->pc - s->cs_base);
5751            gen_eob(s);
5752        }
5753        break;
5754
5755        /************************/
5756        /* shifts */
5757    case 0xc0:
5758    case 0xc1:
5759        /* shift Ev,Ib */
5760        shift = 2;
5761    grp2:
5762        {
5763            ot = mo_b_d(b, dflag);
5764            modrm = x86_ldub_code(env, s);
5765            mod = (modrm >> 6) & 3;
5766            op = (modrm >> 3) & 7;
5767
5768            if (mod != 3) {
5769                if (shift == 2) {
5770                    s->rip_offset = 1;
5771                }
5772                gen_lea_modrm(env, s, modrm);
5773                opreg = OR_TMP0;
5774            } else {
5775                opreg = (modrm & 7) | REX_B(s);
5776            }
5777
5778            /* simpler op */
5779            if (shift == 0) {
5780                gen_shift(s, op, ot, opreg, OR_ECX);
5781            } else {
5782                if (shift == 2) {
5783                    shift = x86_ldub_code(env, s);
5784                }
5785                gen_shifti(s, op, ot, opreg, shift);
5786            }
5787        }
5788        break;
5789    case 0xd0:
5790    case 0xd1:
5791        /* shift Ev,1 */
5792        shift = 1;
5793        goto grp2;
5794    case 0xd2:
5795    case 0xd3:
5796        /* shift Ev,cl */
5797        shift = 0;
5798        goto grp2;
5799
5800    case 0x1a4: /* shld imm */
5801        op = 0;
5802        shift = 1;
5803        goto do_shiftd;
5804    case 0x1a5: /* shld cl */
5805        op = 0;
5806        shift = 0;
5807        goto do_shiftd;
5808    case 0x1ac: /* shrd imm */
5809        op = 1;
5810        shift = 1;
5811        goto do_shiftd;
5812    case 0x1ad: /* shrd cl */
5813        op = 1;
5814        shift = 0;
5815    do_shiftd:
5816        ot = dflag;
5817        modrm = x86_ldub_code(env, s);
5818        mod = (modrm >> 6) & 3;
5819        rm = (modrm & 7) | REX_B(s);
5820        reg = ((modrm >> 3) & 7) | rex_r;
5821        if (mod != 3) {
5822            gen_lea_modrm(env, s, modrm);
5823            opreg = OR_TMP0;
5824        } else {
5825            opreg = rm;
5826        }
5827        gen_op_mov_v_reg(s, ot, s->T1, reg);
5828
5829        if (shift) {
5830            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5831            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5832            tcg_temp_free(imm);
5833        } else {
5834            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5835        }
5836        break;
5837
5838        /************************/
5839        /* floats */
5840    case 0xd8 ... 0xdf:
5841        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5842            /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5843            /* XXX: what to do on an illegal op? */
5844            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5845            break;
5846        }
5847        modrm = x86_ldub_code(env, s);
5848        mod = (modrm >> 6) & 3;
5849        rm = modrm & 7;
5850        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
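        /* Fold the low 3 opcode bits (d8..df) and the modrm reg field
           into one 6-bit index; e.g. d9 /5 (fldcw) yields op 0x0d.  */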
5851        if (mod != 3) {
5852            /* memory op */
5853            gen_lea_modrm(env, s, modrm);
5854            switch(op) {
5855            case 0x00 ... 0x07: /* fxxxs */
5856            case 0x10 ... 0x17: /* fixxxl */
5857            case 0x20 ... 0x27: /* fxxxl */
5858            case 0x30 ... 0x37: /* fixxx */
5859                {
5860                    int op1;
5861                    op1 = op & 7;
5862
5863                    switch(op >> 4) {
5864                    case 0:
5865                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5866                                            s->mem_index, MO_LEUL);
5867                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5868                        break;
5869                    case 1:
5870                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5871                                            s->mem_index, MO_LEUL);
5872                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5873                        break;
5874                    case 2:
5875                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5876                                            s->mem_index, MO_LEQ);
5877                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5878                        break;
5879                    case 3:
5880                    default:
5881                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5882                                            s->mem_index, MO_LESW);
5883                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5884                        break;
5885                    }
5886
5887                    gen_helper_fp_arith_ST0_FT0(op1);
5888                    if (op1 == 3) {
5889                        /* fcomp needs pop */
5890                        gen_helper_fpop(cpu_env);
5891                    }
5892                }
5893                break;
5894            case 0x08: /* flds */
5895            case 0x0a: /* fsts */
5896            case 0x0b: /* fstps */
5897            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5898            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5899            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5900                switch(op & 7) {
5901                case 0:
5902                    switch(op >> 4) {
5903                    case 0:
5904                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5905                                            s->mem_index, MO_LEUL);
5906                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5907                        break;
5908                    case 1:
5909                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5910                                            s->mem_index, MO_LEUL);
5911                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5912                        break;
5913                    case 2:
5914                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5915                                            s->mem_index, MO_LEQ);
5916                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5917                        break;
5918                    case 3:
5919                    default:
5920                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5921                                            s->mem_index, MO_LESW);
5922                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5923                        break;
5924                    }
5925                    break;
5926                case 1:
5927                    /* XXX: the corresponding CPUID bit must be tested! */
5928                    switch(op >> 4) {
5929                    case 1:
5930                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5931                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5932                                            s->mem_index, MO_LEUL);
5933                        break;
5934                    case 2:
5935                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5936                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5937                                            s->mem_index, MO_LEQ);
5938                        break;
5939                    case 3:
5940                    default:
5941                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5942                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5943                                            s->mem_index, MO_LEUW);
5944                        break;
5945                    }
5946                    gen_helper_fpop(cpu_env);
5947                    break;
5948                default:
5949                    switch(op >> 4) {
5950                    case 0:
5951                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5952                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5953                                            s->mem_index, MO_LEUL);
5954                        break;
5955                    case 1:
5956                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5957                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5958                                            s->mem_index, MO_LEUL);
5959                        break;
5960                    case 2:
                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                            s->mem_index, MO_LEQ);
                        break;
                    case 3:
                    default:
                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUW);
                        break;
                    }
                    if ((op & 7) == 3)
                        gen_helper_fpop(cpu_env);
                    break;
                }
                break;
            case 0x0c: /* fldenv mem */
                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x0d: /* fldcw mem */
                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                gen_helper_fldcw(cpu_env, s->tmp2_i32);
                break;
            case 0x0e: /* fnstenv mem */
                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x0f: /* fnstcw mem */
                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                break;
            case 0x1d: /* fldt mem */
                gen_helper_fldt_ST0(cpu_env, s->A0);
                break;
            case 0x1f: /* fstpt mem */
                gen_helper_fstt_ST0(cpu_env, s->A0);
                gen_helper_fpop(cpu_env);
                break;
            case 0x2c: /* frstor mem */
                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x2e: /* fnsave mem */
                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x2f: /* fnstsw mem */
                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                break;
            case 0x3c: /* fbld */
                gen_helper_fbld_ST0(cpu_env, s->A0);
                break;
            case 0x3e: /* fbstp */
                gen_helper_fbst_ST0(cpu_env, s->A0);
                gen_helper_fpop(cpu_env);
                break;
            case 0x3d: /* fildll */
                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
                break;
            case 0x3f: /* fistpll */
                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                gen_helper_fpop(cpu_env);
                break;
            default:
                goto unknown_op;
            }
        } else {
            /* register float ops */
            opreg = rm;

            switch(op) {
            case 0x08: /* fld sti */
                gen_helper_fpush(cpu_env);
                gen_helper_fmov_ST0_STN(cpu_env,
                                        tcg_const_i32((opreg + 1) & 7));
                break;
            case 0x09: /* fxchg sti */
            case 0x29: /* fxchg4 sti, undocumented op */
            case 0x39: /* fxchg7 sti, undocumented op */
                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x0a: /* grp d9/2 */
                switch(rm) {
                case 0: /* fnop */
                    /* check exceptions (FreeBSD FPU probe) */
                    gen_helper_fwait(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x0c: /* grp d9/4 */
                switch(rm) {
                case 0: /* fchs */
                    gen_helper_fchs_ST0(cpu_env);
                    break;
                case 1: /* fabs */
                    gen_helper_fabs_ST0(cpu_env);
                    break;
                case 4: /* ftst */
                    gen_helper_fldz_FT0(cpu_env);
                    gen_helper_fcom_ST0_FT0(cpu_env);
                    break;
                case 5: /* fxam */
                    gen_helper_fxam_ST0(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x0d: /* grp d9/5 */
                {
                    switch(rm) {
                    case 0:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fld1_ST0(cpu_env);
                        break;
                    case 1:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldl2t_ST0(cpu_env);
                        break;
                    case 2:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldl2e_ST0(cpu_env);
                        break;
                    case 3:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldpi_ST0(cpu_env);
                        break;
                    case 4:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldlg2_ST0(cpu_env);
                        break;
                    case 5:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldln2_ST0(cpu_env);
                        break;
                    case 6:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldz_ST0(cpu_env);
                        break;
                    default:
                        goto unknown_op;
                    }
                }
                break;
            case 0x0e: /* grp d9/6 */
                switch(rm) {
                case 0: /* f2xm1 */
                    gen_helper_f2xm1(cpu_env);
                    break;
                case 1: /* fyl2x */
                    gen_helper_fyl2x(cpu_env);
                    break;
                case 2: /* fptan */
                    gen_helper_fptan(cpu_env);
                    break;
                case 3: /* fpatan */
                    gen_helper_fpatan(cpu_env);
                    break;
                case 4: /* fxtract */
                    gen_helper_fxtract(cpu_env);
                    break;
                case 5: /* fprem1 */
                    gen_helper_fprem1(cpu_env);
                    break;
                case 6: /* fdecstp */
                    gen_helper_fdecstp(cpu_env);
                    break;
                default:
                case 7: /* fincstp */
                    gen_helper_fincstp(cpu_env);
                    break;
                }
                break;
            case 0x0f: /* grp d9/7 */
                switch(rm) {
                case 0: /* fprem */
                    gen_helper_fprem(cpu_env);
                    break;
                case 1: /* fyl2xp1 */
                    gen_helper_fyl2xp1(cpu_env);
                    break;
                case 2: /* fsqrt */
                    gen_helper_fsqrt(cpu_env);
                    break;
                case 3: /* fsincos */
                    gen_helper_fsincos(cpu_env);
                    break;
                case 5: /* fscale */
                    gen_helper_fscale(cpu_env);
                    break;
                case 4: /* frndint */
                    gen_helper_frndint(cpu_env);
                    break;
                case 6: /* fsin */
                    gen_helper_fsin(cpu_env);
                    break;
                default:
                case 7: /* fcos */
                    gen_helper_fcos(cpu_env);
                    break;
                }
                break;
            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
                {
                    int op1;

                    op1 = op & 7;
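                    /* op1 is the low three opcode bits and selects the
                       arithmetic operation.  The 0x2x forms write ST(i)
                       instead of ST0, and the 0x3x forms also pop.  */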
                    if (op >= 0x20) {
                        gen_helper_fp_arith_STN_ST0(op1, opreg);
                        if (op >= 0x30)
                            gen_helper_fpop(cpu_env);
                    } else {
                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                        gen_helper_fp_arith_ST0_FT0(op1);
                    }
                }
                break;
            case 0x02: /* fcom */
            case 0x22: /* fcom2, undocumented op */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcom_ST0_FT0(cpu_env);
                break;
            case 0x03: /* fcomp */
            case 0x23: /* fcomp3, undocumented op */
            case 0x32: /* fcomp5, undocumented op */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcom_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                break;
            case 0x15: /* da/5 */
                switch(rm) {
                case 1: /* fucompp */
                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
                    gen_helper_fucom_ST0_FT0(cpu_env);
                    gen_helper_fpop(cpu_env);
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x1c:
                switch(rm) {
                case 0: /* feni (287 only, just do nop here) */
                    break;
                case 1: /* fdisi (287 only, just do nop here) */
                    break;
                case 2: /* fclex */
                    gen_helper_fclex(cpu_env);
                    break;
                case 3: /* fninit */
                    gen_helper_fninit(cpu_env);
                    break;
                case 4: /* fsetpm (287 only, just do nop here) */
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x1d: /* fucomi */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
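                /* fucomi/fcomi write ZF/PF/CF directly: flush the lazy
                   flags state first, then mark EFLAGS as live.  */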
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucomi_ST0_FT0(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x1e: /* fcomi */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcomi_ST0_FT0(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x28: /* ffree sti */
                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x2a: /* fst sti */
                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x2b: /* fstp sti */
            case 0x0b: /* fstp1 sti, undocumented op */
            case 0x3a: /* fstp8 sti, undocumented op */
            case 0x3b: /* fstp9 sti, undocumented op */
                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                gen_helper_fpop(cpu_env);
                break;
            case 0x2c: /* fucom st(i) */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucom_ST0_FT0(cpu_env);
                break;
            case 0x2d: /* fucomp st(i) */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucom_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                break;
            case 0x33: /* de/3 */
                switch(rm) {
                case 1: /* fcompp */
                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
                    gen_helper_fcom_ST0_FT0(cpu_env);
                    gen_helper_fpop(cpu_env);
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x38: /* ffreep sti, undocumented op */
                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fpop(cpu_env);
                break;
            case 0x3c: /* df/4 */
                switch(rm) {
                case 0:
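                    /* fnstsw %ax: the FPU status word is returned in AX
                       rather than stored to memory.  */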
                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x3d: /* fucomip */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucomi_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x3e: /* fcomip */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcomi_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x10 ... 0x13: /* fcmovxx */
            case 0x18 ... 0x1b:
                {
                    int op1;
                    TCGLabel *l1;
                    static const uint8_t fcmov_cc[8] = {
                        (JCC_B << 1),
                        (JCC_Z << 1),
                        (JCC_BE << 1),
                        (JCC_P << 1),
                    };

                    if (!(s->cpuid_features & CPUID_CMOV)) {
                        goto illegal_op;
                    }
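                    /* Bits 0-1 of the opcode pick B/Z/BE/P; bit 3
                       distinguishes fcmovcc from fcmovncc.  The branch
                       below skips the move, so it tests the *inverse*
                       of the fcmov condition, hence the XOR with 1.  */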
                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                    l1 = gen_new_label();
                    gen_jcc1_noeob(s, op1, l1);
                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
                    gen_set_label(l1);
                }
                break;
            default:
                goto unknown_op;
            }
        }
        break;
        /************************/
        /* string ops */

    case 0xa4: /* movsS */
    case 0xa5:
        ot = mo_b_d(b, dflag);
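        /* With a REP prefix the helper expands to a loop over ECX;
           otherwise a single iteration is generated inline.  */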
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_movs(s, ot);
        }
        break;

    case 0xaa: /* stosS */
    case 0xab:
        ot = mo_b_d(b, dflag);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_stos(s, ot);
        }
        break;
    case 0xac: /* lodsS */
    case 0xad:
        ot = mo_b_d(b, dflag);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_lods(s, ot);
        }
        break;
    case 0xae: /* scasS */
    case 0xaf:
        ot = mo_b_d(b, dflag);
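        /* For scas/cmps, REPNZ and REPZ differ only in the ZF value
           that terminates the loop (the final helper argument).  */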
        if (prefixes & PREFIX_REPNZ) {
            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
        } else if (prefixes & PREFIX_REPZ) {
            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
        } else {
            gen_scas(s, ot);
        }
        break;

    case 0xa6: /* cmpsS */
    case 0xa7:
        ot = mo_b_d(b, dflag);
        if (prefixes & PREFIX_REPNZ) {
            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
        } else if (prefixes & PREFIX_REPZ) {
            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
        } else {
            gen_cmps(s, ot);
        }
        break;
    case 0x6c: /* insS */
    case 0x6d:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
            /* jump generated by gen_repz_ins */
        } else {
            gen_ins(s, ot);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;
    case 0x6e: /* outsS */
    case 0x6f:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     svm_is_rep(prefixes) | 4);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
            /* jump generated by gen_repz_outs */
        } else {
            gen_outs(s, ot);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;

        /************************/
        /* port I/O */

    case 0xe4:
    case 0xe5:
        ot = mo_b_d32(b, dflag);
        val = x86_ldub_code(env, s);
        tcg_gen_movi_tl(s->T0, val);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
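        /* In icount mode an I/O access must be the last thing in the
           TB so the instruction counter stays exact: bracket it with
           gen_io_start() and end the TB with a jump afterwards.  */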
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        tcg_gen_movi_i32(s->tmp2_i32, val);
        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
        gen_bpt_io(s, s->tmp2_i32, ot);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xe6:
    case 0xe7:
        ot = mo_b_d32(b, dflag);
        val = x86_ldub_code(env, s);
        tcg_gen_movi_tl(s->T0, val);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     svm_is_rep(prefixes));
        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);

        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        tcg_gen_movi_i32(s->tmp2_i32, val);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
        gen_bpt_io(s, s->tmp2_i32, ot);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xec:
    case 0xed:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
        gen_bpt_io(s, s->tmp2_i32, ot);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xee:
    case 0xef:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     svm_is_rep(prefixes));
        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);

        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
        gen_bpt_io(s, s->tmp2_i32, ot);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;

        /************************/
        /* control */
    case 0xc2: /* ret im */
        val = x86_ldsw_code(env, s);
        ot = gen_pop_T0(s);
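        /* Release the return address (1 << ot bytes) plus the imm16
           operand from the stack in a single adjustment.  */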
        gen_stack_update(s, val + (1 << ot));
        /* Note that gen_pop_T0 uses a zero-extending load.  */
        gen_op_jmp_v(s->T0);
        gen_bnd_jmp(s);
        gen_jr(s, s->T0);
        break;
    case 0xc3: /* ret */
        ot = gen_pop_T0(s);
        gen_pop_update(s, ot);
        /* Note that gen_pop_T0 uses a zero-extending load.  */
        gen_op_jmp_v(s->T0);
        gen_bnd_jmp(s);
        gen_jr(s, s->T0);
        break;
    case 0xca: /* lret im */
        val = x86_ldsw_code(env, s);
    do_lret:
        if (s->pe && !s->vm86) {
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                      tcg_const_i32(val));
        } else {
            gen_stack_A0(s);
            /* pop offset */
            gen_op_ld_v(s, dflag, s->T0, s->A0);
            /* NOTE: keeping EIP updated is not a problem in case of
               exception */
            gen_op_jmp_v(s->T0);
            /* pop selector */
            gen_add_A0_im(s, 1 << dflag);
            gen_op_ld_v(s, dflag, s->T0, s->A0);
            gen_op_movl_seg_T0_vm(s, R_CS);
            /* add stack offset */
            gen_stack_update(s, val + (2 << dflag));
        }
        gen_eob(s);
        break;
    case 0xcb: /* lret */
        val = 0;
        goto do_lret;
    case 0xcf: /* iret */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
        if (!s->pe) {
            /* real mode */
            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
            set_cc_op(s, CC_OP_EFLAGS);
        } else if (s->vm86) {
            if (s->iopl != 3) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            } else {
                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
                set_cc_op(s, CC_OP_EFLAGS);
            }
        } else {
            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                      tcg_const_i32(s->pc - s->cs_base));
            set_cc_op(s, CC_OP_EFLAGS);
        }
        gen_eob(s);
        break;
    case 0xe8: /* call im */
        {
            if (dflag != MO_16) {
                tval = (int32_t)insn_get(env, s, MO_32);
            } else {
                tval = (int16_t)insn_get(env, s, MO_16);
            }
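            /* The immediate is an EIP-relative displacement; outside
               64-bit mode the target wraps at the operand size.  */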
            next_eip = s->pc - s->cs_base;
            tval += next_eip;
            if (dflag == MO_16) {
                tval &= 0xffff;
            } else if (!CODE64(s)) {
                tval &= 0xffffffff;
            }
            tcg_gen_movi_tl(s->T0, next_eip);
            gen_push_v(s, s->T0);
            gen_bnd_jmp(s);
            gen_jmp(s, tval);
        }
        break;
    case 0x9a: /* lcall im */
        {
            unsigned int selector, offset;

            if (CODE64(s))
                goto illegal_op;
            ot = dflag;
            offset = insn_get(env, s, ot);
            selector = insn_get(env, s, MO_16);

            tcg_gen_movi_tl(s->T0, selector);
            tcg_gen_movi_tl(s->T1, offset);
        }
        goto do_lcall;
    case 0xe9: /* jmp im */
        if (dflag != MO_16) {
            tval = (int32_t)insn_get(env, s, MO_32);
        } else {
            tval = (int16_t)insn_get(env, s, MO_16);
        }
        tval += s->pc - s->cs_base;
        if (dflag == MO_16) {
            tval &= 0xffff;
        } else if (!CODE64(s)) {
            tval &= 0xffffffff;
        }
        gen_bnd_jmp(s);
        gen_jmp(s, tval);
        break;
    case 0xea: /* ljmp im */
        {
            unsigned int selector, offset;

            if (CODE64(s))
                goto illegal_op;
            ot = dflag;
            offset = insn_get(env, s, ot);
            selector = insn_get(env, s, MO_16);

            tcg_gen_movi_tl(s->T0, selector);
            tcg_gen_movi_tl(s->T1, offset);
        }
        goto do_ljmp;
    case 0xeb: /* jmp Jb */
        tval = (int8_t)insn_get(env, s, MO_8);
        tval += s->pc - s->cs_base;
        if (dflag == MO_16) {
            tval &= 0xffff;
        }
        gen_jmp(s, tval);
        break;
    case 0x70 ... 0x7f: /* jcc Jb */
        tval = (int8_t)insn_get(env, s, MO_8);
        goto do_jcc;
    case 0x180 ... 0x18f: /* jcc Jv */
        if (dflag != MO_16) {
            tval = (int32_t)insn_get(env, s, MO_32);
        } else {
            tval = (int16_t)insn_get(env, s, MO_16);
        }
    do_jcc:
        next_eip = s->pc - s->cs_base;
        tval += next_eip;
        if (dflag == MO_16) {
            tval &= 0xffff;
        }
        gen_bnd_jmp(s);
        gen_jcc(s, b, tval, next_eip);
        break;

    case 0x190 ... 0x19f: /* setcc Gv */
        modrm = x86_ldub_code(env, s);
        gen_setcc1(s, b, s->T0);
        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
        break;
    case 0x140 ... 0x14f: /* cmov Gv, Ev */
        if (!(s->cpuid_features & CPUID_CMOV)) {
            goto illegal_op;
        }
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        gen_cmovcc1(env, s, ot, b, modrm, reg);
        break;

        /************************/
        /* flags */
    case 0x9c: /* pushf */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
        if (s->vm86 && s->iopl != 3) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_update_cc_op(s);
            gen_helper_read_eflags(s->T0, cpu_env);
            gen_push_v(s, s->T0);
        }
        break;
    case 0x9d: /* popf */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
        if (s->vm86 && s->iopl != 3) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            ot = gen_pop_T0(s);
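            /* The set of writable EFLAGS bits depends on privilege:
               CPL 0 may also change IF and IOPL, CPL <= IOPL may
               change IF, and anything else may change neither.  */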
            if (s->cpl == 0) {
                if (dflag != MO_16) {
                    gen_helper_write_eflags(cpu_env, s->T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK |
                                                           IF_MASK |
                                                           IOPL_MASK)));
                } else {
                    gen_helper_write_eflags(cpu_env, s->T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK |
                                                           IF_MASK | IOPL_MASK)
                                                          & 0xffff));
                }
            } else {
                if (s->cpl <= s->iopl) {
                    if (dflag != MO_16) {
                        gen_helper_write_eflags(cpu_env, s->T0,
                                                tcg_const_i32((TF_MASK |
                                                               AC_MASK |
                                                               ID_MASK |
                                                               NT_MASK |
                                                               IF_MASK)));
                    } else {
                        gen_helper_write_eflags(cpu_env, s->T0,
                                                tcg_const_i32((TF_MASK |
                                                               AC_MASK |
                                                               ID_MASK |
                                                               NT_MASK |
                                                               IF_MASK)
                                                              & 0xffff));
                    }
                } else {
                    if (dflag != MO_16) {
                        gen_helper_write_eflags(cpu_env, s->T0,
                                           tcg_const_i32((TF_MASK | AC_MASK |
                                                          ID_MASK | NT_MASK)));
                    } else {
                        gen_helper_write_eflags(cpu_env, s->T0,
                                           tcg_const_i32((TF_MASK | AC_MASK |
                                                          ID_MASK | NT_MASK)
                                                         & 0xffff));
                    }
                }
            }
            gen_pop_update(s, ot);
            set_cc_op(s, CC_OP_EFLAGS);
            /* abort translation because TF/AC flag may change */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
    case 0x9e: /* sahf */
        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
            goto illegal_op;
        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
        gen_compute_eflags(s);
        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
        break;
    case 0x9f: /* lahf */
        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
            goto illegal_op;
        gen_compute_eflags(s);
        /* Note: gen_compute_eflags() only gives the condition codes */
        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
        break;
    case 0xf5: /* cmc */
        gen_compute_eflags(s);
        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
        break;
    case 0xf8: /* clc */
        gen_compute_eflags(s);
        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
        break;
    case 0xf9: /* stc */
        gen_compute_eflags(s);
        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
        break;
    case 0xfc: /* cld */
        tcg_gen_movi_i32(s->tmp2_i32, 1);
        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
        break;
    case 0xfd: /* std */
        tcg_gen_movi_i32(s->tmp2_i32, -1);
        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
        break;

        /************************/
        /* bit operations */
    case 0x1ba: /* bt/bts/btr/btc Gv, im */
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        op = (modrm >> 3) & 7;
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        if (mod != 3) {
            s->rip_offset = 1;
            gen_lea_modrm(env, s, modrm);
            if (!(s->prefix & PREFIX_LOCK)) {
                gen_op_ld_v(s, ot, s->T0, s->A0);
            }
        } else {
            gen_op_mov_v_reg(s, ot, s->T0, rm);
        }
        /* load shift */
        val = x86_ldub_code(env, s);
        tcg_gen_movi_tl(s->T1, val);
        if (op < 4)
            goto unknown_op;
        op -= 4;
        goto bt_op;
    case 0x1a3: /* bt Gv, Ev */
        op = 0;
        goto do_btx;
    case 0x1ab: /* bts */
        op = 1;
        goto do_btx;
    case 0x1b3: /* btr */
        op = 2;
        goto do_btx;
    case 0x1bb: /* btc */
        op = 3;
    do_btx:
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
        if (mod != 3) {
            AddressParts a = gen_lea_modrm_0(env, s, modrm);
            /* specific case: we need to add a displacement */
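            /* The sign-extended bit offset can address memory outside
               the nominal operand: fold (offset >> (3 + ot)) << ot
               bytes into the effective address.  */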
            gen_exts(ot, s->T1);
            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
            if (!(s->prefix & PREFIX_LOCK)) {
                gen_op_ld_v(s, ot, s->T0, s->A0);
            }
        } else {
            gen_op_mov_v_reg(s, ot, s->T0, rm);
        }
    bt_op:
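        /* T1 holds the bit offset: reduce it modulo the operand width
           and build the single-bit mask in tmp0.  */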
        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
        tcg_gen_movi_tl(s->tmp0, 1);
        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
        if (s->prefix & PREFIX_LOCK) {
            switch (op) {
            case 0: /* bt */
                /* Needs no atomic ops; we suppressed the normal
                   memory load for LOCK above so do it now.  */
                gen_op_ld_v(s, ot, s->T0, s->A0);
                break;
            case 1: /* bts */
                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
                                           s->mem_index, ot | MO_LE);
                break;
            case 2: /* btr */
                tcg_gen_not_tl(s->tmp0, s->tmp0);
                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
                                            s->mem_index, ot | MO_LE);
                break;
            default:
            case 3: /* btc */
                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
                                            s->mem_index, ot | MO_LE);
                break;
            }
            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
        } else {
            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
            switch (op) {
            case 0: /* bt */
                /* Data already loaded; nothing to do.  */
                break;
            case 1: /* bts */
                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
                break;
            case 2: /* btr */
                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
                break;
            default:
            case 3: /* btc */
                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
                break;
            }
            if (op != 0) {
                if (mod != 3) {
                    gen_op_st_v(s, ot, s->T0, s->A0);
                } else {
                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                }
            }
        }

        /* Delay all CC updates until after the store above.  Note that
           C is the result of the test, Z is unchanged, and the others
           are all undefined.  */
        switch (s->cc_op) {
        case CC_OP_MULB ... CC_OP_MULQ:
        case CC_OP_ADDB ... CC_OP_ADDQ:
        case CC_OP_ADCB ... CC_OP_ADCQ:
        case CC_OP_SUBB ... CC_OP_SUBQ:
        case CC_OP_SBBB ... CC_OP_SBBQ:
        case CC_OP_LOGICB ... CC_OP_LOGICQ:
        case CC_OP_INCB ... CC_OP_INCQ:
        case CC_OP_DECB ... CC_OP_DECQ:
        case CC_OP_SHLB ... CC_OP_SHLQ:
        case CC_OP_SARB ... CC_OP_SARQ:
        case CC_OP_BMILGB ... CC_OP_BMILGQ:
            /* Z was going to be computed from the non-zero status of CC_DST.
               We can get that same Z value (and the new C value) by leaving
               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
               same width.  */
            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
            break;
        default:
            /* Otherwise, generate EFLAGS and replace the C bit.  */
            gen_compute_eflags(s);
            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
                               ctz32(CC_C), 1);
            break;
        }
        break;
    case 0x1bc: /* bsf / tzcnt */
    case 0x1bd: /* bsr / lzcnt */
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        gen_extu(ot, s->T0);

        /* Note that lzcnt and tzcnt are in different extensions.  */
        if ((prefixes & PREFIX_REPZ)
            && (b & 1
                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
            int size = 8 << ot;
            /* For lzcnt/tzcnt, C bit is defined related to the input. */
            tcg_gen_mov_tl(cpu_cc_src, s->T0);
            if (b & 1) {
                /* For lzcnt, reduce the target_ulong result by the
                   number of zeros that we expect to find at the top.  */
                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
            } else {
                /* For tzcnt, a zero input must return the operand size.  */
                tcg_gen_ctzi_tl(s->T0, s->T0, size);
            }
            /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
            gen_op_update1_cc(s);
            set_cc_op(s, CC_OP_BMILGB + ot);
        } else {
            /* For bsr/bsf, only the Z bit is defined and it is related
               to the input and not the result.  */
            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
            set_cc_op(s, CC_OP_LOGICB + ot);

            /* ??? The manual says that the output is undefined when the
               input is zero, but real hardware leaves it unchanged, and
               real programs appear to depend on that.  Accomplish this
               by passing the output as the value to return upon zero.  */
            if (b & 1) {
                /* For bsr, return the bit index of the first 1 bit,
                   not the count of leading zeros.  */
                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
            } else {
                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
            }
        }
        gen_op_mov_reg_v(s, ot, reg, s->T0);
        break;
        /************************/
        /* bcd */
    case 0x27: /* daa */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_helper_daa(cpu_env);
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0x2f: /* das */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_helper_das(cpu_env);
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0x37: /* aaa */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_helper_aaa(cpu_env);
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0x3f: /* aas */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_helper_aas(cpu_env);
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0xd4: /* aam */
        if (CODE64(s))
            goto illegal_op;
        val = x86_ldub_code(env, s);
        if (val == 0) {
            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
        } else {
            gen_helper_aam(cpu_env, tcg_const_i32(val));
            set_cc_op(s, CC_OP_LOGICB);
        }
        break;
    case 0xd5: /* aad */
        if (CODE64(s))
            goto illegal_op;
        val = x86_ldub_code(env, s);
        gen_helper_aad(cpu_env, tcg_const_i32(val));
        set_cc_op(s, CC_OP_LOGICB);
        break;
        /************************/
        /* misc */
    case 0x90: /* nop */
        /* XXX: correct lock test for all insn */
        if (prefixes & PREFIX_LOCK) {
            goto illegal_op;
        }
        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
        if (REX_B(s)) {
            goto do_xchg_reg_eax;
        }
        if (prefixes & PREFIX_REPZ) {
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
            s->base.is_jmp = DISAS_NORETURN;
        }
        break;
    case 0x9b: /* fwait */
        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
            (HF_MP_MASK | HF_TS_MASK)) {
            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
        } else {
            gen_helper_fwait(cpu_env);
        }
        break;
    case 0xcc: /* int3 */
        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
        break;
    case 0xcd: /* int N */
        val = x86_ldub_code(env, s);
        if (s->vm86 && s->iopl != 3) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
        }
        break;
    case 0xce: /* into */
        if (CODE64(s))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
        break;
#ifdef WANT_ICEBP
    case 0xf1: /* icebp (undocumented, exits to external debugger) */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
        gen_debug(s, pc_start - s->cs_base);
        break;
#endif
    case 0xfa: /* cli */
        if (!s->vm86) {
            if (s->cpl <= s->iopl) {
                gen_helper_cli(cpu_env);
            } else {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            }
        } else {
            if (s->iopl == 3) {
                gen_helper_cli(cpu_env);
            } else {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            }
        }
        break;
    case 0xfb: /* sti */
        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
            gen_helper_sti(cpu_env);
            /* interrupts are recognized only after the insn following sti */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob_inhibit_irq(s, true);
        } else {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        }
        break;
    case 0x62: /* bound */
        if (CODE64(s))
            goto illegal_op;
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = (modrm >> 3) & 7;
        mod = (modrm >> 6) & 3;
        if (mod == 3)
            goto illegal_op;
        gen_op_mov_v_reg(s, ot, s->T0, reg);
        gen_lea_modrm(env, s, modrm);
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        if (ot == MO_16) {
            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
        } else {
            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
        }
        break;
    case 0x1c8 ... 0x1cf: /* bswap reg */
        reg = (b & 7) | REX_B(s);
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
            tcg_gen_bswap64_i64(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
        } else
#endif
        {
            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
            tcg_gen_ext32u_tl(s->T0, s->T0);
            tcg_gen_bswap32_tl(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
        }
        break;
    case 0xd6: /* salc */
        if (CODE64(s))
            goto illegal_op;
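        /* Undocumented: AL = CF ? 0xff : 0x00, computed here as the
           negation of the carry bit.  */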
        gen_compute_eflags_c(s, s->T0);
        tcg_gen_neg_tl(s->T0, s->T0);
        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
        break;
    case 0xe0: /* loopnz */
    case 0xe1: /* loopz */
    case 0xe2: /* loop */
    case 0xe3: /* jecxz */
        {
            TCGLabel *l1, *l2, *l3;

            tval = (int8_t)insn_get(env, s, MO_8);
            next_eip = s->pc - s->cs_base;
            tval += next_eip;
            if (dflag == MO_16) {
                tval &= 0xffff;
            }

            l1 = gen_new_label();
            l2 = gen_new_label();
            l3 = gen_new_label();
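            /* l1: branch taken (jump to tval), l3: branch not taken
               (fall through to next_eip), l2: common end of block.  */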
            gen_update_cc_op(s);
            b &= 3;
            switch(b) {
            case 0: /* loopnz */
            case 1: /* loopz */
                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
                gen_op_jz_ecx(s, s->aflag, l3);
                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
                break;
            case 2: /* loop */
                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
                gen_op_jnz_ecx(s, s->aflag, l1);
                break;
            default:
            case 3: /* jcxz */
                gen_op_jz_ecx(s, s->aflag, l1);
                break;
            }

            gen_set_label(l3);
            gen_jmp_im(s, next_eip);
            tcg_gen_br(l2);

            gen_set_label(l1);
            gen_jmp_im(s, tval);
            gen_set_label(l2);
            gen_eob(s);
        }
        break;
    case 0x130: /* wrmsr */
    case 0x132: /* rdmsr */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            if (b & 2) {
                gen_helper_rdmsr(cpu_env);
            } else {
                gen_helper_wrmsr(cpu_env);
            }
        }
        break;
    case 0x131: /* rdtsc */
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        gen_helper_rdtsc(cpu_env);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0x133: /* rdpmc */
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        gen_helper_rdpmc(cpu_env);
        break;
    case 0x134: /* sysenter */
        /* On Intel CPUs, SYSENTER is valid even in 64-bit mode */
        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
            goto illegal_op;
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysenter(cpu_env);
            gen_eob(s);
        }
        break;
    case 0x135: /* sysexit */
        /* On Intel CPUs, SYSEXIT is valid even in 64-bit mode */
        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
            goto illegal_op;
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
            gen_eob(s);
        }
        break;
#ifdef TARGET_X86_64
    case 0x105: /* syscall */
        /* XXX: is it usable in real mode? */
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
        /* TF handling for the syscall insn is different. The TF bit is checked
           after the syscall insn completes. This allows #DB to not be
           generated after one has entered CPL0 if TF is set in FMASK.  */
        gen_eob_worker(s, false, true);
        break;
    case 0x107: /* sysret */
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
            /* condition codes are modified only in long mode */
            if (s->lma) {
                set_cc_op(s, CC_OP_EFLAGS);
            }
            /* TF handling for the sysret insn is different. The TF bit is
               checked after the sysret insn completes. This allows #DB to be
               generated "as if" the syscall insn in userspace has just
               completed.  */
            gen_eob_worker(s, false, true);
        }
        break;
#endif
    case 0x1a2: /* cpuid */
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        gen_helper_cpuid(cpu_env);
        break;
    case 0xf4: /* hlt */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
            s->base.is_jmp = DISAS_NORETURN;
        }
        break;
    case 0x100:
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        op = (modrm >> 3) & 7;
        switch(op) {
        case 0: /* sldt */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
            tcg_gen_ld32u_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, ldt.selector));
            ot = mod == 3 ? dflag : MO_16;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 2: /* lldt */
            if (!s->pe || s->vm86)
                goto illegal_op;
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            } else {
                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_lldt(cpu_env, s->tmp2_i32);
            }
            break;
        case 1: /* str */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
            tcg_gen_ld32u_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, tr.selector));
            ot = mod == 3 ? dflag : MO_16;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 3: /* ltr */
            if (!s->pe || s->vm86)
                goto illegal_op;
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            } else {
                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_ltr(cpu_env, s->tmp2_i32);
            }
            break;
        case 4: /* verr */
        case 5: /* verw */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            gen_update_cc_op(s);
            if (op == 4) {
                gen_helper_verr(cpu_env, s->T0);
            } else {
                gen_helper_verw(cpu_env, s->T0);
            }
            set_cc_op(s, CC_OP_EFLAGS);
            break;
        default:
            goto unknown_op;
        }
        break;

    case 0x101:
        modrm = x86_ldub_code(env, s);
        switch (modrm) {
        CASE_MODRM_MEM_OP(0): /* sgdt */
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
            gen_lea_modrm(env, s, modrm);
            tcg_gen_ld32u_tl(s->T0,
                             cpu_env, offsetof(CPUX86State, gdt.limit));
            gen_op_st_v(s, MO_16, s->T0, s->A0);
            gen_add_A0_im(s, 2);
            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
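            /* With a 16-bit operand size only 24 bits of the base are
               stored; the same truncation applies to sidt below.  */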
            if (dflag == MO_16) {
                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
            }
            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
            break;

        case 0xc8: /* monitor */
            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
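            /* The monitored address comes from EAX, extended to the
               current address size and offset by the DS segment (or
               its override).  */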
            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
            gen_extu(s->aflag, s->A0);
            gen_add_A0_ds_seg(s);
            gen_helper_monitor(cpu_env, s->A0);
            break;

        case 0xc9: /* mwait */
            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
            gen_eob(s);
            break;

        case 0xca: /* clac */
            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
                || s->cpl != 0) {
                goto illegal_op;
            }
            gen_helper_clac(cpu_env);
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;

        case 0xcb: /* stac */
            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
                || s->cpl != 0) {
                goto illegal_op;
            }
            gen_helper_stac(cpu_env);
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;

        CASE_MODRM_MEM_OP(1): /* sidt */
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
            gen_lea_modrm(env, s, modrm);
            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
            gen_op_st_v(s, MO_16, s->T0, s->A0);
            gen_add_A0_im(s, 2);
            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
            if (dflag == MO_16) {
                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
            }
            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
            break;

        case 0xd0: /* xgetbv */
            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
                goto illegal_op;
            }
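            /* XGETBV returns the 64-bit XCR value selected by ECX
               in EDX:EAX.  */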
7428            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7429            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7430            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7431            break;
7432
7433        case 0xd1: /* xsetbv */
7434            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7435                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7436                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7437                goto illegal_op;
7438            }
7439            if (s->cpl != 0) {
7440                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7441                break;
7442            }
7443            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7444                                  cpu_regs[R_EDX]);
7445            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7446            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7447            /* End TB because translation flags may change.  */
7448            gen_jmp_im(s, s->pc - s->cs_base);
7449            gen_eob(s);
7450            break;
7451
7452        case 0xd8: /* VMRUN */
7453            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7454                goto illegal_op;
7455            }
7456            if (s->cpl != 0) {
7457                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7458                break;
7459            }
7460            gen_update_cc_op(s);
7461            gen_jmp_im(s, pc_start - s->cs_base);
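            /*
             * The helper takes the address size (aflag - 1) and this
             * instruction's length, which it uses to compute the RIP
             * saved in the host state-save area.
             */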
7462            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7463                             tcg_const_i32(s->pc - pc_start));
7464            tcg_gen_exit_tb(NULL, 0);
7465            s->base.is_jmp = DISAS_NORETURN;
7466            break;
7467
7468        case 0xd9: /* VMMCALL */
7469            if (!(s->flags & HF_SVME_MASK)) {
7470                goto illegal_op;
7471            }
7472            gen_update_cc_op(s);
7473            gen_jmp_im(s, pc_start - s->cs_base);
7474            gen_helper_vmmcall(cpu_env);
7475            break;
7476
7477        case 0xda: /* VMLOAD */
7478            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7479                goto illegal_op;
7480            }
7481            if (s->cpl != 0) {
7482                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7483                break;
7484            }
7485            gen_update_cc_op(s);
7486            gen_jmp_im(s, pc_start - s->cs_base);
7487            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7488            break;
7489
7490        case 0xdb: /* VMSAVE */
7491            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7492                goto illegal_op;
7493            }
7494            if (s->cpl != 0) {
7495                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7496                break;
7497            }
7498            gen_update_cc_op(s);
7499            gen_jmp_im(s, pc_start - s->cs_base);
7500            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7501            break;
7502
7503        case 0xdc: /* STGI */
7504            if ((!(s->flags & HF_SVME_MASK)
7505                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7506                || !s->pe) {
7507                goto illegal_op;
7508            }
7509            if (s->cpl != 0) {
7510                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7511                break;
7512            }
7513            gen_update_cc_op(s);
7514            gen_helper_stgi(cpu_env);
7515            gen_jmp_im(s, s->pc - s->cs_base);
7516            gen_eob(s);
7517            break;
7518
7519        case 0xdd: /* CLGI */
7520            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7521                goto illegal_op;
7522            }
7523            if (s->cpl != 0) {
7524                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7525                break;
7526            }
7527            gen_update_cc_op(s);
7528            gen_jmp_im(s, pc_start - s->cs_base);
7529            gen_helper_clgi(cpu_env);
7530            break;
7531
7532        case 0xde: /* SKINIT */
7533            if ((!(s->flags & HF_SVME_MASK)
7534                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7535                || !s->pe) {
7536                goto illegal_op;
7537            }
7538            gen_update_cc_op(s);
7539            gen_jmp_im(s, pc_start - s->cs_base);
7540            gen_helper_skinit(cpu_env);
7541            break;
7542
7543        case 0xdf: /* INVLPGA */
7544            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7545                goto illegal_op;
7546            }
7547            if (s->cpl != 0) {
7548                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7549                break;
7550            }
7551            gen_update_cc_op(s);
7552            gen_jmp_im(s, pc_start - s->cs_base);
7553            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7554            break;
7555
7556        CASE_MODRM_MEM_OP(2): /* lgdt */
7557            if (s->cpl != 0) {
7558                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7559                break;
7560            }
7561            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7562            gen_lea_modrm(env, s, modrm);
7563            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7564            gen_add_A0_im(s, 2);
7565            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7566            if (dflag == MO_16) {
7567                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7568            }
7569            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7570            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7571            break;
7572
7573        CASE_MODRM_MEM_OP(3): /* lidt */
7574            if (s->cpl != 0) {
7575                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7576                break;
7577            }
7578            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7579            gen_lea_modrm(env, s, modrm);
7580            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7581            gen_add_A0_im(s, 2);
7582            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7583            if (dflag == MO_16) {
7584                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7585            }
7586            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7587            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7588            break;
7589
7590        CASE_MODRM_OP(4): /* smsw */
7591            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7592            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7593            /*
7594             * In 32-bit mode, the higher 16 bits of the destination
7595             * register are undefined.  In practice CR0[31:0] is stored
7596             * just like in 64-bit mode.
7597             */
7598            mod = (modrm >> 6) & 3;
7599            ot = (mod != 3 ? MO_16 : s->dflag);
7600            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7601            break;
7602        case 0xee: /* rdpkru */
7603            if (prefixes & PREFIX_LOCK) {
7604                goto illegal_op;
7605            }
7606            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7607            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7608            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7609            break;
7610        case 0xef: /* wrpkru */
7611            if (prefixes & PREFIX_LOCK) {
7612                goto illegal_op;
7613            }
7614            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7615                                  cpu_regs[R_EDX]);
7616            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7617            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7618            break;
7619        CASE_MODRM_OP(6): /* lmsw */
7620            if (s->cpl != 0) {
7621                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7622                break;
7623            }
7624            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7625            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7626            gen_helper_lmsw(cpu_env, s->T0);
7627            gen_jmp_im(s, s->pc - s->cs_base);
7628            gen_eob(s);
7629            break;
7630
7631        CASE_MODRM_MEM_OP(7): /* invlpg */
7632            if (s->cpl != 0) {
7633                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7634                break;
7635            }
7636            gen_update_cc_op(s);
7637            gen_jmp_im(s, pc_start - s->cs_base);
7638            gen_lea_modrm(env, s, modrm);
7639            gen_helper_invlpg(cpu_env, s->A0);
7640            gen_jmp_im(s, s->pc - s->cs_base);
7641            gen_eob(s);
7642            break;
7643
7644        case 0xf8: /* swapgs */
7645#ifdef TARGET_X86_64
7646            if (CODE64(s)) {
7647                if (s->cpl != 0) {
7648                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7649                } else {
7650                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7651                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7652                                  offsetof(CPUX86State, kernelgsbase));
7653                    tcg_gen_st_tl(s->T0, cpu_env,
7654                                  offsetof(CPUX86State, kernelgsbase));
7655                }
7656                break;
7657            }
7658#endif
7659            goto illegal_op;
7660
7661        case 0xf9: /* rdtscp */
7662            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7663                goto illegal_op;
7664            }
7665            gen_update_cc_op(s);
7666            gen_jmp_im(s, pc_start - s->cs_base);
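            /*
             * Reading the time stamp counter must end the TB in
             * icount mode: gen_io_start() marks the I/O boundary, and
             * the jump below terminates the TB right after the helper.
             */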
7667            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7668                gen_io_start();
7669            }
7670            gen_helper_rdtscp(cpu_env);
7671            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7672                gen_jmp(s, s->pc - s->cs_base);
7673            }
7674            break;
7675
7676        default:
7677            goto unknown_op;
7678        }
7679        break;
7680
7681    case 0x108: /* invd */
7682    case 0x109: /* wbinvd */
7683        if (s->cpl != 0) {
7684            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7685        } else {
7686            gen_svm_check_intercept(s, pc_start, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD);
7687            /* nothing to do */
7688        }
7689        break;
7690    case 0x63: /* arpl or movslS, i.e. movsxd (x86_64) */
7691#ifdef TARGET_X86_64
7692        if (CODE64(s)) {
7693            int d_ot;
7694            /* d_ot is the size of the destination */
7695            d_ot = dflag;
7696
7697            modrm = x86_ldub_code(env, s);
7698            reg = ((modrm >> 3) & 7) | rex_r;
7699            mod = (modrm >> 6) & 3;
7700            rm = (modrm & 7) | REX_B(s);
7701
7702            if (mod == 3) {
7703                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7704                /* sign extend */
7705                if (d_ot == MO_64) {
7706                    tcg_gen_ext32s_tl(s->T0, s->T0);
7707                }
7708                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7709            } else {
7710                gen_lea_modrm(env, s, modrm);
7711                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7712                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7713            }
7714        } else
7715#endif
7716        {
7717            TCGLabel *label1;
7718            TCGv t0, t1, t2, a0;
7719
7720            if (!s->pe || s->vm86)
7721                goto illegal_op;
7722            t0 = tcg_temp_local_new();
7723            t1 = tcg_temp_local_new();
7724            t2 = tcg_temp_local_new();
7725            ot = MO_16;
7726            modrm = x86_ldub_code(env, s);
7727            reg = (modrm >> 3) & 7;
7728            mod = (modrm >> 6) & 3;
7729            rm = modrm & 7;
7730            if (mod != 3) {
7731                gen_lea_modrm(env, s, modrm);
7732                gen_op_ld_v(s, ot, t0, s->A0);
7733                a0 = tcg_temp_local_new();
7734                tcg_gen_mov_tl(a0, s->A0);
7735            } else {
7736                gen_op_mov_v_reg(s, ot, t0, rm);
7737                a0 = NULL;
7738            }
7739            gen_op_mov_v_reg(s, ot, t1, reg);
7740            tcg_gen_andi_tl(s->tmp0, t0, 3);
7741            tcg_gen_andi_tl(t1, t1, 3);
7742            tcg_gen_movi_tl(t2, 0);
7743            label1 = gen_new_label();
7744            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7745            tcg_gen_andi_tl(t0, t0, ~3);
7746            tcg_gen_or_tl(t0, t0, t1);
7747            tcg_gen_movi_tl(t2, CC_Z);
7748            gen_set_label(label1);
7749            if (mod != 3) {
7750                gen_op_st_v(s, ot, t0, a0);
7751                tcg_temp_free(a0);
7752            } else {
7753                gen_op_mov_reg_v(s, ot, rm, t0);
7754            }
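            /*
             * Worked example: with a destination selector of 0x0009
             * (RPL 1) and a source RPL of 3, the branch is not taken,
             * so the destination becomes (0x0009 & ~3) | 3 = 0x000b
             * and t2 = CC_Z, making the flags merge below set ZF.  If
             * the destination RPL is already >= the source RPL, the
             * value is left unchanged and ZF is cleared.
             */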
7755            gen_compute_eflags(s);
7756            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7757            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7758            tcg_temp_free(t0);
7759            tcg_temp_free(t1);
7760            tcg_temp_free(t2);
7761        }
7762        break;
7763    case 0x102: /* lar */
7764    case 0x103: /* lsl */
7765        {
7766            TCGLabel *label1;
7767            TCGv t0;
7768            if (!s->pe || s->vm86)
7769                goto illegal_op;
7770            ot = dflag != MO_16 ? MO_32 : MO_16;
7771            modrm = x86_ldub_code(env, s);
7772            reg = ((modrm >> 3) & 7) | rex_r;
7773            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7774            t0 = tcg_temp_local_new();
7775            gen_update_cc_op(s);
7776            if (b == 0x102) {
7777                gen_helper_lar(t0, cpu_env, s->T0);
7778            } else {
7779                gen_helper_lsl(t0, cpu_env, s->T0);
7780            }
7781            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7782            label1 = gen_new_label();
7783            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7784            gen_op_mov_reg_v(s, ot, reg, t0);
7785            gen_set_label(label1);
7786            set_cc_op(s, CC_OP_EFLAGS);
7787            tcg_temp_free(t0);
7788        }
7789        break;
7790    case 0x118:
7791        modrm = x86_ldub_code(env, s);
7792        mod = (modrm >> 6) & 3;
7793        op = (modrm >> 3) & 7;
7794        switch(op) {
7795        case 0: /* prefetchnta */
7796        case 1: /* prefetcht0 */
7797        case 2: /* prefetcht1 */
7798        case 3: /* prefetcht2 */
7799            if (mod == 3)
7800                goto illegal_op;
7801            gen_nop_modrm(env, s, modrm);
7802            /* nothing more to do */
7803            break;
7804        default: /* nop (multi byte) */
7805            gen_nop_modrm(env, s, modrm);
7806            break;
7807        }
7808        break;
7809    case 0x11a:
7810        modrm = x86_ldub_code(env, s);
7811        if (s->flags & HF_MPX_EN_MASK) {
7812            mod = (modrm >> 6) & 3;
7813            reg = ((modrm >> 3) & 7) | rex_r;
7814            if (prefixes & PREFIX_REPZ) {
7815                /* bndcl */
7816                if (reg >= 4
7817                    || (prefixes & PREFIX_LOCK)
7818                    || s->aflag == MO_16) {
7819                    goto illegal_op;
7820                }
7821                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7822            } else if (prefixes & PREFIX_REPNZ) {
7823                /* bndcu */
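                /*
                 * As in hardware, the bound register holds the one's
                 * complement of the upper bound (BNDMK stores it that
                 * way), so undo the complement before the unsigned
                 * compare; BNDCN, by contrast, compares the raw value.
                 */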
7824                if (reg >= 4
7825                    || (prefixes & PREFIX_LOCK)
7826                    || s->aflag == MO_16) {
7827                    goto illegal_op;
7828                }
7829                TCGv_i64 notu = tcg_temp_new_i64();
7830                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7831                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7832                tcg_temp_free_i64(notu);
7833            } else if (prefixes & PREFIX_DATA) {
7834                /* bndmov -- from reg/mem */
7835                if (reg >= 4 || s->aflag == MO_16) {
7836                    goto illegal_op;
7837                }
7838                if (mod == 3) {
7839                    int reg2 = (modrm & 7) | REX_B(s);
7840                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7841                        goto illegal_op;
7842                    }
7843                    if (s->flags & HF_MPX_IU_MASK) {
7844                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7845                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7846                    }
7847                } else {
7848                    gen_lea_modrm(env, s, modrm);
7849                    if (CODE64(s)) {
7850                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7851                                            s->mem_index, MO_LEQ);
7852                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7853                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7854                                            s->mem_index, MO_LEQ);
7855                    } else {
7856                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7857                                            s->mem_index, MO_LEUL);
7858                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7859                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7860                                            s->mem_index, MO_LEUL);
7861                    }
7862                    /* bnd registers are now in use */
7863                    gen_set_hflag(s, HF_MPX_IU_MASK);
7864                }
7865            } else if (mod != 3) {
7866                /* bndldx */
7867                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7868                if (reg >= 4
7869                    || (prefixes & PREFIX_LOCK)
7870                    || s->aflag == MO_16
7871                    || a.base < -1) {
7872                    goto illegal_op;
7873                }
7874                if (a.base >= 0) {
7875                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7876                } else {
7877                    tcg_gen_movi_tl(s->A0, 0);
7878                }
7879                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7880                if (a.index >= 0) {
7881                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7882                } else {
7883                    tcg_gen_movi_tl(s->T0, 0);
7884                }
7885                if (CODE64(s)) {
7886                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7887                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7888                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7889                } else {
7890                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7891                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7892                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7893                }
7894                gen_set_hflag(s, HF_MPX_IU_MASK);
7895            }
7896        }
7897        gen_nop_modrm(env, s, modrm);
7898        break;
7899    case 0x11b:
7900        modrm = x86_ldub_code(env, s);
7901        if (s->flags & HF_MPX_EN_MASK) {
7902            mod = (modrm >> 6) & 3;
7903            reg = ((modrm >> 3) & 7) | rex_r;
7904            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7905                /* bndmk */
7906                if (reg >= 4
7907                    || (prefixes & PREFIX_LOCK)
7908                    || s->aflag == MO_16) {
7909                    goto illegal_op;
7910                }
7911                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7912                if (a.base >= 0) {
7913                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7914                    if (!CODE64(s)) {
7915                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7916                    }
7917                } else if (a.base == -1) {
7918                    /* no base register: the lower bound is 0 */
7919                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7920                } else {
7921                    /* rip-relative generates #ud */
7922                    goto illegal_op;
7923                }
7924                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7925                if (!CODE64(s)) {
7926                    tcg_gen_ext32u_tl(s->A0, s->A0);
7927                }
7928                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7929                /* bnd registers are now in use */
7930                gen_set_hflag(s, HF_MPX_IU_MASK);
7931                break;
7932            } else if (prefixes & PREFIX_REPNZ) {
7933                /* bndcn */
7934                if (reg >= 4
7935                    || (prefixes & PREFIX_LOCK)
7936                    || s->aflag == MO_16) {
7937                    goto illegal_op;
7938                }
7939                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7940            } else if (prefixes & PREFIX_DATA) {
7941                /* bndmov -- to reg/mem */
7942                if (reg >= 4 || s->aflag == MO_16) {
7943                    goto illegal_op;
7944                }
7945                if (mod == 3) {
7946                    int reg2 = (modrm & 7) | REX_B(s);
7947                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7948                        goto illegal_op;
7949                    }
7950                    if (s->flags & HF_MPX_IU_MASK) {
7951                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7952                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7953                    }
7954                } else {
7955                    gen_lea_modrm(env, s, modrm);
7956                    if (CODE64(s)) {
7957                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7958                                            s->mem_index, MO_LEQ);
7959                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7960                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7961                                            s->mem_index, MO_LEQ);
7962                    } else {
7963                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7964                                            s->mem_index, MO_LEUL);
7965                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7966                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7967                                            s->mem_index, MO_LEUL);
7968                    }
7969                }
7970            } else if (mod != 3) {
7971                /* bndstx */
7972                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7973                if (reg >= 4
7974                    || (prefixes & PREFIX_LOCK)
7975                    || s->aflag == MO_16
7976                    || a.base < -1) {
7977                    goto illegal_op;
7978                }
7979                if (a.base >= 0) {
7980                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7981                } else {
7982                    tcg_gen_movi_tl(s->A0, 0);
7983                }
7984                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7985                if (a.index >= 0) {
7986                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7987                } else {
7988                    tcg_gen_movi_tl(s->T0, 0);
7989                }
7990                if (CODE64(s)) {
7991                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7992                                        cpu_bndl[reg], cpu_bndu[reg]);
7993                } else {
7994                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7995                                        cpu_bndl[reg], cpu_bndu[reg]);
7996                }
7997            }
7998        }
7999        gen_nop_modrm(env, s, modrm);
8000        break;
8001    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8002        modrm = x86_ldub_code(env, s);
8003        gen_nop_modrm(env, s, modrm);
8004        break;
8005    case 0x120: /* mov reg, crN */
8006    case 0x122: /* mov crN, reg */
8007        if (s->cpl != 0) {
8008            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8009        } else {
8010            modrm = x86_ldub_code(env, s);
8011            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8012             * AMD documentation (24594.pdf) and testing of
8013             * Intel 386 and 486 processors all show that the mod bits
8014             * are assumed to be 1's, regardless of actual values.
8015             */
8016            rm = (modrm & 7) | REX_B(s);
8017            reg = ((modrm >> 3) & 7) | rex_r;
8018            if (CODE64(s))
8019                ot = MO_64;
8020            else
8021                ot = MO_32;
8022            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
8023                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8024                reg = 8;
8025            }
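            /*
             * The remapping just above implements AMD's alternative
             * encoding: a LOCK prefix with the CR8LEG CPUID bit turns
             * the CR0 encoding into a CR8 (TPR) access, usable from
             * 32-bit code.
             */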
8026            switch(reg) {
8027            case 0:
8028            case 2:
8029            case 3:
8030            case 4:
8031            case 8:
8032                gen_update_cc_op(s);
8033                gen_jmp_im(s, pc_start - s->cs_base);
8034                if (b & 2) {
8035                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8036                        gen_io_start();
8037                    }
8038                    gen_op_mov_v_reg(s, ot, s->T0, rm);
8039                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8040                                         s->T0);
8041                    gen_jmp_im(s, s->pc - s->cs_base);
8042                    gen_eob(s);
8043                } else {
8044                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8045                        gen_io_start();
8046                    }
8047                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8048                    gen_op_mov_reg_v(s, ot, rm, s->T0);
8049                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8050                        gen_jmp(s, s->pc - s->cs_base);
8051                    }
8052                }
8053                break;
8054            default:
8055                goto unknown_op;
8056            }
8057        }
8058        break;
8059    case 0x121: /* mov reg, drN */
8060    case 0x123: /* mov drN, reg */
8061        if (s->cpl != 0) {
8062            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8063        } else {
8064            modrm = x86_ldub_code(env, s);
8065            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8066             * AMD documentation (24594.pdf) and testing of
8067             * Intel 386 and 486 processors all show that the mod bits
8068             * are assumed to be 1's, regardless of actual values.
8069             */
8070            rm = (modrm & 7) | REX_B(s);
8071            reg = ((modrm >> 3) & 7) | rex_r;
8072            if (CODE64(s))
8073                ot = MO_64;
8074            else
8075                ot = MO_32;
8076            if (reg >= 8) {
8077                goto illegal_op;
8078            }
8079            if (b & 2) {
8080                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8081                gen_op_mov_v_reg(s, ot, s->T0, rm);
8082                tcg_gen_movi_i32(s->tmp2_i32, reg);
8083                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8084                gen_jmp_im(s, s->pc - s->cs_base);
8085                gen_eob(s);
8086            } else {
8087                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8088                tcg_gen_movi_i32(s->tmp2_i32, reg);
8089                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8090                gen_op_mov_reg_v(s, ot, rm, s->T0);
8091            }
8092        }
8093        break;
8094    case 0x106: /* clts */
8095        if (s->cpl != 0) {
8096            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8097        } else {
8098            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8099            gen_helper_clts(cpu_env);
8100            /* abort block because static cpu state changed */
8101            gen_jmp_im(s, s->pc - s->cs_base);
8102            gen_eob(s);
8103        }
8104        break;
8105    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8106    case 0x1c3: /* MOVNTI reg, mem */
8107        if (!(s->cpuid_features & CPUID_SSE2))
8108            goto illegal_op;
8109        ot = mo_64_32(dflag);
8110        modrm = x86_ldub_code(env, s);
8111        mod = (modrm >> 6) & 3;
8112        if (mod == 3)
8113            goto illegal_op;
8114        reg = ((modrm >> 3) & 7) | rex_r;
8115        /* generate a generic store */
8116        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8117        break;
8118    case 0x1ae:
8119        modrm = x86_ldub_code(env, s);
8120        switch (modrm) {
8121        CASE_MODRM_MEM_OP(0): /* fxsave */
8122            if (!(s->cpuid_features & CPUID_FXSR)
8123                || (prefixes & PREFIX_LOCK)) {
8124                goto illegal_op;
8125            }
8126            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8127                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8128                break;
8129            }
8130            gen_lea_modrm(env, s, modrm);
8131            gen_helper_fxsave(cpu_env, s->A0);
8132            break;
8133
8134        CASE_MODRM_MEM_OP(1): /* fxrstor */
8135            if (!(s->cpuid_features & CPUID_FXSR)
8136                || (prefixes & PREFIX_LOCK)) {
8137                goto illegal_op;
8138            }
8139            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8140                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8141                break;
8142            }
8143            gen_lea_modrm(env, s, modrm);
8144            gen_helper_fxrstor(cpu_env, s->A0);
8145            break;
8146
8147        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8148            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8149                goto illegal_op;
8150            }
8151            if (s->flags & HF_TS_MASK) {
8152                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8153                break;
8154            }
8155            gen_lea_modrm(env, s, modrm);
8156            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8157            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8158            break;
8159
8160        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8161            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8162                goto illegal_op;
8163            }
8164            if (s->flags & HF_TS_MASK) {
8165                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8166                break;
8167            }
8168            gen_helper_update_mxcsr(cpu_env);
8169            gen_lea_modrm(env, s, modrm);
8170            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8171            gen_op_st_v(s, MO_32, s->T0, s->A0);
8172            break;
8173
8174        CASE_MODRM_MEM_OP(4): /* xsave */
8175            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8176                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8177                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8178                goto illegal_op;
8179            }
8180            gen_lea_modrm(env, s, modrm);
8181            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8182                                  cpu_regs[R_EDX]);
8183            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8184            break;
8185
8186        CASE_MODRM_MEM_OP(5): /* xrstor */
8187            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8188                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8189                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8190                goto illegal_op;
8191            }
8192            gen_lea_modrm(env, s, modrm);
8193            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8194                                  cpu_regs[R_EDX]);
8195            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8196            /* XRSTOR is how MPX is enabled, which changes how
8197               we translate.  Thus we need to end the TB.  */
8198            gen_update_cc_op(s);
8199            gen_jmp_im(s, s->pc - s->cs_base);
8200            gen_eob(s);
8201            break;
8202
8203        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8204            if (prefixes & PREFIX_LOCK) {
8205                goto illegal_op;
8206            }
8207            if (prefixes & PREFIX_DATA) {
8208                /* clwb */
8209                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8210                    goto illegal_op;
8211                }
8212                gen_nop_modrm(env, s, modrm);
8213            } else {
8214                /* xsaveopt */
8215                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8216                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8217                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8218                    goto illegal_op;
8219                }
8220                gen_lea_modrm(env, s, modrm);
8221                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8222                                      cpu_regs[R_EDX]);
8223                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8224            }
8225            break;
8226
8227        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8228            if (prefixes & PREFIX_LOCK) {
8229                goto illegal_op;
8230            }
8231            if (prefixes & PREFIX_DATA) {
8232                /* clflushopt */
8233                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8234                    goto illegal_op;
8235                }
8236            } else {
8237                /* clflush */
8238                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8239                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8240                    goto illegal_op;
8241                }
8242            }
8243            gen_nop_modrm(env, s, modrm);
8244            break;
8245
8246        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8247        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8248        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8249        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8250            if (CODE64(s)
8251                && (prefixes & PREFIX_REPZ)
8252                && !(prefixes & PREFIX_LOCK)
8253                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8254                TCGv base, treg, src, dst;
8255
8256                /* Preserve hflags bits by testing CR4 at runtime.  */
8257                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8258                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
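                /*
                 * The runtime test raises #UD if CR4.FSGSBASE is
                 * clear, so CR4 changes do not invalidate already
                 * translated blocks.
                 */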
8259
8260                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8261                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8262
8263                if (modrm & 0x10) {
8264                    /* wr*base */
8265                    dst = base, src = treg;
8266                } else {
8267                    /* rd*base */
8268                    dst = treg, src = base;
8269                }
8270
8271                if (s->dflag == MO_32) {
8272                    tcg_gen_ext32u_tl(dst, src);
8273                } else {
8274                    tcg_gen_mov_tl(dst, src);
8275                }
8276                break;
8277            }
8278            goto unknown_op;
8279
8280        case 0xf8: /* sfence / pcommit */
8281            if (prefixes & PREFIX_DATA) {
8282                /* pcommit */
8283                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8284                    || (prefixes & PREFIX_LOCK)) {
8285                    goto illegal_op;
8286                }
8287                break;
8288            }
8289            /* fallthru */
8290        case 0xf9 ... 0xff: /* sfence */
8291            if (!(s->cpuid_features & CPUID_SSE)
8292                || (prefixes & PREFIX_LOCK)) {
8293                goto illegal_op;
8294            }
8295            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8296            break;
8297        case 0xe8 ... 0xef: /* lfence */
8298            if (!(s->cpuid_features & CPUID_SSE)
8299                || (prefixes & PREFIX_LOCK)) {
8300                goto illegal_op;
8301            }
8302            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8303            break;
8304        case 0xf0 ... 0xf7: /* mfence */
8305            if (!(s->cpuid_features & CPUID_SSE2)
8306                || (prefixes & PREFIX_LOCK)) {
8307                goto illegal_op;
8308            }
8309            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8310            break;
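        /*
         * The three fences map onto TCG barriers of matching
         * strength: SFENCE orders stores against stores, LFENCE
         * loads against loads, and MFENCE all memory accesses.
         */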
8311
8312        default:
8313            goto unknown_op;
8314        }
8315        break;
8316
8317    case 0x10d: /* 3DNow! prefetch(w) */
8318        modrm = x86_ldub_code(env, s);
8319        mod = (modrm >> 6) & 3;
8320        if (mod == 3)
8321            goto illegal_op;
8322        gen_nop_modrm(env, s, modrm);
8323        break;
8324    case 0x1aa: /* rsm */
8325        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8326        if (!(s->flags & HF_SMM_MASK))
8327            goto illegal_op;
8328        gen_update_cc_op(s);
8329        gen_jmp_im(s, s->pc - s->cs_base);
8330        gen_helper_rsm(cpu_env);
8331        gen_eob(s);
8332        break;
8333    case 0x1b8: /* SSE4.2 popcnt */
8334        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8335             PREFIX_REPZ)
8336            goto illegal_op;
8337        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8338            goto illegal_op;
8339
8340        modrm = x86_ldub_code(env, s);
8341        reg = ((modrm >> 3) & 7) | rex_r;
8342
8343        if (s->prefix & PREFIX_DATA) {
8344            ot = MO_16;
8345        } else {
8346            ot = mo_64_32(dflag);
8347        }
8348
8349        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8350        gen_extu(ot, s->T0);
8351        tcg_gen_mov_tl(cpu_cc_src, s->T0);
8352        tcg_gen_ctpop_tl(s->T0, s->T0);
8353        gen_op_mov_reg_v(s, ot, reg, s->T0);
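        /*
         * cc_src keeps the zero-extended source so that ZF can be
         * derived lazily under CC_OP_POPCNT; the other flags are
         * architecturally cleared.
         */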
8354
8355        set_cc_op(s, CC_OP_POPCNT);
8356        break;
8357    case 0x10e ... 0x10f:
8358        /* 3DNow! instructions, ignore prefixes */
8359        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8360        /* fall through */
8361    case 0x110 ... 0x117:
8362    case 0x128 ... 0x12f:
8363    case 0x138 ... 0x13a:
8364    case 0x150 ... 0x179:
8365    case 0x17c ... 0x17f:
8366    case 0x1c2:
8367    case 0x1c4 ... 0x1c6:
8368    case 0x1d0 ... 0x1fe:
8369        gen_sse(env, s, b, pc_start, rex_r);
8370        break;
8371    default:
8372        goto unknown_op;
8373    }
8374    return s->pc;
8375 illegal_op:
8376    gen_illegal_opcode(s);
8377    return s->pc;
8378 unknown_op:
8379    gen_unknown_opcode(env, s);
8380    return s->pc;
8381}
8382
8383void tcg_x86_init(void)
8384{
8385    static const char reg_names[CPU_NB_REGS][4] = {
8386#ifdef TARGET_X86_64
8387        [R_EAX] = "rax",
8388        [R_EBX] = "rbx",
8389        [R_ECX] = "rcx",
8390        [R_EDX] = "rdx",
8391        [R_ESI] = "rsi",
8392        [R_EDI] = "rdi",
8393        [R_EBP] = "rbp",
8394        [R_ESP] = "rsp",
8395        [8]  = "r8",
8396        [9]  = "r9",
8397        [10] = "r10",
8398        [11] = "r11",
8399        [12] = "r12",
8400        [13] = "r13",
8401        [14] = "r14",
8402        [15] = "r15",
8403#else
8404        [R_EAX] = "eax",
8405        [R_EBX] = "ebx",
8406        [R_ECX] = "ecx",
8407        [R_EDX] = "edx",
8408        [R_ESI] = "esi",
8409        [R_EDI] = "edi",
8410        [R_EBP] = "ebp",
8411        [R_ESP] = "esp",
8412#endif
8413    };
8414    static const char seg_base_names[6][8] = {
8415        [R_CS] = "cs_base",
8416        [R_DS] = "ds_base",
8417        [R_ES] = "es_base",
8418        [R_FS] = "fs_base",
8419        [R_GS] = "gs_base",
8420        [R_SS] = "ss_base",
8421    };
8422    static const char bnd_regl_names[4][8] = {
8423        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8424    };
8425    static const char bnd_regu_names[4][8] = {
8426        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8427    };
8428    int i;
8429
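    /*
     * Every TCG global created below is backed by a CPUX86State
     * field, so generated code reads and writes the architectural
     * state directly through these handles.
     */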
8430    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8431                                       offsetof(CPUX86State, cc_op), "cc_op");
8432    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8433                                    "cc_dst");
8434    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8435                                    "cc_src");
8436    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8437                                     "cc_src2");
8438
8439    for (i = 0; i < CPU_NB_REGS; ++i) {
8440        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8441                                         offsetof(CPUX86State, regs[i]),
8442                                         reg_names[i]);
8443    }
8444
8445    for (i = 0; i < 6; ++i) {
8446        cpu_seg_base[i]
8447            = tcg_global_mem_new(cpu_env,
8448                                 offsetof(CPUX86State, segs[i].base),
8449                                 seg_base_names[i]);
8450    }
8451
8452    for (i = 0; i < 4; ++i) {
8453        cpu_bndl[i]
8454            = tcg_global_mem_new_i64(cpu_env,
8455                                     offsetof(CPUX86State, bnd_regs[i].lb),
8456                                     bnd_regl_names[i]);
8457        cpu_bndu[i]
8458            = tcg_global_mem_new_i64(cpu_env,
8459                                     offsetof(CPUX86State, bnd_regs[i].ub),
8460                                     bnd_regu_names[i]);
8461    }
8462}
8463
8464static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8465{
8466    DisasContext *dc = container_of(dcbase, DisasContext, base);
8467    CPUX86State *env = cpu->env_ptr;
8468    uint32_t flags = dc->base.tb->flags;
8469    target_ulong cs_base = dc->base.tb->cs_base;
8470
8471    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8472    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8473    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8474    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8475    dc->f_st = 0;
8476    dc->vm86 = (flags >> VM_SHIFT) & 1;
8477    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8478    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8479    dc->tf = (flags >> TF_SHIFT) & 1;
8480    dc->cc_op = CC_OP_DYNAMIC;
8481    dc->cc_op_dirty = false;
8482    dc->cs_base = cs_base;
8483    dc->popl_esp_hack = 0;
8484    /* select memory access functions */
8485    dc->mem_index = 0;
8486#ifdef CONFIG_SOFTMMU
8487    dc->mem_index = cpu_mmu_index(env, false);
8488#endif
8489    dc->cpuid_features = env->features[FEAT_1_EDX];
8490    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8491    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8492    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8493    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8494    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8495#ifdef TARGET_X86_64
8496    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8497    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8498#endif
8499    dc->flags = flags;
8500    dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8501                    (flags & HF_INHIBIT_IRQ_MASK));
8502    /* Do not optimize repz jumps at all in icount mode, because
8503       rep movsS instructions are executed along different paths
8504       in the !repz_opt and repz_opt modes.  The first path was
8505       used everywhere except in single step mode.  Disabling the
8506       jump optimization makes the control paths equivalent in
8507       normal and single step modes.
8508       As a result there is no jump optimization for repz in
8509       record/replay modes, and there is always an additional
8510       step for ecx=0 when icount is enabled.
8511     */
8512    dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8513#if 0
8514    /* check addseg logic */
8515    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8516        printf("ERROR addseg\n");
8517#endif
8518
8519    dc->T0 = tcg_temp_new();
8520    dc->T1 = tcg_temp_new();
8521    dc->A0 = tcg_temp_new();
8522
8523    dc->tmp0 = tcg_temp_new();
8524    dc->tmp1_i64 = tcg_temp_new_i64();
8525    dc->tmp2_i32 = tcg_temp_new_i32();
8526    dc->tmp3_i32 = tcg_temp_new_i32();
8527    dc->tmp4 = tcg_temp_new();
8528    dc->ptr0 = tcg_temp_new_ptr();
8529    dc->ptr1 = tcg_temp_new_ptr();
8530    dc->cc_srcT = tcg_temp_local_new();
8531}
8532
8533static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8534{
8535}
8536
8537static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8538{
8539    DisasContext *dc = container_of(dcbase, DisasContext, base);
8540
8541    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8542}
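/*
 * The two values recorded by tcg_gen_insn_start() above are exactly
 * what restore_state_to_opc() consumes: data[0] is the full pc
 * (eip + cs_base) and data[1] the current cc_op.
 */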
8543
8544static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8545                                     const CPUBreakpoint *bp)
8546{
8547    DisasContext *dc = container_of(dcbase, DisasContext, base);
8548    /* If RF is set, suppress an internally generated breakpoint.  */
8549    int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8550    if (bp->flags & flags) {
8551        gen_debug(dc, dc->base.pc_next - dc->cs_base);
8552        dc->base.is_jmp = DISAS_NORETURN;
8553        /* The address covered by the breakpoint must be included in
8554           [tb->pc, tb->pc + tb->size) in order for it to be
8555           properly cleared -- thus we increment the PC here so that
8556           the generic logic setting tb->size later does the right thing.  */
8557        dc->base.pc_next += 1;
8558        return true;
8559    } else {
8560        return false;
8561    }
8562}
8563
8564static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8565{
8566    DisasContext *dc = container_of(dcbase, DisasContext, base);
8567    target_ulong pc_next;
8568
8569#ifdef TARGET_VSYSCALL_PAGE
8570    /*
8571     * Detect entry into the vsyscall page and invoke the syscall.
8572     */
8573    if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8574        gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8575        return;
8576    }
8577#endif
8578
8579    pc_next = disas_insn(dc, cpu);
8580
8581    if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8582        /* In single step mode, we generate only one instruction and
8583           then generate an exception. */
8584        /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8585           the flag and abort the translation to give the IRQs a
8586           chance to happen. */
8587        dc->base.is_jmp = DISAS_TOO_MANY;
8588    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8589               && ((pc_next & TARGET_PAGE_MASK)
8590                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8591                       & TARGET_PAGE_MASK)
8592                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8593        /* Do not cross a page boundary in icount mode, since that
8594           can cause an exception.  Stop only when the boundary would
8595           be crossed by the first instruction in the block.
8596           If the current instruction has already crossed the boundary,
8597           that is fine, because an exception has not stopped this code.
8598         */
8599        dc->base.is_jmp = DISAS_TOO_MANY;
8600    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8601        dc->base.is_jmp = DISAS_TOO_MANY;
8602    }
8603
8604    dc->base.pc_next = pc_next;
8605}
8606
8607static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8608{
8609    DisasContext *dc = container_of(dcbase, DisasContext, base);
8610
8611    if (dc->base.is_jmp == DISAS_TOO_MANY) {
8612        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8613        gen_eob(dc);
8614    }
8615}
8616
8617static void i386_tr_disas_log(const DisasContextBase *dcbase,
8618                              CPUState *cpu)
8619{
8620    DisasContext *dc = container_of(dcbase, DisasContext, base);
8621
8622    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8623    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8624}
8625
8626static const TranslatorOps i386_tr_ops = {
8627    .init_disas_context = i386_tr_init_disas_context,
8628    .tb_start           = i386_tr_tb_start,
8629    .insn_start         = i386_tr_insn_start,
8630    .breakpoint_check   = i386_tr_breakpoint_check,
8631    .translate_insn     = i386_tr_translate_insn,
8632    .tb_stop            = i386_tr_tb_stop,
8633    .disas_log          = i386_tr_disas_log,
8634};
8635
8636/* generate intermediate code for basic block 'tb'.  */
8637void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8638{
8639    DisasContext dc;
8640
8641    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8642}
8643
8644void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8645                          target_ulong *data)
8646{
8647    int cc_op = data[1];
8648    env->eip = data[0] - tb->cs_base;
8649    if (cc_op != CC_OP_DYNAMIC) {
8650        env->cc_op = cc_op;
8651    }
8652}
8653