qemu/target/i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

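/* As a worked example, CASE_MODRM_OP(4) expands to four case ranges that
   together match every modrm byte whose reg/opcode field is 4:
   0x20...0x27, 0x60...0x67, 0xa0...0xa7 and 0xe0...0xe7.
   CASE_MODRM_MEM_OP(4) omits the last range (mod == 3, register operands).  */
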
//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    MemOp aflag;
    MemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
#ifdef TARGET_X86_64
    bool x86_64_hregs;
#endif
    int addseg; /* non zero if any of DS/ES/SS has a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

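/* Change the CC_OP of the current insn to OP.  Globals that the new value
   no longer reads (per cc_op_live above) are discarded, so that TCG
   liveness analysis can drop their computation.  */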
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || s->x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select MO_64 if OT is MO_64, else MO_32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

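/* Add a constant to the address in A0, truncating the result to 32 bits
   outside 64-bit code so that wrapped addresses stay in range.  */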
static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

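/* Store an immediate EIP value (already relative to the CS base, as all
   cur_eip arguments in this file are) into env->eip.  */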
static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

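/* Extend SRC into DST according to SIZE: zero-extend when SIGN is false,
   sign-extend otherwise.  Returns DST, or SRC unchanged when the value
   already fills the target width and no extension is needed.  */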
static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_check_io(DisasContext *s, MemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, s->tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, s->tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, s->tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_GUEST_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

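/* The string operation bodies below all follow the same pattern: compute
   the effective address into A0, perform the load/store, then advance
   ESI/EDI by the direction-flag shift loaded by gen_op_movl_T0_Dshift().  */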
static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

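/* A CCPrepare describes a condition that has not yet been materialized:
   roughly, the flag is true iff "(reg & mask) cond (use_reg2 ? reg2 : imm)".
   A mask of -1 means no masking is required; no_setcond means reg already
   holds the 0/1 result, so consumers can skip the setcond entirely.  */
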
/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

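/* Materialize the condition for jump opcode 'b' as a 0/1 value in 'reg',
   using the CCPrepare returned by gen_prepare_cc() and taking the cheap
   paths (xori/mov, or shift+and for single-bit masks) when possible.  */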
static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

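/* STOS/LODS/SCAS/CMPS bodies; the REP variants generated further below
   wrap these in the ECX test-and-decrement loop built around
   gen_jz_ecx_string().  */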
static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

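/* Dispatch an x87 arithmetic op (the reg field of a D8/DC-family opcode)
   to the matching "ST0 op= FT0" helper.  Ops 2 and 3 (FCOM/FCOMP) both
   map to fcom here; the pop for FCOMP is emitted by the caller.  */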
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

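/* Update the flag state after a variable-count shift.  A count of zero
   must leave the flags and CC_OP untouched, so the new values are
   installed with movcond on count != 0, which in turn forces the
   translator to CC_OP_DYNAMIC.  */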
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

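/* SHL/SHR/SAR with the count in T1.  tmp0 receives the value shifted by
   count-1, so that gen_shift_flags() can recover CF from the last bit
   shifted out.  */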
static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

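/* ROL/ROR with the count in T1.  8- and 16-bit inputs are replicated so
   that a 32-bit rotate produces the same bit pattern; afterwards C and O
   are recomputed from the rotated value and merged via CC_OP_ADCOX.  */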
1545static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1546{
1547    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1548    TCGv_i32 t0, t1;
1549
1550    /* load */
1551    if (op1 == OR_TMP0) {
1552        gen_op_ld_v(s, ot, s->T0, s->A0);
1553    } else {
1554        gen_op_mov_v_reg(s, ot, s->T0, op1);
1555    }
1556
1557    tcg_gen_andi_tl(s->T1, s->T1, mask);
1558
1559    switch (ot) {
1560    case MO_8:
1561        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1562        tcg_gen_ext8u_tl(s->T0, s->T0);
1563        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1564        goto do_long;
1565    case MO_16:
1566        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1567        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1568        goto do_long;
1569    do_long:
1570#ifdef TARGET_X86_64
1571    case MO_32:
1572        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1573        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1574        if (is_right) {
1575            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1576        } else {
1577            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1578        }
1579        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1580        break;
1581#endif
1582    default:
1583        if (is_right) {
1584            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1585        } else {
1586            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1587        }
1588        break;
1589    }
1590
1591    /* store */
1592    gen_op_st_rm_T0_A0(s, ot, op1);
1593
1594    /* We'll need the flags computed into CC_SRC.  */
1595    gen_compute_eflags(s);
1596
1597    /* The value that was "rotated out" is now present at the other end
1598       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1599       since we've computed the flags into CC_SRC, these variables are
1600       currently dead.  */
1601    if (is_right) {
1602        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1603        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1604        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1605    } else {
1606        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1607        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1608    }
1609    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1610    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1611
1612    /* Now conditionally store the new CC_OP value.  If the shift count
1613       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1614       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
1615       exactly as we computed above.  */
1616    t0 = tcg_const_i32(0);
1617    t1 = tcg_temp_new_i32();
1618    tcg_gen_trunc_tl_i32(t1, s->T1);
1619    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1620    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1621    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1622                        s->tmp2_i32, s->tmp3_i32);
1623    tcg_temp_free_i32(t0);
1624    tcg_temp_free_i32(t1);
1625
1626    /* The CC_OP value is no longer predictable.  */ 
1627    set_cc_op(s, CC_OP_DYNAMIC);
1628}
1629
1630static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1631                          int is_right)
1632{
1633    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1634    int shift;
1635
1636    /* load */
1637    if (op1 == OR_TMP0) {
1638        gen_op_ld_v(s, ot, s->T0, s->A0);
1639    } else {
1640        gen_op_mov_v_reg(s, ot, s->T0, op1);
1641    }
1642
1643    op2 &= mask;
1644    if (op2 != 0) {
1645        switch (ot) {
1646#ifdef TARGET_X86_64
1647        case MO_32:
1648            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1649            if (is_right) {
1650                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1651            } else {
1652                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1653            }
1654            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1655            break;
1656#endif
1657        default:
1658            if (is_right) {
1659                tcg_gen_rotri_tl(s->T0, s->T0, op2);
1660            } else {
1661                tcg_gen_rotli_tl(s->T0, s->T0, op2);
1662            }
1663            break;
1664        case MO_8:
1665            mask = 7;
1666            goto do_shifts;
1667        case MO_16:
1668            mask = 15;
1669        do_shifts:
1670            shift = op2 & mask;
1671            if (is_right) {
1672                shift = mask + 1 - shift;
1673            }
1674            gen_extu(ot, s->T0);
1675            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1676            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1677            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1678            break;
1679        }
1680    }
1681
1682    /* store */
1683    gen_op_st_rm_T0_A0(s, ot, op1);
1684
1685    if (op2 != 0) {
1686        /* Compute the flags into CC_SRC.  */
1687        gen_compute_eflags(s);
1688
1689        /* The value that was "rotated out" is now present at the other end
1690           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1691           since we've computed the flags into CC_SRC, these variables are
1692           currently dead.  */
1693        if (is_right) {
1694            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1695            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1696            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1697        } else {
1698            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1699            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1700        }
1701        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1702        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1703        set_cc_op(s, CC_OP_ADCOX);
1704    }
1705}
1706
1707/* XXX: add faster immediate = 1 case */
1708static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1709                           int is_right)
1710{
1711    gen_compute_eflags(s);
1712    assert(s->cc_op == CC_OP_EFLAGS);
1713
1714    /* load */
1715    if (op1 == OR_TMP0)
1716        gen_op_ld_v(s, ot, s->T0, s->A0);
1717    else
1718        gen_op_mov_v_reg(s, ot, s->T0, op1);
1719
1720    if (is_right) {
1721        switch (ot) {
1722        case MO_8:
1723            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1724            break;
1725        case MO_16:
1726            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1727            break;
1728        case MO_32:
1729            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1730            break;
1731#ifdef TARGET_X86_64
1732        case MO_64:
1733            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1734            break;
1735#endif
1736        default:
1737            tcg_abort();
1738        }
1739    } else {
1740        switch (ot) {
1741        case MO_8:
1742            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1743            break;
1744        case MO_16:
1745            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1746            break;
1747        case MO_32:
1748            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1749            break;
1750#ifdef TARGET_X86_64
1751        case MO_64:
1752            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1753            break;
1754#endif
1755        default:
1756            tcg_abort();
1757        }
1758    }
1759    /* store */
1760    gen_op_st_rm_T0_A0(s, ot, op1);
1761}
1762
1763/* XXX: add faster immediate case */
1764static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1765                             bool is_right, TCGv count_in)
1766{
1767    target_ulong mask = (ot == MO_64 ? 63 : 31);
1768    TCGv count;
1769
1770    /* load */
1771    if (op1 == OR_TMP0) {
1772        gen_op_ld_v(s, ot, s->T0, s->A0);
1773    } else {
1774        gen_op_mov_v_reg(s, ot, s->T0, op1);
1775    }
1776
1777    count = tcg_temp_new();
1778    tcg_gen_andi_tl(count, count_in, mask);
1779
1780    switch (ot) {
1781    case MO_16:
1782        /* Note: we implement the Intel behaviour for shift count > 16.
1783           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1784           portion by constructing it as a 32-bit value.  */
1785        if (is_right) {
1786            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1787            tcg_gen_mov_tl(s->T1, s->T0);
1788            tcg_gen_mov_tl(s->T0, s->tmp0);
1789        } else {
1790            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1791        }
1792        /* FALLTHRU */
1793#ifdef TARGET_X86_64
1794    case MO_32:
1795        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1796        tcg_gen_subi_tl(s->tmp0, count, 1);
1797        if (is_right) {
1798            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1799            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1800            tcg_gen_shr_i64(s->T0, s->T0, count);
1801        } else {
1802            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1803            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1804            tcg_gen_shl_i64(s->T0, s->T0, count);
1805            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1806            tcg_gen_shri_i64(s->T0, s->T0, 32);
1807        }
1808        break;
1809#endif
1810    default:
1811        tcg_gen_subi_tl(s->tmp0, count, 1);
1812        if (is_right) {
1813            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1814
1815            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1816            tcg_gen_shr_tl(s->T0, s->T0, count);
1817            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1818        } else {
1819            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1820            if (ot == MO_16) {
1821                /* Only needed if count > 16, for Intel behaviour.  */
1822                tcg_gen_subfi_tl(s->tmp4, 33, count);
1823                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1824                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1825            }
1826
1827            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1828            tcg_gen_shl_tl(s->T0, s->T0, count);
1829            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1830        }
1831        tcg_gen_movi_tl(s->tmp4, 0);
1832        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1833                           s->tmp4, s->T1);
1834        tcg_gen_or_tl(s->T0, s->T0, s->T1);
1835        break;
1836    }
1837
1838    /* store */
1839    gen_op_st_rm_T0_A0(s, ot, op1);
1840
1841    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1842    tcg_temp_free(count);
1843}
1844
1845static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1846{
1847    if (s != OR_TMP1)
1848        gen_op_mov_v_reg(s1, ot, s1->T1, s);
1849    switch(op) {
1850    case OP_ROL:
1851        gen_rot_rm_T1(s1, ot, d, 0);
1852        break;
1853    case OP_ROR:
1854        gen_rot_rm_T1(s1, ot, d, 1);
1855        break;
1856    case OP_SHL:
1857    case OP_SHL1:
1858        gen_shift_rm_T1(s1, ot, d, 0, 0);
1859        break;
1860    case OP_SHR:
1861        gen_shift_rm_T1(s1, ot, d, 1, 0);
1862        break;
1863    case OP_SAR:
1864        gen_shift_rm_T1(s1, ot, d, 1, 1);
1865        break;
1866    case OP_RCL:
1867        gen_rotc_rm_T1(s1, ot, d, 0);
1868        break;
1869    case OP_RCR:
1870        gen_rotc_rm_T1(s1, ot, d, 1);
1871        break;
1872    }
1873}
1874
1875static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1876{
1877    switch(op) {
1878    case OP_ROL:
1879        gen_rot_rm_im(s1, ot, d, c, 0);
1880        break;
1881    case OP_ROR:
1882        gen_rot_rm_im(s1, ot, d, c, 1);
1883        break;
1884    case OP_SHL:
1885    case OP_SHL1:
1886        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1887        break;
1888    case OP_SHR:
1889        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1890        break;
1891    case OP_SAR:
1892        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1893        break;
1894    default:
1895        /* currently not optimized */
1896        tcg_gen_movi_tl(s1->T1, c);
1897        gen_shift(s1, op, ot, d, OR_TMP1);
1898        break;
1899    }
1900}
1901
1902#define X86_MAX_INSN_LENGTH 15
1903
1904static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1905{
1906    uint64_t pc = s->pc;
1907
1908    s->pc += num_bytes;
1909    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1910        /* If the instruction's 16th byte is on a different page than the 1st, a
1911         * page fault on the second page wins over the general protection fault
1912         * caused by the instruction being too long.
1913         * This can happen even if the operand is only one byte long!
1914         */
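        /* Example: fetching a 4-byte immediate at pc == 0xffe advances
         * s->pc to 0x1002.  If that exceeds the 15-byte limit, the last
         * byte (0x1001) and the byte before the fetch (0xffd) lie on
         * different 4K pages, so the load below touches the second page
         * and lets a #PF there take priority over the #GP.
         */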
1915        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1916            volatile uint8_t unused =
1917                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1918            (void) unused;
1919        }
1920        siglongjmp(s->jmpbuf, 1);
1921    }
1922
1923    return pc;
1924}
1925
1926static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1927{
1928    return translator_ldub(env, advance_pc(env, s, 1));
1929}
1930
1931static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1932{
1933    return translator_ldsw(env, advance_pc(env, s, 2));
1934}
1935
1936static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1937{
1938    return translator_lduw(env, advance_pc(env, s, 2));
1939}
1940
1941static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1942{
1943    return translator_ldl(env, advance_pc(env, s, 4));
1944}
1945
1946#ifdef TARGET_X86_64
1947static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1948{
1949    return translator_ldq(env, advance_pc(env, s, 8));
1950}
1951#endif
1952
1953/* Decompose an address.  */
1954
1955typedef struct AddressParts {
1956    int def_seg;
1957    int base;
1958    int index;
1959    int scale;
1960    target_long disp;
1961} AddressParts;
1962
1963static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1964                                    int modrm)
1965{
1966    int def_seg, base, index, scale, mod, rm;
1967    target_long disp;
1968    bool havesib;
1969
1970    def_seg = R_DS;
1971    index = -1;
1972    scale = 0;
1973    disp = 0;
1974
1975    mod = (modrm >> 6) & 3;
1976    rm = modrm & 7;
1977    base = rm | REX_B(s);
1978
1979    if (mod == 3) {
1980        /* Normally filtered out earlier, but including this path
1981           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1982        goto done;
1983    }
1984
1985    switch (s->aflag) {
1986    case MO_64:
1987    case MO_32:
1988        havesib = 0;
1989        if (rm == 4) {
1990            int code = x86_ldub_code(env, s);
1991            scale = (code >> 6) & 3;
1992            index = ((code >> 3) & 7) | REX_X(s);
1993            if (index == 4) {
1994                index = -1;  /* no index */
1995            }
1996            base = (code & 7) | REX_B(s);
1997            havesib = 1;
1998        }
1999
2000        switch (mod) {
2001        case 0:
2002            if ((base & 7) == 5) {
2003                base = -1;
2004                disp = (int32_t)x86_ldl_code(env, s);
2005                if (CODE64(s) && !havesib) {
2006                    base = -2;
2007                    disp += s->pc + s->rip_offset;
2008                }
2009            }
2010            break;
2011        case 1:
2012            disp = (int8_t)x86_ldub_code(env, s);
2013            break;
2014        default:
2015        case 2:
2016            disp = (int32_t)x86_ldl_code(env, s);
2017            break;
2018        }
2019
2020        /* For correct popl handling with esp.  */
2021        if (base == R_ESP && s->popl_esp_hack) {
2022            disp += s->popl_esp_hack;
2023        }
2024        if (base == R_EBP || base == R_ESP) {
2025            def_seg = R_SS;
2026        }
2027        break;
2028
2029    case MO_16:
2030        if (mod == 0) {
2031            if (rm == 6) {
2032                base = -1;
2033                disp = x86_lduw_code(env, s);
2034                break;
2035            }
2036        } else if (mod == 1) {
2037            disp = (int8_t)x86_ldub_code(env, s);
2038        } else {
2039            disp = (int16_t)x86_lduw_code(env, s);
2040        }
2041
2042        switch (rm) {
2043        case 0:
2044            base = R_EBX;
2045            index = R_ESI;
2046            break;
2047        case 1:
2048            base = R_EBX;
2049            index = R_EDI;
2050            break;
2051        case 2:
2052            base = R_EBP;
2053            index = R_ESI;
2054            def_seg = R_SS;
2055            break;
2056        case 3:
2057            base = R_EBP;
2058            index = R_EDI;
2059            def_seg = R_SS;
2060            break;
2061        case 4:
2062            base = R_ESI;
2063            break;
2064        case 5:
2065            base = R_EDI;
2066            break;
2067        case 6:
2068            base = R_EBP;
2069            def_seg = R_SS;
2070            break;
2071        default:
2072        case 7:
2073            base = R_EBX;
2074            break;
2075        }
2076        break;
2077
2078    default:
2079        tcg_abort();
2080    }
2081
2082 done:
2083    return (AddressParts){ def_seg, base, index, scale, disp };
2084}
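
/* Worked example (32-bit addressing): modrm 0x44 has mod=1, rm=4, so a SIB
   byte and an 8-bit displacement follow.  SIB 0x98 decodes to scale=2,
   index=EBX, base=EAX; with disp8 0x10 the result is
   { def_seg=R_DS, base=R_EAX, index=R_EBX, scale=2, disp=0x10 },
   i.e. the effective address EAX + EBX*4 + 0x10.  */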
2085
2086/* Compute the address, with a minimum number of TCG ops.  */
2087static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2088{
2089    TCGv ea = NULL;
2090
2091    if (a.index >= 0) {
2092        if (a.scale == 0) {
2093            ea = cpu_regs[a.index];
2094        } else {
2095            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2096            ea = s->A0;
2097        }
2098        if (a.base >= 0) {
2099            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2100            ea = s->A0;
2101        }
2102    } else if (a.base >= 0) {
2103        ea = cpu_regs[a.base];
2104    }
2105    if (!ea) {
2106        tcg_gen_movi_tl(s->A0, a.disp);
2107        ea = s->A0;
2108    } else if (a.disp != 0) {
2109        tcg_gen_addi_tl(s->A0, ea, a.disp);
2110        ea = s->A0;
2111    }
2112
2113    return ea;
2114}
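
/* E.g. a bare (%eax) operand returns cpu_regs[R_EAX] directly with no TCG
   ops emitted, while 0x10(%eax,%ebx,4) costs a shift and two adds into
   s->A0.  */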
2115
2116static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2117{
2118    AddressParts a = gen_lea_modrm_0(env, s, modrm);
2119    TCGv ea = gen_lea_modrm_1(s, a);
2120    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2121}
2122
2123static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2124{
2125    (void)gen_lea_modrm_0(env, s, modrm);
2126}
2127
2128/* Used for BNDCL, BNDCU, BNDCN.  */
2129static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2130                      TCGCond cond, TCGv_i64 bndv)
2131{
2132    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2133
2134    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2135    if (!CODE64(s)) {
2136        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2137    }
2138    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2139    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2140    gen_helper_bndck(cpu_env, s->tmp2_i32);
2141}
2142
2143/* used for LEA and MOV AX, mem */
2144static void gen_add_A0_ds_seg(DisasContext *s)
2145{
2146    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2147}
2148
2149/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2150   OR_TMP0 */
2151static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2152                           MemOp ot, int reg, int is_store)
2153{
2154    int mod, rm;
2155
2156    mod = (modrm >> 6) & 3;
2157    rm = (modrm & 7) | REX_B(s);
2158    if (mod == 3) {
2159        if (is_store) {
2160            if (reg != OR_TMP0)
2161                gen_op_mov_v_reg(s, ot, s->T0, reg);
2162            gen_op_mov_reg_v(s, ot, rm, s->T0);
2163        } else {
2164            gen_op_mov_v_reg(s, ot, s->T0, rm);
2165            if (reg != OR_TMP0)
2166                gen_op_mov_reg_v(s, ot, reg, s->T0);
2167        }
2168    } else {
2169        gen_lea_modrm(env, s, modrm);
2170        if (is_store) {
2171            if (reg != OR_TMP0)
2172                gen_op_mov_v_reg(s, ot, s->T0, reg);
2173            gen_op_st_v(s, ot, s->T0, s->A0);
2174        } else {
2175            gen_op_ld_v(s, ot, s->T0, s->A0);
2176            if (reg != OR_TMP0)
2177                gen_op_mov_reg_v(s, ot, reg, s->T0);
2178        }
2179    }
2180}
2181
2182static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2183{
2184    uint32_t ret;
2185
2186    switch (ot) {
2187    case MO_8:
2188        ret = x86_ldub_code(env, s);
2189        break;
2190    case MO_16:
2191        ret = x86_lduw_code(env, s);
2192        break;
2193    case MO_32:
2194#ifdef TARGET_X86_64
2195    case MO_64:
2196#endif
2197        ret = x86_ldl_code(env, s);
2198        break;
2199    default:
2200        tcg_abort();
2201    }
2202    return ret;
2203}
2204
2205static inline int insn_const_size(MemOp ot)
2206{
2207    if (ot <= MO_32) {
2208        return 1 << ot;
2209    } else {
2210        return 4;
2211    }
2212}
2213
2214static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2215{
2216#ifndef CONFIG_USER_ONLY
2217    return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2218           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2219#else
2220    return true;
2221#endif
2222}
2223
2224static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2225{
2226    target_ulong pc = s->cs_base + eip;
2227
2228    if (use_goto_tb(s, pc)) {
2229        /* jump to same page: we can use a direct jump */
2230        tcg_gen_goto_tb(tb_num);
2231        gen_jmp_im(s, eip);
2232        tcg_gen_exit_tb(s->base.tb, tb_num);
2233        s->base.is_jmp = DISAS_NORETURN;
2234    } else {
2235        /* jump to another page */
2236        gen_jmp_im(s, eip);
2237        gen_jr(s, s->tmp0);
2238    }
2239}
2240
2241static inline void gen_jcc(DisasContext *s, int b,
2242                           target_ulong val, target_ulong next_eip)
2243{
2244    TCGLabel *l1, *l2;
2245
2246    if (s->jmp_opt) {
2247        l1 = gen_new_label();
2248        gen_jcc1(s, b, l1);
2249
2250        gen_goto_tb(s, 0, next_eip);
2251
2252        gen_set_label(l1);
2253        gen_goto_tb(s, 1, val);
2254    } else {
2255        l1 = gen_new_label();
2256        l2 = gen_new_label();
2257        gen_jcc1(s, b, l1);
2258
2259        gen_jmp_im(s, next_eip);
2260        tcg_gen_br(l2);
2261
2262        gen_set_label(l1);
2263        gen_jmp_im(s, val);
2264        gen_set_label(l2);
2265        gen_eob(s);
2266    }
2267}
2268
2269static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2270                        int modrm, int reg)
2271{
2272    CCPrepare cc;
2273
2274    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2275
2276    cc = gen_prepare_cc(s, b, s->T1);
2277    if (cc.mask != -1) {
2278        TCGv t0 = tcg_temp_new();
2279        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2280        cc.reg = t0;
2281    }
2282    if (!cc.use_reg2) {
2283        cc.reg2 = tcg_const_tl(cc.imm);
2284    }
2285
2286    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2287                       s->T0, cpu_regs[reg]);
2288    gen_op_mov_reg_v(s, ot, reg, s->T0);
2289
2290    if (cc.mask != -1) {
2291        tcg_temp_free(cc.reg);
2292    }
2293    if (!cc.use_reg2) {
2294        tcg_temp_free(cc.reg2);
2295    }
2296}
2297
2298static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
2299{
2300    tcg_gen_ld32u_tl(s->T0, cpu_env,
2301                     offsetof(CPUX86State,segs[seg_reg].selector));
2302}
2303
2304static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
2305{
2306    tcg_gen_ext16u_tl(s->T0, s->T0);
2307    tcg_gen_st32_tl(s->T0, cpu_env,
2308                    offsetof(CPUX86State,segs[seg_reg].selector));
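    /* In real or VM86 mode the segment base is simply selector << 4.  */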
2309    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2310}
2311
2312/* move T0 to seg_reg and compute if the CPU state may change. Never
2313   call this function with seg_reg == R_CS */
2314static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2315{
2316    if (s->pe && !s->vm86) {
2317        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2318        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2319        /* abort translation because the addseg value may change or
2320           because ss32 may change. For R_SS, translation must always
2321           stop as a special handling must be done to disable hardware
2322           interrupts for the next instruction */
2323        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2324            s->base.is_jmp = DISAS_TOO_MANY;
2325        }
2326    } else {
2327        gen_op_movl_seg_T0_vm(s, seg_reg);
2328        if (seg_reg == R_SS) {
2329            s->base.is_jmp = DISAS_TOO_MANY;
2330        }
2331    }
2332}
2333
2334static inline int svm_is_rep(int prefixes)
2335{
2336    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2337}
2338
2339static inline void
2340gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2341                              uint32_t type, uint64_t param)
2342{
2343    /* no SVM activated; fast case */
2344    if (likely(!(s->flags & HF_GUEST_MASK)))
2345        return;
2346    gen_update_cc_op(s);
2347    gen_jmp_im(s, pc_start - s->cs_base);
2348    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2349                                         tcg_const_i64(param));
2350}
2351
2352static inline void
2353gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2354{
2355    gen_svm_check_intercept_param(s, pc_start, type, 0);
2356}
2357
2358static inline void gen_stack_update(DisasContext *s, int addend)
2359{
2360    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2361}
2362
2363/* Generate a push. It depends on ss32, addseg and dflag.  */
2364static void gen_push_v(DisasContext *s, TCGv val)
2365{
2366    MemOp d_ot = mo_pushpop(s, s->dflag);
2367    MemOp a_ot = mo_stacksize(s);
2368    int size = 1 << d_ot;
2369    TCGv new_esp = s->A0;
2370
2371    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2372
2373    if (!CODE64(s)) {
2374        if (s->addseg) {
2375            new_esp = s->tmp4;
2376            tcg_gen_mov_tl(new_esp, s->A0);
2377        }
2378        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2379    }
2380
2381    gen_op_st_v(s, d_ot, val, s->A0);
2382    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2383}
2384
2385/* two step pop is necessary for precise exceptions */
2386static MemOp gen_pop_T0(DisasContext *s)
2387{
2388    MemOp d_ot = mo_pushpop(s, s->dflag);
2389
2390    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2391    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2392
2393    return d_ot;
2394}
2395
2396static inline void gen_pop_update(DisasContext *s, MemOp ot)
2397{
2398    gen_stack_update(s, 1 << ot);
2399}
2400
2401static inline void gen_stack_A0(DisasContext *s)
2402{
2403    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2404}
2405
2406static void gen_pusha(DisasContext *s)
2407{
2408    MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2409    MemOp d_ot = s->dflag;
2410    int size = 1 << d_ot;
2411    int i;
2412
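    /* The loop stores EDI first at the lowest address, so the resulting
       memory image matches the architectural push order EAX, ECX, ..., EDI.  */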
2413    for (i = 0; i < 8; i++) {
2414        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2415        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2416        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2417    }
2418
2419    gen_stack_update(s, -8 * size);
2420}
2421
2422static void gen_popa(DisasContext *s)
2423{
2424    MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2425    MemOp d_ot = s->dflag;
2426    int size = 1 << d_ot;
2427    int i;
2428
2429    for (i = 0; i < 8; i++) {
2430        /* ESP is not reloaded */
2431        if (7 - i == R_ESP) {
2432            continue;
2433        }
2434        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2435        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2436        gen_op_ld_v(s, d_ot, s->T0, s->A0);
2437        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2438    }
2439
2440    gen_stack_update(s, 8 * size);
2441}
2442
2443static void gen_enter(DisasContext *s, int esp_addend, int level)
2444{
2445    MemOp d_ot = mo_pushpop(s, s->dflag);
2446    MemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2447    int size = 1 << d_ot;
2448
2449    /* Push BP; compute FrameTemp into T1.  */
2450    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2451    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2452    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2453
2454    level &= 31;
2455    if (level != 0) {
2456        int i;
2457
2458        /* Copy level-1 pointers from the previous frame.  */
2459        for (i = 1; i < level; ++i) {
2460            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2461            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2462            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2463
2464            tcg_gen_subi_tl(s->A0, s->T1, size * i);
2465            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2466            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2467        }
2468
2469        /* Push the current FrameTemp as the last level.  */
2470        tcg_gen_subi_tl(s->A0, s->T1, size * level);
2471        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2472        gen_op_st_v(s, d_ot, s->T1, s->A0);
2473    }
2474
2475    /* Copy the FrameTemp value to EBP.  */
2476    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2477
2478    /* Compute the final value of ESP.  */
2479    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2480    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2481}
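
/* Worked example: "enter $8, $2" with d_ot == MO_32 pushes EBP, copies one
   level-1 pointer from the old frame, pushes FrameTemp as the last level,
   then sets EBP = FrameTemp and ESP = FrameTemp - (8 + 4 * 2).  */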
2482
2483static void gen_leave(DisasContext *s)
2484{
2485    MemOp d_ot = mo_pushpop(s, s->dflag);
2486    MemOp a_ot = mo_stacksize(s);
2487
2488    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2489    gen_op_ld_v(s, d_ot, s->T0, s->A0);
2490
2491    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2492
2493    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2494    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2495}
2496
2497/* Similarly, except that the assumption here is that we don't decode
2498   the instruction at all -- either a missing opcode, an unimplemented
2499   feature, or just a bogus instruction stream.  */
2500static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2501{
2502    gen_illegal_opcode(s);
2503
2504    if (qemu_loglevel_mask(LOG_UNIMP)) {
2505        target_ulong pc = s->pc_start, end = s->pc;
2506        qemu_log_lock();
2507        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2508        for (; pc < end; ++pc) {
2509            qemu_log(" %02x", cpu_ldub_code(env, pc));
2510        }
2511        qemu_log("\n");
2512        qemu_log_unlock();
2513    }
2514}
2515
2516/* an interrupt is different from an exception because of the
2517   privilege checks */
2518static void gen_interrupt(DisasContext *s, int intno,
2519                          target_ulong cur_eip, target_ulong next_eip)
2520{
2521    gen_update_cc_op(s);
2522    gen_jmp_im(s, cur_eip);
2523    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2524                               tcg_const_i32(next_eip - cur_eip));
2525    s->base.is_jmp = DISAS_NORETURN;
2526}
2527
2528static void gen_debug(DisasContext *s, target_ulong cur_eip)
2529{
2530    gen_update_cc_op(s);
2531    gen_jmp_im(s, cur_eip);
2532    gen_helper_debug(cpu_env);
2533    s->base.is_jmp = DISAS_NORETURN;
2534}
2535
2536static void gen_set_hflag(DisasContext *s, uint32_t mask)
2537{
2538    if ((s->flags & mask) == 0) {
2539        TCGv_i32 t = tcg_temp_new_i32();
2540        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2541        tcg_gen_ori_i32(t, t, mask);
2542        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2543        tcg_temp_free_i32(t);
2544        s->flags |= mask;
2545    }
2546}
2547
2548static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2549{
2550    if (s->flags & mask) {
2551        TCGv_i32 t = tcg_temp_new_i32();
2552        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2553        tcg_gen_andi_i32(t, t, ~mask);
2554        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2555        tcg_temp_free_i32(t);
2556        s->flags &= ~mask;
2557    }
2558}
2559
2560/* Clear BND registers during legacy branches.  */
2561static void gen_bnd_jmp(DisasContext *s)
2562{
2563    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2564       and if the BNDREGs are known to be in use (non-zero) already.
2565       The helper itself will check BNDPRESERVE at runtime.  */
2566    if ((s->prefix & PREFIX_REPNZ) == 0
2567        && (s->flags & HF_MPX_EN_MASK) != 0
2568        && (s->flags & HF_MPX_IU_MASK) != 0) {
2569        gen_helper_bnd_jmp(cpu_env);
2570    }
2571}
2572
2573/* Generate an end of block. Trace exception is also generated if needed.
2574   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2575   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2576   S->TF.  This is used by the syscall/sysret insns.  */
2577static void
2578do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2579{
2580    gen_update_cc_op(s);
2581
2582    /* If several instructions disable interrupts, only the first does it.  */
2583    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2584        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2585    } else {
2586        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2587    }
2588
2589    if (s->base.tb->flags & HF_RF_MASK) {
2590        gen_helper_reset_rf(cpu_env);
2591    }
2592    if (s->base.singlestep_enabled) {
2593        gen_helper_debug(cpu_env);
2594    } else if (recheck_tf) {
2595        gen_helper_rechecking_single_step(cpu_env);
2596        tcg_gen_exit_tb(NULL, 0);
2597    } else if (s->tf) {
2598        gen_helper_single_step(cpu_env);
2599    } else if (jr) {
2600        tcg_gen_lookup_and_goto_ptr();
2601    } else {
2602        tcg_gen_exit_tb(NULL, 0);
2603    }
2604    s->base.is_jmp = DISAS_NORETURN;
2605}
2606
2607static inline void
2608gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2609{
2610    do_gen_eob_worker(s, inhibit, recheck_tf, false);
2611}
2612
2613/* End of block.
2614   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2615static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2616{
2617    gen_eob_worker(s, inhibit, false);
2618}
2619
2620/* End of block, resetting the inhibit irq flag.  */
2621static void gen_eob(DisasContext *s)
2622{
2623    gen_eob_worker(s, false, false);
2624}
2625
2626/* Jump to register */
2627static void gen_jr(DisasContext *s, TCGv dest)
2628{
2629    do_gen_eob_worker(s, false, false, true);
2630}
2631
2632/* generate a jump to eip. No segment change must happen before as a
2633   direct call to the next block may occur */
2634static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2635{
2636    gen_update_cc_op(s);
2637    set_cc_op(s, CC_OP_DYNAMIC);
2638    if (s->jmp_opt) {
2639        gen_goto_tb(s, tb_num, eip);
2640    } else {
2641        gen_jmp_im(s, eip);
2642        gen_eob(s);
2643    }
2644}
2645
2646static void gen_jmp(DisasContext *s, target_ulong eip)
2647{
2648    gen_jmp_tb(s, eip, 0);
2649}
2650
2651static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2652{
2653    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2654    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2655}
2656
2657static inline void gen_stq_env_A0(DisasContext *s, int offset)
2658{
2659    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2660    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2661}
2662
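/* The two helpers below move a full 16-byte XMM value as two little-endian
   64-bit halves, ZMM_Q(0) at s->A0 and ZMM_Q(1) at s->A0 + 8.  */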
2663static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2664{
2665    int mem_index = s->mem_index;
2666    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2667    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2668    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2669    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2670    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2671}
2672
2673static inline void gen_sto_env_A0(DisasContext *s, int offset)
2674{
2675    int mem_index = s->mem_index;
2676    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2677    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2678    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2679    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2680    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2681}
2682
2683static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2684{
2685    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2686    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2687    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2688    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2689}
2690
2691static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2692{
2693    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2694    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2695}
2696
2697static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2698{
2699    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2700    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2701}
2702
2703static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2704{
2705    tcg_gen_movi_i64(s->tmp1_i64, 0);
2706    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2707}
2708
2709typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2710typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2711typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2712typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2713typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2714typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2715                               TCGv_i32 val);
2716typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2717typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2718                               TCGv val);
2719
2720#define SSE_SPECIAL ((void *)1)
2721#define SSE_DUMMY ((void *)2)
2722
2723#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2724#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2725                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2726
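/* Row index is the second opcode byte (0f XX); the four columns select the
   no-prefix, 0x66, 0xf3 and 0xf2 forms of that opcode.  */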
2727static const SSEFunc_0_epp sse_op_table1[256][4] = {
2728    /* 3DNow! extensions */
2729    [0x0e] = { SSE_DUMMY }, /* femms */
2730    [0x0f] = { SSE_DUMMY }, /* pf... */
2731    /* pure SSE operations */
2732    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2733    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2734    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2735    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2736    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2737    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2738    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2739    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2740
2741    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2742    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2743    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2744    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2745    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2746    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2747    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2748    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2749    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2750    [0x51] = SSE_FOP(sqrt),
2751    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2752    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2753    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2754    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2755    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2756    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2757    [0x58] = SSE_FOP(add),
2758    [0x59] = SSE_FOP(mul),
2759    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2760               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2761    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2762    [0x5c] = SSE_FOP(sub),
2763    [0x5d] = SSE_FOP(min),
2764    [0x5e] = SSE_FOP(div),
2765    [0x5f] = SSE_FOP(max),
2766
2767    [0xc2] = SSE_FOP(cmpeq),
2768    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2769               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2770
2771    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2772    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2773    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2774
2775    /* MMX ops and their SSE extensions */
2776    [0x60] = MMX_OP2(punpcklbw),
2777    [0x61] = MMX_OP2(punpcklwd),
2778    [0x62] = MMX_OP2(punpckldq),
2779    [0x63] = MMX_OP2(packsswb),
2780    [0x64] = MMX_OP2(pcmpgtb),
2781    [0x65] = MMX_OP2(pcmpgtw),
2782    [0x66] = MMX_OP2(pcmpgtl),
2783    [0x67] = MMX_OP2(packuswb),
2784    [0x68] = MMX_OP2(punpckhbw),
2785    [0x69] = MMX_OP2(punpckhwd),
2786    [0x6a] = MMX_OP2(punpckhdq),
2787    [0x6b] = MMX_OP2(packssdw),
2788    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2789    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2790    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2791    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2792    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2793               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2794               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2795               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2796    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2797    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2798    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2799    [0x74] = MMX_OP2(pcmpeqb),
2800    [0x75] = MMX_OP2(pcmpeqw),
2801    [0x76] = MMX_OP2(pcmpeql),
2802    [0x77] = { SSE_DUMMY }, /* emms */
2803    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2804    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2805    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2806    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2807    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2808    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2809    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2810    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2811    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2812    [0xd1] = MMX_OP2(psrlw),
2813    [0xd2] = MMX_OP2(psrld),
2814    [0xd3] = MMX_OP2(psrlq),
2815    [0xd4] = MMX_OP2(paddq),
2816    [0xd5] = MMX_OP2(pmullw),
2817    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2818    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2819    [0xd8] = MMX_OP2(psubusb),
2820    [0xd9] = MMX_OP2(psubusw),
2821    [0xda] = MMX_OP2(pminub),
2822    [0xdb] = MMX_OP2(pand),
2823    [0xdc] = MMX_OP2(paddusb),
2824    [0xdd] = MMX_OP2(paddusw),
2825    [0xde] = MMX_OP2(pmaxub),
2826    [0xdf] = MMX_OP2(pandn),
2827    [0xe0] = MMX_OP2(pavgb),
2828    [0xe1] = MMX_OP2(psraw),
2829    [0xe2] = MMX_OP2(psrad),
2830    [0xe3] = MMX_OP2(pavgw),
2831    [0xe4] = MMX_OP2(pmulhuw),
2832    [0xe5] = MMX_OP2(pmulhw),
2833    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2834    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2835    [0xe8] = MMX_OP2(psubsb),
2836    [0xe9] = MMX_OP2(psubsw),
2837    [0xea] = MMX_OP2(pminsw),
2838    [0xeb] = MMX_OP2(por),
2839    [0xec] = MMX_OP2(paddsb),
2840    [0xed] = MMX_OP2(paddsw),
2841    [0xee] = MMX_OP2(pmaxsw),
2842    [0xef] = MMX_OP2(pxor),
2843    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2844    [0xf1] = MMX_OP2(psllw),
2845    [0xf2] = MMX_OP2(pslld),
2846    [0xf3] = MMX_OP2(psllq),
2847    [0xf4] = MMX_OP2(pmuludq),
2848    [0xf5] = MMX_OP2(pmaddwd),
2849    [0xf6] = MMX_OP2(psadbw),
2850    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2851               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2852    [0xf8] = MMX_OP2(psubb),
2853    [0xf9] = MMX_OP2(psubw),
2854    [0xfa] = MMX_OP2(psubl),
2855    [0xfb] = MMX_OP2(psubq),
2856    [0xfc] = MMX_OP2(paddb),
2857    [0xfd] = MMX_OP2(paddw),
2858    [0xfe] = MMX_OP2(paddl),
2859};
2860
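/* Indexed by (group * 8) + the modrm reg field, where groups 0/1/2 are the
   0x71/0x72/0x73 shift-by-immediate opcodes (word/dword/qword elements) and
   /2, /4, /6 select right-logical, right-arithmetic and left shifts; /3 and
   /7 in the qword group are the xmm-only whole-register byte shifts.  */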
2861static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2862    [0 + 2] = MMX_OP2(psrlw),
2863    [0 + 4] = MMX_OP2(psraw),
2864    [0 + 6] = MMX_OP2(psllw),
2865    [8 + 2] = MMX_OP2(psrld),
2866    [8 + 4] = MMX_OP2(psrad),
2867    [8 + 6] = MMX_OP2(pslld),
2868    [16 + 2] = MMX_OP2(psrlq),
2869    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2870    [16 + 6] = MMX_OP2(psllq),
2871    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2872};
2873
2874static const SSEFunc_0_epi sse_op_table3ai[] = {
2875    gen_helper_cvtsi2ss,
2876    gen_helper_cvtsi2sd
2877};
2878
2879#ifdef TARGET_X86_64
2880static const SSEFunc_0_epl sse_op_table3aq[] = {
2881    gen_helper_cvtsq2ss,
2882    gen_helper_cvtsq2sd
2883};
2884#endif
2885
2886static const SSEFunc_i_ep sse_op_table3bi[] = {
2887    gen_helper_cvttss2si,
2888    gen_helper_cvtss2si,
2889    gen_helper_cvttsd2si,
2890    gen_helper_cvtsd2si
2891};
2892
2893#ifdef TARGET_X86_64
2894static const SSEFunc_l_ep sse_op_table3bq[] = {
2895    gen_helper_cvttss2sq,
2896    gen_helper_cvtss2sq,
2897    gen_helper_cvttsd2sq,
2898    gen_helper_cvtsd2sq
2899};
2900#endif
2901
2902static const SSEFunc_0_epp sse_op_table4[8][4] = {
2903    SSE_FOP(cmpeq),
2904    SSE_FOP(cmplt),
2905    SSE_FOP(cmple),
2906    SSE_FOP(cmpunord),
2907    SSE_FOP(cmpneq),
2908    SSE_FOP(cmpnlt),
2909    SSE_FOP(cmpnle),
2910    SSE_FOP(cmpord),
2911};
2912
2913static const SSEFunc_0_epp sse_op_table5[256] = {
2914    [0x0c] = gen_helper_pi2fw,
2915    [0x0d] = gen_helper_pi2fd,
2916    [0x1c] = gen_helper_pf2iw,
2917    [0x1d] = gen_helper_pf2id,
2918    [0x8a] = gen_helper_pfnacc,
2919    [0x8e] = gen_helper_pfpnacc,
2920    [0x90] = gen_helper_pfcmpge,
2921    [0x94] = gen_helper_pfmin,
2922    [0x96] = gen_helper_pfrcp,
2923    [0x97] = gen_helper_pfrsqrt,
2924    [0x9a] = gen_helper_pfsub,
2925    [0x9e] = gen_helper_pfadd,
2926    [0xa0] = gen_helper_pfcmpgt,
2927    [0xa4] = gen_helper_pfmax,
2928    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2929    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2930    [0xaa] = gen_helper_pfsubr,
2931    [0xae] = gen_helper_pfacc,
2932    [0xb0] = gen_helper_pfcmpeq,
2933    [0xb4] = gen_helper_pfmul,
2934    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2935    [0xb7] = gen_helper_pmulhrw_mmx,
2936    [0xbb] = gen_helper_pswapd,
2937    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2938};
2939
2940struct SSEOpHelper_epp {
2941    SSEFunc_0_epp op[2];
2942    uint32_t ext_mask;
2943};
2944
2945struct SSEOpHelper_eppi {
2946    SSEFunc_0_eppi op[2];
2947    uint32_t ext_mask;
2948};
2949
2950#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2951#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2952#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2953#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2954#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2955        CPUID_EXT_PCLMULQDQ }
2956#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2957
2958static const struct SSEOpHelper_epp sse_op_table6[256] = {
2959    [0x00] = SSSE3_OP(pshufb),
2960    [0x01] = SSSE3_OP(phaddw),
2961    [0x02] = SSSE3_OP(phaddd),
2962    [0x03] = SSSE3_OP(phaddsw),
2963    [0x04] = SSSE3_OP(pmaddubsw),
2964    [0x05] = SSSE3_OP(phsubw),
2965    [0x06] = SSSE3_OP(phsubd),
2966    [0x07] = SSSE3_OP(phsubsw),
2967    [0x08] = SSSE3_OP(psignb),
2968    [0x09] = SSSE3_OP(psignw),
2969    [0x0a] = SSSE3_OP(psignd),
2970    [0x0b] = SSSE3_OP(pmulhrsw),
2971    [0x10] = SSE41_OP(pblendvb),
2972    [0x14] = SSE41_OP(blendvps),
2973    [0x15] = SSE41_OP(blendvpd),
2974    [0x17] = SSE41_OP(ptest),
2975    [0x1c] = SSSE3_OP(pabsb),
2976    [0x1d] = SSSE3_OP(pabsw),
2977    [0x1e] = SSSE3_OP(pabsd),
2978    [0x20] = SSE41_OP(pmovsxbw),
2979    [0x21] = SSE41_OP(pmovsxbd),
2980    [0x22] = SSE41_OP(pmovsxbq),
2981    [0x23] = SSE41_OP(pmovsxwd),
2982    [0x24] = SSE41_OP(pmovsxwq),
2983    [0x25] = SSE41_OP(pmovsxdq),
2984    [0x28] = SSE41_OP(pmuldq),
2985    [0x29] = SSE41_OP(pcmpeqq),
2986    [0x2a] = SSE41_SPECIAL, /* movntdqa */
2987    [0x2b] = SSE41_OP(packusdw),
2988    [0x30] = SSE41_OP(pmovzxbw),
2989    [0x31] = SSE41_OP(pmovzxbd),
2990    [0x32] = SSE41_OP(pmovzxbq),
2991    [0x33] = SSE41_OP(pmovzxwd),
2992    [0x34] = SSE41_OP(pmovzxwq),
2993    [0x35] = SSE41_OP(pmovzxdq),
2994    [0x37] = SSE42_OP(pcmpgtq),
2995    [0x38] = SSE41_OP(pminsb),
2996    [0x39] = SSE41_OP(pminsd),
2997    [0x3a] = SSE41_OP(pminuw),
2998    [0x3b] = SSE41_OP(pminud),
2999    [0x3c] = SSE41_OP(pmaxsb),
3000    [0x3d] = SSE41_OP(pmaxsd),
3001    [0x3e] = SSE41_OP(pmaxuw),
3002    [0x3f] = SSE41_OP(pmaxud),
3003    [0x40] = SSE41_OP(pmulld),
3004    [0x41] = SSE41_OP(phminposuw),
3005    [0xdb] = AESNI_OP(aesimc),
3006    [0xdc] = AESNI_OP(aesenc),
3007    [0xdd] = AESNI_OP(aesenclast),
3008    [0xde] = AESNI_OP(aesdec),
3009    [0xdf] = AESNI_OP(aesdeclast),
3010};
3011
3012static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3013    [0x08] = SSE41_OP(roundps),
3014    [0x09] = SSE41_OP(roundpd),
3015    [0x0a] = SSE41_OP(roundss),
3016    [0x0b] = SSE41_OP(roundsd),
3017    [0x0c] = SSE41_OP(blendps),
3018    [0x0d] = SSE41_OP(blendpd),
3019    [0x0e] = SSE41_OP(pblendw),
3020    [0x0f] = SSSE3_OP(palignr),
3021    [0x14] = SSE41_SPECIAL, /* pextrb */
3022    [0x15] = SSE41_SPECIAL, /* pextrw */
3023    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3024    [0x17] = SSE41_SPECIAL, /* extractps */
3025    [0x20] = SSE41_SPECIAL, /* pinsrb */
3026    [0x21] = SSE41_SPECIAL, /* insertps */
3027    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3028    [0x40] = SSE41_OP(dpps),
3029    [0x41] = SSE41_OP(dppd),
3030    [0x42] = SSE41_OP(mpsadbw),
3031    [0x44] = PCLMULQDQ_OP(pclmulqdq),
3032    [0x60] = SSE42_OP(pcmpestrm),
3033    [0x61] = SSE42_OP(pcmpestri),
3034    [0x62] = SSE42_OP(pcmpistrm),
3035    [0x63] = SSE42_OP(pcmpistri),
3036    [0xdf] = AESNI_OP(aeskeygenassist),
3037};
3038
3039static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3040                    target_ulong pc_start, int rex_r)
3041{
3042    int b1, op1_offset, op2_offset, is_xmm, val;
3043    int modrm, mod, rm, reg;
3044    SSEFunc_0_epp sse_fn_epp;
3045    SSEFunc_0_eppi sse_fn_eppi;
3046    SSEFunc_0_ppi sse_fn_ppi;
3047    SSEFunc_0_eppt sse_fn_eppt;
3048    MemOp ot;
3049
3050    b &= 0xff;
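    /* Pick the table column from the mandatory prefix:
       none -> 0, 0x66 -> 1, 0xf3 -> 2, 0xf2 -> 3.  */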
3051    if (s->prefix & PREFIX_DATA)
3052        b1 = 1;
3053    else if (s->prefix & PREFIX_REPZ)
3054        b1 = 2;
3055    else if (s->prefix & PREFIX_REPNZ)
3056        b1 = 3;
3057    else
3058        b1 = 0;
3059    sse_fn_epp = sse_op_table1[b][b1];
3060    if (!sse_fn_epp) {
3061        goto unknown_op;
3062    }
3063    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3064        is_xmm = 1;
3065    } else {
3066        if (b1 == 0) {
3067            /* MMX case */
3068            is_xmm = 0;
3069        } else {
3070            is_xmm = 1;
3071        }
3072    }
3073    /* simple MMX/SSE operation */
3074    if (s->flags & HF_TS_MASK) {
3075        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3076        return;
3077    }
3078    if (s->flags & HF_EM_MASK) {
3079    illegal_op:
3080        gen_illegal_opcode(s);
3081        return;
3082    }
3083    if (is_xmm
3084        && !(s->flags & HF_OSFXSR_MASK)
3085        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3086        goto unknown_op;
3087    }
3088    if (b == 0x0e) {
3089        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3090            /* If we were fully decoding this we might use illegal_op.  */
3091            goto unknown_op;
3092        }
3093        /* femms */
3094        gen_helper_emms(cpu_env);
3095        return;
3096    }
3097    if (b == 0x77) {
3098        /* emms */
3099        gen_helper_emms(cpu_env);
3100        return;
3101    }
3102    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3103       the static cpu state) */
3104    if (!is_xmm) {
3105        gen_helper_enter_mmx(cpu_env);
3106    }
3107
3108    modrm = x86_ldub_code(env, s);
3109    reg = ((modrm >> 3) & 7);
3110    if (is_xmm)
3111        reg |= rex_r;
3112    mod = (modrm >> 6) & 3;
3113    if (sse_fn_epp == SSE_SPECIAL) {
3114        b |= (b1 << 8);
3115        switch(b) {
3116        case 0x0e7: /* movntq */
3117            if (mod == 3) {
3118                goto illegal_op;
3119            }
3120            gen_lea_modrm(env, s, modrm);
3121            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3122            break;
3123        case 0x1e7: /* movntdq */
3124        case 0x02b: /* movntps */
3125        case 0x12b: /* movntpd */
3126            if (mod == 3)
3127                goto illegal_op;
3128            gen_lea_modrm(env, s, modrm);
3129            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3130            break;
3131        case 0x3f0: /* lddqu */
3132            if (mod == 3)
3133                goto illegal_op;
3134            gen_lea_modrm(env, s, modrm);
3135            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3136            break;
3137        case 0x22b: /* movntss */
3138        case 0x32b: /* movntsd */
3139            if (mod == 3)
3140                goto illegal_op;
3141            gen_lea_modrm(env, s, modrm);
3142            if (b1 & 1) {
3143                gen_stq_env_A0(s, offsetof(CPUX86State,
3144                                           xmm_regs[reg].ZMM_Q(0)));
3145            } else {
3146                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3147                    xmm_regs[reg].ZMM_L(0)));
3148                gen_op_st_v(s, MO_32, s->T0, s->A0);
3149            }
3150            break;
3151        case 0x6e: /* movd mm, ea */
3152#ifdef TARGET_X86_64
3153            if (s->dflag == MO_64) {
3154                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3155                tcg_gen_st_tl(s->T0, cpu_env,
3156                              offsetof(CPUX86State, fpregs[reg].mmx));
3157            } else
3158#endif
3159            {
3160                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3161                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3162                                 offsetof(CPUX86State,fpregs[reg].mmx));
3163                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3164                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3165            }
3166            break;
3167        case 0x16e: /* movd xmm, ea */
3168#ifdef TARGET_X86_64
3169            if (s->dflag == MO_64) {
3170                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3171                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3172                                 offsetof(CPUX86State,xmm_regs[reg]));
3173                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3174            } else
3175#endif
3176            {
3177                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3178                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3179                                 offsetof(CPUX86State,xmm_regs[reg]));
3180                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3181                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3182            }
3183            break;
3184        case 0x6f: /* movq mm, ea */
3185            if (mod != 3) {
3186                gen_lea_modrm(env, s, modrm);
3187                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3188            } else {
3189                rm = (modrm & 7);
3190                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3191                               offsetof(CPUX86State,fpregs[rm].mmx));
3192                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3193                               offsetof(CPUX86State,fpregs[reg].mmx));
3194            }
3195            break;
3196        case 0x010: /* movups */
3197        case 0x110: /* movupd */
3198        case 0x028: /* movaps */
3199        case 0x128: /* movapd */
3200        case 0x16f: /* movdqa xmm, ea */
3201        case 0x26f: /* movdqu xmm, ea */
3202            if (mod != 3) {
3203                gen_lea_modrm(env, s, modrm);
3204                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3205            } else {
3206                rm = (modrm & 7) | REX_B(s);
3207                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3208                            offsetof(CPUX86State,xmm_regs[rm]));
3209            }
3210            break;
3211        case 0x210: /* movss xmm, ea */
3212            if (mod != 3) {
3213                gen_lea_modrm(env, s, modrm);
3214                gen_op_ld_v(s, MO_32, s->T0, s->A0);
3215                tcg_gen_st32_tl(s->T0, cpu_env,
3216                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3217                tcg_gen_movi_tl(s->T0, 0);
3218                tcg_gen_st32_tl(s->T0, cpu_env,
3219                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3220                tcg_gen_st32_tl(s->T0, cpu_env,
3221                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3222                tcg_gen_st32_tl(s->T0, cpu_env,
3223                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3224            } else {
3225                rm = (modrm & 7) | REX_B(s);
3226                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3227                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3228            }
3229            break;
3230        case 0x310: /* movsd xmm, ea */
3231            if (mod != 3) {
3232                gen_lea_modrm(env, s, modrm);
3233                gen_ldq_env_A0(s, offsetof(CPUX86State,
3234                                           xmm_regs[reg].ZMM_Q(0)));
3235                tcg_gen_movi_tl(s->T0, 0);
3236                tcg_gen_st32_tl(s->T0, cpu_env,
3237                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3238                tcg_gen_st32_tl(s->T0, cpu_env,
3239                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3240            } else {
3241                rm = (modrm & 7) | REX_B(s);
3242                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3243                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3244            }
3245            break;
3246        case 0x012: /* movlps */
3247        case 0x112: /* movlpd */
3248            if (mod != 3) {
3249                gen_lea_modrm(env, s, modrm);
3250                gen_ldq_env_A0(s, offsetof(CPUX86State,
3251                                           xmm_regs[reg].ZMM_Q(0)));
3252            } else {
3253                /* movhlps */
3254                rm = (modrm & 7) | REX_B(s);
3255                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3256                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3257            }
3258            break;
3259        case 0x212: /* movsldup */
3260            if (mod != 3) {
3261                gen_lea_modrm(env, s, modrm);
3262                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3263            } else {
3264                rm = (modrm & 7) | REX_B(s);
3265                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3266                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3267                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3268                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3269            }
3270            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3271                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3272            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3273                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3274            break;
3275        case 0x312: /* movddup */
3276            if (mod != 3) {
3277                gen_lea_modrm(env, s, modrm);
3278                gen_ldq_env_A0(s, offsetof(CPUX86State,
3279                                           xmm_regs[reg].ZMM_Q(0)));
3280            } else {
3281                rm = (modrm & 7) | REX_B(s);
3282                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3283                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3284            }
3285            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3286                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3287            break;
3288        case 0x016: /* movhps */
3289        case 0x116: /* movhpd */
3290            if (mod != 3) {
3291                gen_lea_modrm(env, s, modrm);
3292                gen_ldq_env_A0(s, offsetof(CPUX86State,
3293                                           xmm_regs[reg].ZMM_Q(1)));
3294            } else {
3295                /* movlhps */
3296                rm = (modrm & 7) | REX_B(s);
3297                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3298                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3299            }
3300            break;
3301        case 0x216: /* movshdup */
3302            if (mod != 3) {
3303                gen_lea_modrm(env, s, modrm);
3304                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3305            } else {
3306                rm = (modrm & 7) | REX_B(s);
3307                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3308                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3309                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3310                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3311            }
3312            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3313                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3314            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3315                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3316            break;
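            /*
             * AMD SSE4a EXTRQ/INSERTQ, immediate forms: 66 0f 78 /0 is
             * EXTRQ (b1 == 1) and f2 0f 78 is INSERTQ (b1 == 3).  Two
             * immediate bytes follow the modrm byte: first the bit-field
             * length, then the starting bit index, each taken modulo 64.
             */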
3317        case 0x178:
3318        case 0x378:
3319            {
3320                int bit_index, field_length;
3321
3322                if (b1 == 1 && reg != 0)
3323                    goto illegal_op;
3324                field_length = x86_ldub_code(env, s) & 0x3F;
3325                bit_index = x86_ldub_code(env, s) & 0x3F;
3326                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3327                    offsetof(CPUX86State,xmm_regs[reg]));
3328                if (b1 == 1)
3329                    gen_helper_extrq_i(cpu_env, s->ptr0,
3330                                       tcg_const_i32(bit_index),
3331                                       tcg_const_i32(field_length));
3332                else
3333                    gen_helper_insertq_i(cpu_env, s->ptr0,
3334                                         tcg_const_i32(bit_index),
3335                                         tcg_const_i32(field_length));
3336            }
3337            break;
3338        case 0x7e: /* movd ea, mm */
3339#ifdef TARGET_X86_64
3340            if (s->dflag == MO_64) {
3341                tcg_gen_ld_i64(s->T0, cpu_env,
3342                               offsetof(CPUX86State,fpregs[reg].mmx));
3343                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3344            } else
3345#endif
3346            {
3347                tcg_gen_ld32u_tl(s->T0, cpu_env,
3348                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3349                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3350            }
3351            break;
3352        case 0x17e: /* movd ea, xmm */
3353#ifdef TARGET_X86_64
3354            if (s->dflag == MO_64) {
3355                tcg_gen_ld_i64(s->T0, cpu_env,
3356                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3357                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3358            } else
3359#endif
3360            {
3361                tcg_gen_ld32u_tl(s->T0, cpu_env,
3362                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3363                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3364            }
3365            break;
3366        case 0x27e: /* movq xmm, ea */
3367            if (mod != 3) {
3368                gen_lea_modrm(env, s, modrm);
3369                gen_ldq_env_A0(s, offsetof(CPUX86State,
3370                                           xmm_regs[reg].ZMM_Q(0)));
3371            } else {
3372                rm = (modrm & 7) | REX_B(s);
3373                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3374                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3375            }
3376            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3377            break;
3378        case 0x7f: /* movq ea, mm */
3379            if (mod != 3) {
3380                gen_lea_modrm(env, s, modrm);
3381                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3382            } else {
3383                rm = (modrm & 7);
3384                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3385                            offsetof(CPUX86State,fpregs[reg].mmx));
3386            }
3387            break;
3388        case 0x011: /* movups */
3389        case 0x111: /* movupd */
3390        case 0x029: /* movaps */
3391        case 0x129: /* movapd */
3392        case 0x17f: /* movdqa ea, xmm */
3393        case 0x27f: /* movdqu ea, xmm */
3394            if (mod != 3) {
3395                gen_lea_modrm(env, s, modrm);
3396                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3397            } else {
3398                rm = (modrm & 7) | REX_B(s);
3399                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3400                            offsetof(CPUX86State,xmm_regs[reg]));
3401            }
3402            break;
3403        case 0x211: /* movss ea, xmm */
3404            if (mod != 3) {
3405                gen_lea_modrm(env, s, modrm);
3406                tcg_gen_ld32u_tl(s->T0, cpu_env,
3407                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3408                gen_op_st_v(s, MO_32, s->T0, s->A0);
3409            } else {
3410                rm = (modrm & 7) | REX_B(s);
3411                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3412                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3413            }
3414            break;
3415        case 0x311: /* movsd ea, xmm */
3416            if (mod != 3) {
3417                gen_lea_modrm(env, s, modrm);
3418                gen_stq_env_A0(s, offsetof(CPUX86State,
3419                                           xmm_regs[reg].ZMM_Q(0)));
3420            } else {
3421                rm = (modrm & 7) | REX_B(s);
3422                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3423                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3424            }
3425            break;
3426        case 0x013: /* movlps */
3427        case 0x113: /* movlpd */
3428            if (mod != 3) {
3429                gen_lea_modrm(env, s, modrm);
3430                gen_stq_env_A0(s, offsetof(CPUX86State,
3431                                           xmm_regs[reg].ZMM_Q(0)));
3432            } else {
3433                goto illegal_op;
3434            }
3435            break;
3436        case 0x017: /* movhps */
3437        case 0x117: /* movhpd */
3438            if (mod != 3) {
3439                gen_lea_modrm(env, s, modrm);
3440                gen_stq_env_A0(s, offsetof(CPUX86State,
3441                                           xmm_regs[reg].ZMM_Q(1)));
3442            } else {
3443                goto illegal_op;
3444            }
3445            break;
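            /*
             * 0f 71/72/73 (plus the prefixed forms): shift-by-immediate
             * groups 12/13/14.  The count is staged in mmx_t0/xmm_t0 so
             * the register-count helpers can be reused; the modrm reg
             * field picks the operation via sse_op_table2.
             */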
3446        case 0x71: /* shift mm, im */
3447        case 0x72:
3448        case 0x73:
3449        case 0x171: /* shift xmm, im */
3450        case 0x172:
3451        case 0x173:
3452            if (b1 >= 2) {
3453                goto unknown_op;
3454            }
3455            val = x86_ldub_code(env, s);
3456            if (is_xmm) {
3457                tcg_gen_movi_tl(s->T0, val);
3458                tcg_gen_st32_tl(s->T0, cpu_env,
3459                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3460                tcg_gen_movi_tl(s->T0, 0);
3461                tcg_gen_st32_tl(s->T0, cpu_env,
3462                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3463                op1_offset = offsetof(CPUX86State,xmm_t0);
3464            } else {
3465                tcg_gen_movi_tl(s->T0, val);
3466                tcg_gen_st32_tl(s->T0, cpu_env,
3467                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3468                tcg_gen_movi_tl(s->T0, 0);
3469                tcg_gen_st32_tl(s->T0, cpu_env,
3470                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3471                op1_offset = offsetof(CPUX86State,mmx_t0);
3472            }
3473            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3474                                       ((modrm >> 3) & 7)][b1];
3475            if (!sse_fn_epp) {
3476                goto unknown_op;
3477            }
3478            if (is_xmm) {
3479                rm = (modrm & 7) | REX_B(s);
3480                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3481            } else {
3482                rm = (modrm & 7);
3483                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3484            }
3485            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3486            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3487            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3488            break;
3489        case 0x050: /* movmskps */
3490            rm = (modrm & 7) | REX_B(s);
3491            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3492                             offsetof(CPUX86State,xmm_regs[rm]));
3493            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3494            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3495            break;
3496        case 0x150: /* movmskpd */
3497            rm = (modrm & 7) | REX_B(s);
3498            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3499                             offsetof(CPUX86State,xmm_regs[rm]));
3500            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3501            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3502            break;
3503        case 0x02a: /* cvtpi2ps */
3504        case 0x12a: /* cvtpi2pd */
3505            gen_helper_enter_mmx(cpu_env);
3506            if (mod != 3) {
3507                gen_lea_modrm(env, s, modrm);
3508                op2_offset = offsetof(CPUX86State,mmx_t0);
3509                gen_ldq_env_A0(s, op2_offset);
3510            } else {
3511                rm = (modrm & 7);
3512                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3513            }
3514            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3515            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3516            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3517            switch(b >> 8) {
3518            case 0x0:
3519                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3520                break;
3521            default:
3522            case 0x1:
3523                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3524                break;
3525            }
3526            break;
3527        case 0x22a: /* cvtsi2ss */
3528        case 0x32a: /* cvtsi2sd */
3529            ot = mo_64_32(s->dflag);
3530            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3531            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3532            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3533            if (ot == MO_32) {
3534                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3535                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3536                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3537            } else {
3538#ifdef TARGET_X86_64
3539                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3540                sse_fn_epl(cpu_env, s->ptr0, s->T0);
3541#else
3542                goto illegal_op;
3543#endif
3544            }
3545            break;
3546        case 0x02c: /* cvttps2pi */
3547        case 0x12c: /* cvttpd2pi */
3548        case 0x02d: /* cvtps2pi */
3549        case 0x12d: /* cvtpd2pi */
3550            gen_helper_enter_mmx(cpu_env);
3551            if (mod != 3) {
3552                gen_lea_modrm(env, s, modrm);
3553                op2_offset = offsetof(CPUX86State,xmm_t0);
3554                gen_ldo_env_A0(s, op2_offset);
3555            } else {
3556                rm = (modrm & 7) | REX_B(s);
3557                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3558            }
3559            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3560            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3561            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3562            switch(b) {
3563            case 0x02c:
3564                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3565                break;
3566            case 0x12c:
3567                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3568                break;
3569            case 0x02d:
3570                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3571                break;
3572            case 0x12d:
3573                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3574                break;
3575            }
3576            break;
3577        case 0x22c: /* cvttss2si */
3578        case 0x32c: /* cvttsd2si */
3579        case 0x22d: /* cvtss2si */
3580        case 0x32d: /* cvtsd2si */
3581            ot = mo_64_32(s->dflag);
3582            if (mod != 3) {
3583                gen_lea_modrm(env, s, modrm);
3584                if ((b >> 8) & 1) {
3585                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3586                } else {
3587                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
3588                    tcg_gen_st32_tl(s->T0, cpu_env,
3589                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3590                }
3591                op2_offset = offsetof(CPUX86State,xmm_t0);
3592            } else {
3593                rm = (modrm & 7) | REX_B(s);
3594                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3595            }
3596            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3597            if (ot == MO_32) {
3598                SSEFunc_i_ep sse_fn_i_ep =
3599                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3600                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3601                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3602            } else {
3603#ifdef TARGET_X86_64
3604                SSEFunc_l_ep sse_fn_l_ep =
3605                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3606                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3607#else
3608                goto illegal_op;
3609#endif
3610            }
3611            gen_op_mov_reg_v(s, ot, reg, s->T0);
3612            break;
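            /*
             * pinsrw reads an imm8 after its modrm operand; rip_offset
             * records the pending byte so that a RIP-relative operand is
             * computed relative to the end of the instruction.
             */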
3613        case 0xc4: /* pinsrw */
3614        case 0x1c4:
3615            s->rip_offset = 1;
3616            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3617            val = x86_ldub_code(env, s);
3618            if (b1) {
3619                val &= 7;
3620                tcg_gen_st16_tl(s->T0, cpu_env,
3621                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3622            } else {
3623                val &= 3;
3624                tcg_gen_st16_tl(s->T0, cpu_env,
3625                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3626            }
3627            break;
3628        case 0xc5: /* pextrw */
3629        case 0x1c5:
3630            if (mod != 3)
3631                goto illegal_op;
3632            ot = mo_64_32(s->dflag);
3633            val = x86_ldub_code(env, s);
3634            if (b1) {
3635                val &= 7;
3636                rm = (modrm & 7) | REX_B(s);
3637                tcg_gen_ld16u_tl(s->T0, cpu_env,
3638                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3639            } else {
3640                val &= 3;
3641                rm = (modrm & 7);
3642                tcg_gen_ld16u_tl(s->T0, cpu_env,
3643                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3644            }
3645            reg = ((modrm >> 3) & 7) | rex_r;
3646            gen_op_mov_reg_v(s, ot, reg, s->T0);
3647            break;
3648        case 0x1d6: /* movq ea, xmm */
3649            if (mod != 3) {
3650                gen_lea_modrm(env, s, modrm);
3651                gen_stq_env_A0(s, offsetof(CPUX86State,
3652                                           xmm_regs[reg].ZMM_Q(0)));
3653            } else {
3654                rm = (modrm & 7) | REX_B(s);
3655                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3656                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3657                gen_op_movq_env_0(s,
3658                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3659            }
3660            break;
3661        case 0x2d6: /* movq2dq */
3662            gen_helper_enter_mmx(cpu_env);
3663            rm = (modrm & 7);
3664            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3665                        offsetof(CPUX86State,fpregs[rm].mmx));
3666            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3667            break;
3668        case 0x3d6: /* movdq2q */
3669            gen_helper_enter_mmx(cpu_env);
3670            rm = (modrm & 7) | REX_B(s);
3671            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3672                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3673            break;
3674        case 0xd7: /* pmovmskb */
3675        case 0x1d7:
3676            if (mod != 3)
3677                goto illegal_op;
3678            if (b1) {
3679                rm = (modrm & 7) | REX_B(s);
3680                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3681                                 offsetof(CPUX86State, xmm_regs[rm]));
3682                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3683            } else {
3684                rm = (modrm & 7);
3685                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3686                                 offsetof(CPUX86State, fpregs[rm].mmx));
3687                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3688            }
3689            reg = ((modrm >> 3) & 7) | rex_r;
3690            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3691            break;
3692
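            /*
             * Three-byte opcodes 0f 38 xx: what was fetched as modrm
             * above is really the third opcode byte, so modrm is
             * reloaded below.  0f 38 f0..ff hold the integer extensions,
             * handled at do_0f_38_fx.
             */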
3693        case 0x138:
3694        case 0x038:
3695            b = modrm;
3696            if ((b & 0xf0) == 0xf0) {
3697                goto do_0f_38_fx;
3698            }
3699            modrm = x86_ldub_code(env, s);
3700            rm = modrm & 7;
3701            reg = ((modrm >> 3) & 7) | rex_r;
3702            mod = (modrm >> 6) & 3;
3703            if (b1 >= 2) {
3704                goto unknown_op;
3705            }
3706
3707            sse_fn_epp = sse_op_table6[b].op[b1];
3708            if (!sse_fn_epp) {
3709                goto unknown_op;
3710            }
3711            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3712                goto illegal_op;
3713
3714            if (b1) {
3715                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3716                if (mod == 3) {
3717                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3718                } else {
3719                    op2_offset = offsetof(CPUX86State,xmm_t0);
3720                    gen_lea_modrm(env, s, modrm);
3721                    switch (b) {
3722                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3723                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3724                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3725                        gen_ldq_env_A0(s, op2_offset +
3726                                        offsetof(ZMMReg, ZMM_Q(0)));
3727                        break;
3728                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3729                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3730                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3731                                            s->mem_index, MO_LEUL);
3732                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3733                                        offsetof(ZMMReg, ZMM_L(0)));
3734                        break;
3735                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3736                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3737                                           s->mem_index, MO_LEUW);
3738                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3739                                        offsetof(ZMMReg, ZMM_W(0)));
3740                        break;
3741                    case 0x2a:            /* movntdqa */
3742                        gen_ldo_env_A0(s, op1_offset);
3743                        return;
3744                    default:
3745                        gen_ldo_env_A0(s, op2_offset);
3746                    }
3747                }
3748            } else {
3749                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3750                if (mod == 3) {
3751                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3752                } else {
3753                    op2_offset = offsetof(CPUX86State,mmx_t0);
3754                    gen_lea_modrm(env, s, modrm);
3755                    gen_ldq_env_A0(s, op2_offset);
3756                }
3757            }
3758            if (sse_fn_epp == SSE_SPECIAL) {
3759                goto unknown_op;
3760            }
3761
3762            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3763            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3764            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3765
3766            if (b == 0x17) {
3767                set_cc_op(s, CC_OP_EFLAGS);
3768            }
3769            break;
3770
3771        case 0x238:
3772        case 0x338:
3773        do_0f_38_fx:
3774            /* Various integer extensions at 0f 38 f[0-f].  */
3775            b = modrm | (b1 << 8);
3776            modrm = x86_ldub_code(env, s);
3777            reg = ((modrm >> 3) & 7) | rex_r;
3778
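                /*
                 * b is now (prefix index << 8) | third opcode byte: 0x0f0
                 * is plain 0f 38 f0 (movbe), 0x3f0 its f2 form (crc32),
                 * 0x1f6 the 66-prefixed f6 (adcx), and so on.
                 */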
3779            switch (b) {
3780            case 0x3f0: /* crc32 Gd,Eb */
3781            case 0x3f1: /* crc32 Gd,Ey */
3782            do_crc32:
3783                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3784                    goto illegal_op;
3785                }
3786                if ((b & 0xff) == 0xf0) {
3787                    ot = MO_8;
3788                } else if (s->dflag != MO_64) {
3789                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3790                } else {
3791                    ot = MO_64;
3792                }
3793
3794                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3795                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3796                gen_helper_crc32(s->T0, s->tmp2_i32,
3797                                 s->T0, tcg_const_i32(8 << ot));
3798
3799                ot = mo_64_32(s->dflag);
3800                gen_op_mov_reg_v(s, ot, reg, s->T0);
3801                break;
3802
3803            case 0x1f0: /* crc32 or movbe */
3804            case 0x1f1:
3805                /* For these insns, the f3 prefix is supposed to take
3806                   priority over the 66 prefix, but that is not what the
3807                   computation of b1 above implements.  */
3808                if (s->prefix & PREFIX_REPNZ) {
3809                    goto do_crc32;
3810                }
3811                /* FALLTHRU */
3812            case 0x0f0: /* movbe Gy,My */
3813            case 0x0f1: /* movbe My,Gy */
3814                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3815                    goto illegal_op;
3816                }
3817                if (s->dflag != MO_64) {
3818                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3819                } else {
3820                    ot = MO_64;
3821                }
3822
3823                gen_lea_modrm(env, s, modrm);
3824                if ((b & 1) == 0) {
3825                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
3826                                       s->mem_index, ot | MO_BE);
3827                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3828                } else {
3829                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3830                                       s->mem_index, ot | MO_BE);
3831                }
3832                break;
3833
3834            case 0x0f2: /* andn Gy, By, Ey */
3835                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3836                    || !(s->prefix & PREFIX_VEX)
3837                    || s->vex_l != 0) {
3838                    goto illegal_op;
3839                }
3840                ot = mo_64_32(s->dflag);
3841                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3842                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3843                gen_op_mov_reg_v(s, ot, reg, s->T0);
3844                gen_op_update1_cc(s);
3845                set_cc_op(s, CC_OP_LOGICB + ot);
3846                break;
3847
3848            case 0x0f7: /* bextr Gy, Ey, By */
3849                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3850                    || !(s->prefix & PREFIX_VEX)
3851                    || s->vex_l != 0) {
3852                    goto illegal_op;
3853                }
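                    /*
                     * BEXTR control word in By: bits [7:0] = START, bits
                     * [15:8] = LEN; the result is
                     * (Ey >> START) & ((1 << LEN) - 1), so e.g. 0x0408
                     * extracts the four bits starting at bit 8.
                     */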
3854                ot = mo_64_32(s->dflag);
3855                {
3856                    TCGv bound, zero;
3857
3858                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3859                    /* Extract START, and shift the operand.
3860                       Shifts larger than operand size get zeros.  */
3861                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3862                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3863
3864                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3865                    zero = tcg_const_tl(0);
3866                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3867                                       s->T0, zero);
3868                    tcg_temp_free(zero);
3869
3870                    /* Extract the LEN into a mask.  Lengths larger than
3871                       operand size get all ones.  */
3872                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3873                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3874                                       s->A0, bound);
3875                    tcg_temp_free(bound);
3876                    tcg_gen_movi_tl(s->T1, 1);
3877                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3878                    tcg_gen_subi_tl(s->T1, s->T1, 1);
3879                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
3880
3881                    gen_op_mov_reg_v(s, ot, reg, s->T0);
3882                    gen_op_update1_cc(s);
3883                    set_cc_op(s, CC_OP_LOGICB + ot);
3884                }
3885                break;
3886
3887            case 0x0f5: /* bzhi Gy, Ey, By */
3888                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3889                    || !(s->prefix & PREFIX_VEX)
3890                    || s->vex_l != 0) {
3891                    goto illegal_op;
3892                }
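                    /*
                     * BZHI clears the bits of Ey at positions >= the
                     * index in By[7:0].  CF must be set exactly when the
                     * index is larger than the highest bit number (63 or
                     * 31), i.e. when the clamp below takes effect.
                     */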
3893                ot = mo_64_32(s->dflag);
3894                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3895                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3896                {
3897                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3898                    /* Note that since we're using BMILG (in order to get O
3899                       cleared) we need to store the inverse into C.  */
3900                    tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src,
3901                                       s->T1, bound);
3902                    tcg_gen_movcond_tl(TCG_COND_GTU, s->T1, s->T1,
3903                                       bound, bound, s->T1);
3904                    tcg_temp_free(bound);
3905                }
3906                tcg_gen_movi_tl(s->A0, -1);
3907                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3908                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3909                gen_op_mov_reg_v(s, ot, reg, s->T0);
3910                gen_op_update1_cc(s);
3911                set_cc_op(s, CC_OP_BMILGB + ot);
3912                break;
3913
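                /*
                 * MULX: unsigned widening multiply of rDX by Ey, low half
                 * to By (vex_v), high half to Gy, with no flag updates;
                 * hence the direct read of cpu_regs[R_EDX] below.
                 */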
3914            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3915                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3916                    || !(s->prefix & PREFIX_VEX)
3917                    || s->vex_l != 0) {
3918                    goto illegal_op;
3919                }
3920                ot = mo_64_32(s->dflag);
3921                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3922                switch (ot) {
3923                default:
3924                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3925                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3926                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3927                                      s->tmp2_i32, s->tmp3_i32);
3928                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3929                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3930                    break;
3931#ifdef TARGET_X86_64
3932                case MO_64:
3933                    tcg_gen_mulu2_i64(s->T0, s->T1,
3934                                      s->T0, cpu_regs[R_EDX]);
3935                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
3936                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
3937                    break;
3938#endif
3939                }
3940                break;
3941
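                /*
                 * PDEP scatters low-order bits of one source to the bit
                 * positions set in the mask; PEXT gathers the bits
                 * selected by the mask down into the low-order bits of
                 * the result.
                 */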
3942            case 0x3f5: /* pdep Gy, By, Ey */
3943                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3944                    || !(s->prefix & PREFIX_VEX)
3945                    || s->vex_l != 0) {
3946                    goto illegal_op;
3947                }
3948                ot = mo_64_32(s->dflag);
3949                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3950                /* Note that by zero-extending the mask operand, we
3951                   automatically handle zero-extending the result.  */
3952                if (ot == MO_64) {
3953                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3954                } else {
3955                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3956                }
3957                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
3958                break;
3959
3960            case 0x2f5: /* pext Gy, By, Ey */
3961                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3962                    || !(s->prefix & PREFIX_VEX)
3963                    || s->vex_l != 0) {
3964                    goto illegal_op;
3965                }
3966                ot = mo_64_32(s->dflag);
3967                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3968                /* Note that by zero-extending the mask operand, we
3969                   automatically handle zero-extending the result.  */
3970                if (ot == MO_64) {
3971                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3972                } else {
3973                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3974                }
3975                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
3976                break;
3977
3978            case 0x1f6: /* adcx Gy, Ey */
3979            case 0x2f6: /* adox Gy, Ey */
3980                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3981                    goto illegal_op;
3982                } else {
3983                    TCGv carry_in, carry_out, zero;
3984                    int end_op;
3985
3986                    ot = mo_64_32(s->dflag);
3987                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3988
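                        /*
                         * ADCX adds with CF and updates only CF; ADOX
                         * adds with OF and updates only OF.  That gives
                         * software two independent carry chains to
                         * interleave.  While cc_op is ADCX/ADOX/ADCOX,
                         * the C bit is cached in cc_dst and the O bit in
                         * cc_src2.
                         */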
3989                    /* Re-use the carry-out from a previous round.  */
3990                    carry_in = NULL;
3991                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3992                    switch (s->cc_op) {
3993                    case CC_OP_ADCX:
3994                        if (b == 0x1f6) {
3995                            carry_in = cpu_cc_dst;
3996                            end_op = CC_OP_ADCX;
3997                        } else {
3998                            end_op = CC_OP_ADCOX;
3999                        }
4000                        break;
4001                    case CC_OP_ADOX:
4002                        if (b == 0x1f6) {
4003                            end_op = CC_OP_ADCOX;
4004                        } else {
4005                            carry_in = cpu_cc_src2;
4006                            end_op = CC_OP_ADOX;
4007                        }
4008                        break;
4009                    case CC_OP_ADCOX:
4010                        end_op = CC_OP_ADCOX;
4011                        carry_in = carry_out;
4012                        break;
4013                    default:
4014                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4015                        break;
4016                    }
4017                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
4018                    if (!carry_in) {
4019                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4020                            gen_compute_eflags(s);
4021                        }
4022                        carry_in = s->tmp0;
4023                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
4024                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4025                    }
4026
4027                    switch (ot) {
4028#ifdef TARGET_X86_64
4029                    case MO_32:
4030                        /* If we know TL is 64-bit, and we want a 32-bit
4031                           result, just do everything in 64-bit arithmetic.  */
4032                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4033                        tcg_gen_ext32u_i64(s->T0, s->T0);
4034                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4035                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
4036                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4037                        tcg_gen_shri_i64(carry_out, s->T0, 32);
4038                        break;
4039#endif
4040                    default:
4041                        /* Otherwise compute the carry-out in two steps.  */
4042                        zero = tcg_const_tl(0);
4043                        tcg_gen_add2_tl(s->T0, carry_out,
4044                                        s->T0, zero,
4045                                        carry_in, zero);
4046                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4047                                        cpu_regs[reg], carry_out,
4048                                        s->T0, zero);
4049                        tcg_temp_free(zero);
4050                        break;
4051                    }
4052                    set_cc_op(s, end_op);
4053                }
4054                break;
4055
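                /*
                 * SHLX/SARX/SHRX take the shift count from By and, unlike
                 * the legacy shifts, leave the flags untouched; the count
                 * is masked to the operand size (63 or 31) as on hardware.
                 */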
4056            case 0x1f7: /* shlx Gy, Ey, By */
4057            case 0x2f7: /* sarx Gy, Ey, By */
4058            case 0x3f7: /* shrx Gy, Ey, By */
4059                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4060                    || !(s->prefix & PREFIX_VEX)
4061                    || s->vex_l != 0) {
4062                    goto illegal_op;
4063                }
4064                ot = mo_64_32(s->dflag);
4065                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4066                if (ot == MO_64) {
4067                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4068                } else {
4069                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4070                }
4071                if (b == 0x1f7) {
4072                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4073                } else if (b == 0x2f7) {
4074                    if (ot != MO_64) {
4075                        tcg_gen_ext32s_tl(s->T0, s->T0);
4076                    }
4077                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4078                } else {
4079                    if (ot != MO_64) {
4080                        tcg_gen_ext32u_tl(s->T0, s->T0);
4081                    }
4082                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4083                }
4084                gen_op_mov_reg_v(s, ot, reg, s->T0);
4085                break;
4086
4087            case 0x0f3:
4088            case 0x1f3:
4089            case 0x2f3:
4090            case 0x3f3: /* Group 17 */
4091                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4092                    || !(s->prefix & PREFIX_VEX)
4093                    || s->vex_l != 0) {
4094                    goto illegal_op;
4095                }
4096                ot = mo_64_32(s->dflag);
4097                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4098
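                    /*
                     * Group 17, selected by the modrm reg field:
                     *   blsr:   x & (x - 1)   clear the lowest set bit
                     *   blsmsk: x ^ (x - 1)   mask up to the lowest set bit
                     *   blsi:   x & -x        isolate the lowest set bit
                     */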
4099                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4100                switch (reg & 7) {
4101                case 1: /* blsr By,Ey */
4102                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4103                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4104                    break;
4105                case 2: /* blsmsk By,Ey */
4106                    tcg_gen_subi_tl(s->T1, s->T0, 1);
4107                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4108                    break;
4109                case 3: /* blsi By, Ey */
4110                    tcg_gen_neg_tl(s->T1, s->T0);
4111                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
4112                    break;
4113                default:
4114                    goto unknown_op;
4115                }
4116                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4117                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4118                set_cc_op(s, CC_OP_BMILGB + ot);
4119                break;
4120
4121            default:
4122                goto unknown_op;
4123            }
4124            break;
4125
4126        case 0x03a:
4127        case 0x13a:
4128            b = modrm;
4129            modrm = x86_ldub_code(env, s);
4130            rm = modrm & 7;
4131            reg = ((modrm >> 3) & 7) | rex_r;
4132            mod = (modrm >> 6) & 3;
4133            if (b1 >= 2) {
4134                goto unknown_op;
4135            }
4136
4137            sse_fn_eppi = sse_op_table7[b].op[b1];
4138            if (!sse_fn_eppi) {
4139                goto unknown_op;
4140            }
4141            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4142                goto illegal_op;
4143
4144            s->rip_offset = 1;
4145
4146            if (sse_fn_eppi == SSE_SPECIAL) {
4147                ot = mo_64_32(s->dflag);
4148                rm = (modrm & 7) | REX_B(s);
4149                if (mod != 3)
4150                    gen_lea_modrm(env, s, modrm);
4151                reg = ((modrm >> 3) & 7) | rex_r;
4152                val = x86_ldub_code(env, s);
4153                switch (b) {
4154                case 0x14: /* pextrb */
4155                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4156                                            xmm_regs[reg].ZMM_B(val & 15)));
4157                    if (mod == 3) {
4158                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4159                    } else {
4160                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4161                                           s->mem_index, MO_UB);
4162                    }
4163                    break;
4164                case 0x15: /* pextrw */
4165                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4166                                            xmm_regs[reg].ZMM_W(val & 7)));
4167                    if (mod == 3) {
4168                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4169                    } else {
4170                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4171                                           s->mem_index, MO_LEUW);
4172                    }
4173                    break;
4174                case 0x16:
4175                    if (ot == MO_32) { /* pextrd */
4176                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4177                                        offsetof(CPUX86State,
4178                                                xmm_regs[reg].ZMM_L(val & 3)));
4179                        if (mod == 3) {
4180                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4181                        } else {
4182                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4183                                                s->mem_index, MO_LEUL);
4184                        }
4185                    } else { /* pextrq */
4186#ifdef TARGET_X86_64
4187                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4188                                        offsetof(CPUX86State,
4189                                                xmm_regs[reg].ZMM_Q(val & 1)));
4190                        if (mod == 3) {
4191                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4192                        } else {
4193                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4194                                                s->mem_index, MO_LEQ);
4195                        }
4196#else
4197                        goto illegal_op;
4198#endif
4199                    }
4200                    break;
4201                case 0x17: /* extractps */
4202                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4203                                            xmm_regs[reg].ZMM_L(val & 3)));
4204                    if (mod == 3) {
4205                        gen_op_mov_reg_v(s, ot, rm, s->T0);
4206                    } else {
4207                        tcg_gen_qemu_st_tl(s->T0, s->A0,
4208                                           s->mem_index, MO_LEUL);
4209                    }
4210                    break;
4211                case 0x20: /* pinsrb */
4212                    if (mod == 3) {
4213                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4214                    } else {
4215                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
4216                                           s->mem_index, MO_UB);
4217                    }
4218                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4219                                            xmm_regs[reg].ZMM_B(val & 15)));
4220                    break;
4221                case 0x21: /* insertps */
4222                    if (mod == 3) {
4223                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4224                                        offsetof(CPUX86State,xmm_regs[rm]
4225                                                .ZMM_L((val >> 6) & 3)));
4226                    } else {
4227                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4228                                            s->mem_index, MO_LEUL);
4229                    }
4230                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4231                                    offsetof(CPUX86State,xmm_regs[reg]
4232                                            .ZMM_L((val >> 4) & 3)));
4233                    if ((val >> 0) & 1)
4234                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4235                                        cpu_env, offsetof(CPUX86State,
4236                                                xmm_regs[reg].ZMM_L(0)));
4237                    if ((val >> 1) & 1)
4238                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4239                                        cpu_env, offsetof(CPUX86State,
4240                                                xmm_regs[reg].ZMM_L(1)));
4241                    if ((val >> 2) & 1)
4242                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4243                                        cpu_env, offsetof(CPUX86State,
4244                                                xmm_regs[reg].ZMM_L(2)));
4245                    if ((val >> 3) & 1)
4246                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4247                                        cpu_env, offsetof(CPUX86State,
4248                                                xmm_regs[reg].ZMM_L(3)));
4249                    break;
4250                case 0x22:
4251                    if (ot == MO_32) { /* pinsrd */
4252                        if (mod == 3) {
4253                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4254                        } else {
4255                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4256                                                s->mem_index, MO_LEUL);
4257                        }
4258                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4259                                        offsetof(CPUX86State,
4260                                                xmm_regs[reg].ZMM_L(val & 3)));
4261                    } else { /* pinsrq */
4262#ifdef TARGET_X86_64
4263                        if (mod == 3) {
4264                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4265                        } else {
4266                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4267                                                s->mem_index, MO_LEQ);
4268                        }
4269                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4270                                        offsetof(CPUX86State,
4271                                                xmm_regs[reg].ZMM_Q(val & 1)));
4272#else
4273                        goto illegal_op;
4274#endif
4275                    }
4276                    break;
4277                }
4278                return;
4279            }
4280
4281            if (b1) {
4282                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4283                if (mod == 3) {
4284                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4285                } else {
4286                    op2_offset = offsetof(CPUX86State,xmm_t0);
4287                    gen_lea_modrm(env, s, modrm);
4288                    gen_ldo_env_A0(s, op2_offset);
4289                }
4290            } else {
4291                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4292                if (mod == 3) {
4293                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4294                } else {
4295                    op2_offset = offsetof(CPUX86State,mmx_t0);
4296                    gen_lea_modrm(env, s, modrm);
4297                    gen_ldq_env_A0(s, op2_offset);
4298                }
4299            }
4300            val = x86_ldub_code(env, s);
4301
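                /*
                 * 0f 3a 60..63 are pcmpestrm/pcmpestri/pcmpistrm/pcmpistri;
                 * the helpers write their results to the flags and to the
                 * implicit destinations (xmm0 or ecx).
                 */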
4302            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4303                set_cc_op(s, CC_OP_EFLAGS);
4304
4305                if (s->dflag == MO_64) {
4306                    /* The helper must use entire 64-bit gp registers */
4307                    val |= 1 << 8;
4308                }
4309            }
4310
4311            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4312            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4313            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4314            break;
4315
4316        case 0x33a:
4317            /* Various integer extensions at 0f 3a f[0-f].  */
4318            b = modrm | (b1 << 8);
4319            modrm = x86_ldub_code(env, s);
4320            reg = ((modrm >> 3) & 7) | rex_r;
4321
4322            switch (b) {
4323            case 0x3f0: /* rorx Gy,Ey, Ib */
4324                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4325                    || !(s->prefix & PREFIX_VEX)
4326                    || s->vex_l != 0) {
4327                    goto illegal_op;
4328                }
4329                ot = mo_64_32(s->dflag);
4330                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4331                b = x86_ldub_code(env, s);
4332                if (ot == MO_64) {
4333                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4334                } else {
4335                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4336                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4337                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4338                }
4339                gen_op_mov_reg_v(s, ot, reg, s->T0);
4340                break;
4341
4342            default:
4343                goto unknown_op;
4344            }
4345            break;
4346
4347        default:
4348        unknown_op:
4349            gen_unknown_opcode(env, s);
4350            return;
4351        }
4352    } else {
4353        /* generic MMX or SSE operation */
4354        switch(b) {
4355        case 0x70: /* pshufx insn */
4356        case 0xc6: /* shufps/shufpd insn */
4357        case 0xc2: /* compare insns */
4358            s->rip_offset = 1;
4359            break;
4360        default:
4361            break;
4362        }
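            /*
             * Generic two-operand path: op1 is the destination register,
             * op2 a register or a copy of the memory operand staged in
             * xmm_t0/mmx_t0.  Scalar and comis-style ops load only 32 or
             * 64 bits so no fault is taken on bytes they would not access.
             */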
4363        if (is_xmm) {
4364            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4365            if (mod != 3) {
4366                int sz = 4;
4367
4368                gen_lea_modrm(env, s, modrm);
4369                op2_offset = offsetof(CPUX86State,xmm_t0);
4370
4371                switch (b) {
4372                case 0x50 ... 0x5a:
4373                case 0x5c ... 0x5f:
4374                case 0xc2:
4375                    /* Most sse scalar operations.  */
4376                    if (b1 == 2) {
4377                        sz = 2;
4378                    } else if (b1 == 3) {
4379                        sz = 3;
4380                    }
4381                    break;
4382
4383                case 0x2e:  /* ucomis[sd] */
4384                case 0x2f:  /* comis[sd] */
4385                    if (b1 == 0) {
4386                        sz = 2;
4387                    } else {
4388                        sz = 3;
4389                    }
4390                    break;
4391                }
4392
4393                switch (sz) {
4394                case 2:
4395                    /* 32 bit access */
4396                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
4397                    tcg_gen_st32_tl(s->T0, cpu_env,
4398                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4399                    break;
4400                case 3:
4401                    /* 64 bit access */
4402                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4403                    break;
4404                default:
4405                    /* 128 bit access */
4406                    gen_ldo_env_A0(s, op2_offset);
4407                    break;
4408                }
4409            } else {
4410                rm = (modrm & 7) | REX_B(s);
4411                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4412            }
4413        } else {
4414            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4415            if (mod != 3) {
4416                gen_lea_modrm(env, s, modrm);
4417                op2_offset = offsetof(CPUX86State,mmx_t0);
4418                gen_ldq_env_A0(s, op2_offset);
4419            } else {
4420                rm = (modrm & 7);
4421                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4422            }
4423        }
4424        switch(b) {
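            /* 3DNow! encodes its real operation as a trailing immediate byte. */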
4425        case 0x0f: /* 3DNow! data insns */
4426            val = x86_ldub_code(env, s);
4427            sse_fn_epp = sse_op_table5[val];
4428            if (!sse_fn_epp) {
4429                goto unknown_op;
4430            }
4431            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4432                goto illegal_op;
4433            }
4434            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4435            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4436            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4437            break;
4438        case 0x70: /* pshufx insn */
4439        case 0xc6: /* shufps/shufpd insn */
4440            val = x86_ldub_code(env, s);
4441            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4442            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4443            /* XXX: introduce a new table? */
4444            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4445            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4446            break;
4447        case 0xc2:
4448            /* compare insns */
4449            val = x86_ldub_code(env, s);
4450            if (val >= 8)
4451                goto unknown_op;
4452            sse_fn_epp = sse_op_table4[val][b1];
4453
4454            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4455            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4456            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4457            break;
4458        case 0xf7:
4459            /* maskmovq/maskmovdqu: the store target is DS:rDI, so prepare A0 */
4460            if (mod != 3)
4461                goto illegal_op;
4462            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4463            gen_extu(s->aflag, s->A0);
4464            gen_add_A0_ds_seg(s);
4465
4466            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4467            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4468            /* XXX: introduce a new table? */
4469            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4470            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4471            break;
4472        default:
4473            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4474            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4475            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4476            break;
4477        }
4478        if (b == 0x2e || b == 0x2f) {
4479            set_cc_op(s, CC_OP_EFLAGS);
4480        }
4481    }
4482}
4483
4484/* Convert one instruction.  s->base.is_jmp is set if the translation must
4485   be stopped.  Return the next pc value.  */
4486static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4487{
4488    CPUX86State *env = cpu->env_ptr;
4489    int b, prefixes;
4490    int shift;
4491    MemOp ot, aflag, dflag;
4492    int modrm, reg, rm, mod, op, opreg, val;
4493    target_ulong next_eip, tval;
4494    int rex_w, rex_r;
4495    target_ulong pc_start = s->base.pc_next;
4496
4497    s->pc_start = s->pc = pc_start;
4498    s->override = -1;
4499#ifdef TARGET_X86_64
4500    s->rex_x = 0;
4501    s->rex_b = 0;
4502    s->x86_64_hregs = false;
4503#endif
4504    s->rip_offset = 0; /* for relative ip address */
4505    s->vex_l = 0;
4506    s->vex_v = 0;
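        /*
         * x86 instructions are at most 15 bytes; x86_ldub_code() will
         * siglongjmp back here if decoding runs past that limit, and the
         * oversized instruction gets #GP.
         */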
4507    if (sigsetjmp(s->jmpbuf, 0) != 0) {
4508        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4509        return s->pc;
4510    }
4511
4512    prefixes = 0;
4513    rex_w = -1;
4514    rex_r = 0;
4515
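        /*
         * Each legacy prefix loops back to next_byte, so any number of
         * prefixes is accepted (subject to the overall length limit); a
         * later segment override simply replaces an earlier one.
         */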
4516 next_byte:
4517    b = x86_ldub_code(env, s);
4518    /* Collect prefixes.  */
4519    switch (b) {
4520    case 0xf3:
4521        prefixes |= PREFIX_REPZ;
4522        goto next_byte;
4523    case 0xf2:
4524        prefixes |= PREFIX_REPNZ;
4525        goto next_byte;
4526    case 0xf0:
4527        prefixes |= PREFIX_LOCK;
4528        goto next_byte;
4529    case 0x2e:
4530        s->override = R_CS;
4531        goto next_byte;
4532    case 0x36:
4533        s->override = R_SS;
4534        goto next_byte;
4535    case 0x3e:
4536        s->override = R_DS;
4537        goto next_byte;
4538    case 0x26:
4539        s->override = R_ES;
4540        goto next_byte;
4541    case 0x64:
4542        s->override = R_FS;
4543        goto next_byte;
4544    case 0x65:
4545        s->override = R_GS;
4546        goto next_byte;
4547    case 0x66:
4548        prefixes |= PREFIX_DATA;
4549        goto next_byte;
4550    case 0x67:
4551        prefixes |= PREFIX_ADR;
4552        goto next_byte;
4553#ifdef TARGET_X86_64
4554    case 0x40 ... 0x4f:
4555        if (CODE64(s)) {
4556            /* REX prefix */
4557            rex_w = (b >> 3) & 1;
4558            rex_r = (b & 0x4) << 1;
4559            s->rex_x = (b & 0x2) << 2;
4560            REX_B(s) = (b & 0x1) << 3;
4561            /* select uniform byte register addressing */
4562            s->x86_64_hregs = true;
4563            goto next_byte;
4564        }
4565        break;
4566#endif
4567    case 0xc5: /* 2-byte VEX */
4568    case 0xc4: /* 3-byte VEX */
4569        /* VEX prefixes are valid only in 32-bit and 64-bit protected mode
4570           (code32 also covers 64-bit); elsewhere c4/c5 are LES or LDS.  */
4571        if (s->code32 && !s->vm86) {
4572            static const int pp_prefix[4] = {
4573                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4574            };
4575            int vex3, vex2 = x86_ldub_code(env, s);
4576
4577            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4578                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4579                   otherwise the instruction is LES or LDS.  */
4580                s->pc--; /* rewind the advance_pc() that x86_ldub_code() did */
4581                break;
4582            }
4583
4584            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4585            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4586                            | PREFIX_LOCK | PREFIX_DATA)) {
4587                goto illegal_op;
4588            }
4589#ifdef TARGET_X86_64
4590            if (s->x86_64_hregs) {
4591                goto illegal_op;
4592            }
4593#endif
4594            rex_r = (~vex2 >> 4) & 8;
4595            if (b == 0xc5) {
4596                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4597                vex3 = vex2;
4598                b = x86_ldub_code(env, s) | 0x100;
4599            } else {
4600                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4601#ifdef TARGET_X86_64
4602                s->rex_x = (~vex2 >> 3) & 8;
4603                s->rex_b = (~vex2 >> 2) & 8;
4604#endif
4605                vex3 = x86_ldub_code(env, s);
4606                rex_w = (vex3 >> 7) & 1;
4607                switch (vex2 & 0x1f) {
4608                case 0x01: /* Implied 0f leading opcode bytes.  */
4609                    b = x86_ldub_code(env, s) | 0x100;
4610                    break;
4611                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4612                    b = 0x138;
4613                    break;
4614                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4615                    b = 0x13a;
4616                    break;
4617                default:   /* Reserved for future use.  */
4618                    goto unknown_op;
4619                }
4620            }
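            /* vex3 now holds the final VEX payload byte (vvvvLpp); vvvv
               is stored inverted in the encoding, hence the ~vex3.  */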
4621            s->vex_v = (~vex3 >> 3) & 0xf;
4622            s->vex_l = (vex3 >> 2) & 1;
4623            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4624        }
4625        break;
4626    }
4627
4628    /* Post-process prefixes.  */
4629    if (CODE64(s)) {
4630        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4631           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4632           over 0x66 if both are present.  */
4633        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4634        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4635        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4636    } else {
4637        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4638        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4639            dflag = MO_32;
4640        } else {
4641            dflag = MO_16;
4642        }
4643        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4644        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4645            aflag = MO_32;
4646        }  else {
4647            aflag = MO_16;
4648        }
4649    }
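    /* Example: in 64-bit mode, 66 48 89 d8 is a 64-bit MOV because REX.W
       takes precedence over the 0x66 prefix, while 66 89 d8 is a 16-bit
       MOV; a 0x67 prefix there would select 32-bit addressing.  */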
4650
4651    s->prefix = prefixes;
4652    s->aflag = aflag;
4653    s->dflag = dflag;
4654
4655    /* now check op code */
4656 reswitch:
4657    switch(b) {
4658    case 0x0f:
4659        /**************************/
4660        /* extended op code */
4661        b = x86_ldub_code(env, s) | 0x100;
4662        goto reswitch;
4663
4664        /**************************/
4665        /* arith & logic */
4666    case 0x00 ... 0x05:
4667    case 0x08 ... 0x0d:
4668    case 0x10 ... 0x15:
4669    case 0x18 ... 0x1d:
4670    case 0x20 ... 0x25:
4671    case 0x28 ... 0x2d:
4672    case 0x30 ... 0x35:
4673    case 0x38 ... 0x3d:
4674        {
4675            int op, f, val;
4676            op = (b >> 3) & 7;
4677            f = (b >> 1) & 3;
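            /* The low opcode bits encode the form: bit 0 selects byte vs
               word/long operands (via mo_b_d), bits 2:1 select Ev,Gv /
               Gv,Ev / accumulator,immediate, and bits 5:3 the ALU op.  */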
4678
4679            ot = mo_b_d(b, dflag);
4680
4681            switch(f) {
4682            case 0: /* OP Ev, Gv */
4683                modrm = x86_ldub_code(env, s);
4684                reg = ((modrm >> 3) & 7) | rex_r;
4685                mod = (modrm >> 6) & 3;
4686                rm = (modrm & 7) | REX_B(s);
4687                if (mod != 3) {
4688                    gen_lea_modrm(env, s, modrm);
4689                    opreg = OR_TMP0;
4690                } else if (op == OP_XORL && rm == reg) {
4691                xor_zero:
4692                    /* xor reg, reg optimisation */
4693                    set_cc_op(s, CC_OP_CLR);
4694                    tcg_gen_movi_tl(s->T0, 0);
4695                    gen_op_mov_reg_v(s, ot, reg, s->T0);
4696                    break;
4697                } else {
4698                    opreg = rm;
4699                }
4700                gen_op_mov_v_reg(s, ot, s->T1, reg);
4701                gen_op(s, op, ot, opreg);
4702                break;
4703            case 1: /* OP Gv, Ev */
4704                modrm = x86_ldub_code(env, s);
4705                mod = (modrm >> 6) & 3;
4706                reg = ((modrm >> 3) & 7) | rex_r;
4707                rm = (modrm & 7) | REX_B(s);
4708                if (mod != 3) {
4709                    gen_lea_modrm(env, s, modrm);
4710                    gen_op_ld_v(s, ot, s->T1, s->A0);
4711                } else if (op == OP_XORL && rm == reg) {
4712                    goto xor_zero;
4713                } else {
4714                    gen_op_mov_v_reg(s, ot, s->T1, rm);
4715                }
4716                gen_op(s, op, ot, reg);
4717                break;
4718            case 2: /* OP A, Iv */
4719                val = insn_get(env, s, ot);
4720                tcg_gen_movi_tl(s->T1, val);
4721                gen_op(s, op, ot, OR_EAX);
4722                break;
4723            }
4724        }
4725        break;
4726
4727    case 0x82:
4728        if (CODE64(s))
4729            goto illegal_op;
4730        /* fall through */
4731    case 0x80: /* GRP1 */
4732    case 0x81:
4733    case 0x83:
4734        {
4735            int val;
4736
4737            ot = mo_b_d(b, dflag);
4738
4739            modrm = x86_ldub_code(env, s);
4740            mod = (modrm >> 6) & 3;
4741            rm = (modrm & 7) | REX_B(s);
4742            op = (modrm >> 3) & 7;
4743
4744            if (mod != 3) {
4745                if (b == 0x83)
4746                    s->rip_offset = 1;
4747                else
4748                    s->rip_offset = insn_const_size(ot);
4749                gen_lea_modrm(env, s, modrm);
4750                opreg = OR_TMP0;
4751            } else {
4752                opreg = rm;
4753            }
4754
4755            switch(b) {
4756            default:
4757            case 0x80:
4758            case 0x81:
4759            case 0x82:
4760                val = insn_get(env, s, ot);
4761                break;
4762            case 0x83:
4763                val = (int8_t)insn_get(env, s, MO_8);
4764                break;
4765            }
4766            tcg_gen_movi_tl(s->T1, val);
4767            gen_op(s, op, ot, opreg);
4768        }
4769        break;
4770
4771        /**************************/
4772        /* inc, dec, and other misc arith */
4773    case 0x40 ... 0x47: /* inc Gv */
4774        ot = dflag;
4775        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4776        break;
4777    case 0x48 ... 0x4f: /* dec Gv */
4778        ot = dflag;
4779        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4780        break;
4781    case 0xf6: /* GRP3 */
4782    case 0xf7:
4783        ot = mo_b_d(b, dflag);
4784
4785        modrm = x86_ldub_code(env, s);
4786        mod = (modrm >> 6) & 3;
4787        rm = (modrm & 7) | REX_B(s);
4788        op = (modrm >> 3) & 7;
4789        if (mod != 3) {
4790            if (op == 0) {
4791                s->rip_offset = insn_const_size(ot);
4792            }
4793            gen_lea_modrm(env, s, modrm);
4794            /* For those below that handle locked memory, don't load here.  */
4795            if (!(s->prefix & PREFIX_LOCK)
4796                || op != 2) {
4797                gen_op_ld_v(s, ot, s->T0, s->A0);
4798            }
4799        } else {
4800            gen_op_mov_v_reg(s, ot, s->T0, rm);
4801        }
4802
4803        switch(op) {
4804        case 0: /* test */
4805            val = insn_get(env, s, ot);
4806            tcg_gen_movi_tl(s->T1, val);
4807            gen_op_testl_T0_T1_cc(s);
4808            set_cc_op(s, CC_OP_LOGICB + ot);
4809            break;
4810        case 2: /* not */
4811            if (s->prefix & PREFIX_LOCK) {
4812                if (mod == 3) {
4813                    goto illegal_op;
4814                }
4815                tcg_gen_movi_tl(s->T0, ~0);
4816                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4817                                            s->mem_index, ot | MO_LE);
4818            } else {
4819                tcg_gen_not_tl(s->T0, s->T0);
4820                if (mod != 3) {
4821                    gen_op_st_v(s, ot, s->T0, s->A0);
4822                } else {
4823                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4824                }
4825            }
4826            break;
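        /* There is no atomic-negate TCG op, so locked NEG is emulated
           with a cmpxchg retry loop: reload the old value, attempt to
           store its negation, and retry until the exchange succeeds.  */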
4827        case 3: /* neg */
4828            if (s->prefix & PREFIX_LOCK) {
4829                TCGLabel *label1;
4830                TCGv a0, t0, t1, t2;
4831
4832                if (mod == 3) {
4833                    goto illegal_op;
4834                }
4835                a0 = tcg_temp_local_new();
4836                t0 = tcg_temp_local_new();
4837                label1 = gen_new_label();
4838
4839                tcg_gen_mov_tl(a0, s->A0);
4840                tcg_gen_mov_tl(t0, s->T0);
4841
4842                gen_set_label(label1);
4843                t1 = tcg_temp_new();
4844                t2 = tcg_temp_new();
4845                tcg_gen_mov_tl(t2, t0);
4846                tcg_gen_neg_tl(t1, t0);
4847                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4848                                          s->mem_index, ot | MO_LE);
4849                tcg_temp_free(t1);
4850                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4851
4852                tcg_temp_free(t2);
4853                tcg_temp_free(a0);
4854                tcg_gen_mov_tl(s->T0, t0);
4855                tcg_temp_free(t0);
4856            } else {
4857                tcg_gen_neg_tl(s->T0, s->T0);
4858                if (mod != 3) {
4859                    gen_op_st_v(s, ot, s->T0, s->A0);
4860                } else {
4861                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4862                }
4863            }
4864            gen_op_update_neg_cc(s);
4865            set_cc_op(s, CC_OP_SUBB + ot);
4866            break;
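        /* For MUL, CF/OF are set iff the high half of the product is
           non-zero, so cpu_cc_src receives the high part.  For IMUL they
           are set iff the high part differs from the sign-extension of
           the low part, hence the subtraction into cpu_cc_src below.  */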
4867        case 4: /* mul */
4868            switch(ot) {
4869            case MO_8:
4870                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4871                tcg_gen_ext8u_tl(s->T0, s->T0);
4872                tcg_gen_ext8u_tl(s->T1, s->T1);
4873                /* XXX: use 32 bit mul which could be faster */
4874                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4875                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4876                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4877                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4878                set_cc_op(s, CC_OP_MULB);
4879                break;
4880            case MO_16:
4881                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4882                tcg_gen_ext16u_tl(s->T0, s->T0);
4883                tcg_gen_ext16u_tl(s->T1, s->T1);
4884                /* XXX: use 32 bit mul which could be faster */
4885                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4886                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4887                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4888                tcg_gen_shri_tl(s->T0, s->T0, 16);
4889                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4890                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4891                set_cc_op(s, CC_OP_MULW);
4892                break;
4893            default:
4894            case MO_32:
4895                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4896                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4897                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4898                                  s->tmp2_i32, s->tmp3_i32);
4899                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4900                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4901                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4902                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4903                set_cc_op(s, CC_OP_MULL);
4904                break;
4905#ifdef TARGET_X86_64
4906            case MO_64:
4907                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4908                                  s->T0, cpu_regs[R_EAX]);
4909                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4910                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4911                set_cc_op(s, CC_OP_MULQ);
4912                break;
4913#endif
4914            }
4915            break;
4916        case 5: /* imul */
4917            switch(ot) {
4918            case MO_8:
4919                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4920                tcg_gen_ext8s_tl(s->T0, s->T0);
4921                tcg_gen_ext8s_tl(s->T1, s->T1);
4922                /* XXX: use 32 bit mul which could be faster */
4923                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4924                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4925                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4926                tcg_gen_ext8s_tl(s->tmp0, s->T0);
4927                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4928                set_cc_op(s, CC_OP_MULB);
4929                break;
4930            case MO_16:
4931                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4932                tcg_gen_ext16s_tl(s->T0, s->T0);
4933                tcg_gen_ext16s_tl(s->T1, s->T1);
4934                /* XXX: use 32 bit mul which could be faster */
4935                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4936                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4937                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4938                tcg_gen_ext16s_tl(s->tmp0, s->T0);
4939                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4940                tcg_gen_shri_tl(s->T0, s->T0, 16);
4941                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4942                set_cc_op(s, CC_OP_MULW);
4943                break;
4944            default:
4945            case MO_32:
4946                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4947                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4948                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4949                                  s->tmp2_i32, s->tmp3_i32);
4950                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4951                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4952                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4953                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4954                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4955                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4956                set_cc_op(s, CC_OP_MULL);
4957                break;
4958#ifdef TARGET_X86_64
4959            case MO_64:
4960                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4961                                  s->T0, cpu_regs[R_EAX]);
4962                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4963                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4964                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4965                set_cc_op(s, CC_OP_MULQ);
4966                break;
4967#endif
4968            }
4969            break;
4970        case 6: /* div */
4971            switch(ot) {
4972            case MO_8:
4973                gen_helper_divb_AL(cpu_env, s->T0);
4974                break;
4975            case MO_16:
4976                gen_helper_divw_AX(cpu_env, s->T0);
4977                break;
4978            default:
4979            case MO_32:
4980                gen_helper_divl_EAX(cpu_env, s->T0);
4981                break;
4982#ifdef TARGET_X86_64
4983            case MO_64:
4984                gen_helper_divq_EAX(cpu_env, s->T0);
4985                break;
4986#endif
4987            }
4988            break;
4989        case 7: /* idiv */
4990            switch(ot) {
4991            case MO_8:
4992                gen_helper_idivb_AL(cpu_env, s->T0);
4993                break;
4994            case MO_16:
4995                gen_helper_idivw_AX(cpu_env, s->T0);
4996                break;
4997            default:
4998            case MO_32:
4999                gen_helper_idivl_EAX(cpu_env, s->T0);
5000                break;
5001#ifdef TARGET_X86_64
5002            case MO_64:
5003                gen_helper_idivq_EAX(cpu_env, s->T0);
5004                break;
5005#endif
5006            }
5007            break;
5008        default:
5009            goto unknown_op;
5010        }
5011        break;
5012
5013    case 0xfe: /* GRP4 */
5014    case 0xff: /* GRP5 */
5015        ot = mo_b_d(b, dflag);
5016
5017        modrm = x86_ldub_code(env, s);
5018        mod = (modrm >> 6) & 3;
5019        rm = (modrm & 7) | REX_B(s);
5020        op = (modrm >> 3) & 7;
5021        if (op >= 2 && b == 0xfe) {
5022            goto unknown_op;
5023        }
5024        if (CODE64(s)) {
5025            if (op == 2 || op == 4) {
5026                /* operand size for jumps is 64 bit */
5027                ot = MO_64;
5028            } else if (op == 3 || op == 5) {
5029                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
5030            } else if (op == 6) {
5031                /* default push size is 64 bit */
5032                ot = mo_pushpop(s, dflag);
5033            }
5034        }
5035        if (mod != 3) {
5036            gen_lea_modrm(env, s, modrm);
5037            if (op >= 2 && op != 3 && op != 5)
5038                gen_op_ld_v(s, ot, s->T0, s->A0);
5039        } else {
5040            gen_op_mov_v_reg(s, ot, s->T0, rm);
5041        }
5042
5043        switch(op) {
5044        case 0: /* inc Ev */
5045            if (mod != 3)
5046                opreg = OR_TMP0;
5047            else
5048                opreg = rm;
5049            gen_inc(s, ot, opreg, 1);
5050            break;
5051        case 1: /* dec Ev */
5052            if (mod != 3)
5053                opreg = OR_TMP0;
5054            else
5055                opreg = rm;
5056            gen_inc(s, ot, opreg, -1);
5057            break;
5058        case 2: /* call Ev */
5059            /* XXX: optimize if memory (no 'and' is necessary) */
5060            if (dflag == MO_16) {
5061                tcg_gen_ext16u_tl(s->T0, s->T0);
5062            }
5063            next_eip = s->pc - s->cs_base;
5064            tcg_gen_movi_tl(s->T1, next_eip);
5065            gen_push_v(s, s->T1);
5066            gen_op_jmp_v(s->T0);
5067            gen_bnd_jmp(s);
5068            gen_jr(s, s->T0);
5069            break;
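        /* lcall/ljmp Ev read the new offset first, then the 16-bit
           selector that follows it; in protected mode the far transfer
           goes through a helper so descriptor and privilege checks
           apply.  */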
5070        case 3: /* lcall Ev */
5071            gen_op_ld_v(s, ot, s->T1, s->A0);
5072            gen_add_A0_im(s, 1 << ot);
5073            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5074        do_lcall:
5075            if (s->pe && !s->vm86) {
5076                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5077                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5078                                           tcg_const_i32(dflag - 1),
5079                                           tcg_const_tl(s->pc - s->cs_base));
5080            } else {
5081                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5082                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5083                                      tcg_const_i32(dflag - 1),
5084                                      tcg_const_i32(s->pc - s->cs_base));
5085            }
5086            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5087            gen_jr(s, s->tmp4);
5088            break;
5089        case 4: /* jmp Ev */
5090            if (dflag == MO_16) {
5091                tcg_gen_ext16u_tl(s->T0, s->T0);
5092            }
5093            gen_op_jmp_v(s->T0);
5094            gen_bnd_jmp(s);
5095            gen_jr(s, s->T0);
5096            break;
5097        case 5: /* ljmp Ev */
5098            gen_op_ld_v(s, ot, s->T1, s->A0);
5099            gen_add_A0_im(s, 1 << ot);
5100            gen_op_ld_v(s, MO_16, s->T0, s->A0);
5101        do_ljmp:
5102            if (s->pe && !s->vm86) {
5103                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5104                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5105                                          tcg_const_tl(s->pc - s->cs_base));
5106            } else {
5107                gen_op_movl_seg_T0_vm(s, R_CS);
5108                gen_op_jmp_v(s->T1);
5109            }
5110            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5111            gen_jr(s, s->tmp4);
5112            break;
5113        case 6: /* push Ev */
5114            gen_push_v(s, s->T0);
5115            break;
5116        default:
5117            goto unknown_op;
5118        }
5119        break;
5120
5121    case 0x84: /* test Ev, Gv */
5122    case 0x85:
5123        ot = mo_b_d(b, dflag);
5124
5125        modrm = x86_ldub_code(env, s);
5126        reg = ((modrm >> 3) & 7) | rex_r;
5127
5128        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5129        gen_op_mov_v_reg(s, ot, s->T1, reg);
5130        gen_op_testl_T0_T1_cc(s);
5131        set_cc_op(s, CC_OP_LOGICB + ot);
5132        break;
5133
5134    case 0xa8: /* test eAX, Iv */
5135    case 0xa9:
5136        ot = mo_b_d(b, dflag);
5137        val = insn_get(env, s, ot);
5138
5139        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5140        tcg_gen_movi_tl(s->T1, val);
5141        gen_op_testl_T0_T1_cc(s);
5142        set_cc_op(s, CC_OP_LOGICB + ot);
5143        break;
5144
5145    case 0x98: /* CWDE/CBW */
5146        switch (dflag) {
5147#ifdef TARGET_X86_64
5148        case MO_64:
5149            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5150            tcg_gen_ext32s_tl(s->T0, s->T0);
5151            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5152            break;
5153#endif
5154        case MO_32:
5155            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5156            tcg_gen_ext16s_tl(s->T0, s->T0);
5157            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5158            break;
5159        case MO_16:
5160            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5161            tcg_gen_ext8s_tl(s->T0, s->T0);
5162            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5163            break;
5164        default:
5165            tcg_abort();
5166        }
5167        break;
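    /* CWD/CDQ/CQO replicate the accumulator's sign bit into DX/EDX/RDX
       via an arithmetic right shift.  */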
5168    case 0x99: /* CDQ/CWD */
5169        switch (dflag) {
5170#ifdef TARGET_X86_64
5171        case MO_64:
5172            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5173            tcg_gen_sari_tl(s->T0, s->T0, 63);
5174            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5175            break;
5176#endif
5177        case MO_32:
5178            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5179            tcg_gen_ext32s_tl(s->T0, s->T0);
5180            tcg_gen_sari_tl(s->T0, s->T0, 31);
5181            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5182            break;
5183        case MO_16:
5184            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5185            tcg_gen_ext16s_tl(s->T0, s->T0);
5186            tcg_gen_sari_tl(s->T0, s->T0, 15);
5187            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5188            break;
5189        default:
5190            tcg_abort();
5191        }
5192        break;
5193    case 0x1af: /* imul Gv, Ev */
5194    case 0x69: /* imul Gv, Ev, I */
5195    case 0x6b:
5196        ot = dflag;
5197        modrm = x86_ldub_code(env, s);
5198        reg = ((modrm >> 3) & 7) | rex_r;
5199        if (b == 0x69)
5200            s->rip_offset = insn_const_size(ot);
5201        else if (b == 0x6b)
5202            s->rip_offset = 1;
5203        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5204        if (b == 0x69) {
5205            val = insn_get(env, s, ot);
5206            tcg_gen_movi_tl(s->T1, val);
5207        } else if (b == 0x6b) {
5208            val = (int8_t)insn_get(env, s, MO_8);
5209            tcg_gen_movi_tl(s->T1, val);
5210        } else {
5211            gen_op_mov_v_reg(s, ot, s->T1, reg);
5212        }
5213        switch (ot) {
5214#ifdef TARGET_X86_64
5215        case MO_64:
5216            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5217            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5218            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5219            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5220            break;
5221#endif
5222        case MO_32:
5223            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5224            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5225            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5226                              s->tmp2_i32, s->tmp3_i32);
5227            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5228            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5229            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5230            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5231            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5232            break;
5233        default:
5234            tcg_gen_ext16s_tl(s->T0, s->T0);
5235            tcg_gen_ext16s_tl(s->T1, s->T1);
5236            /* XXX: use 32 bit mul which could be faster */
5237            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5238            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5239            tcg_gen_ext16s_tl(s->tmp0, s->T0);
5240            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5241            gen_op_mov_reg_v(s, ot, reg, s->T0);
5242            break;
5243        }
5244        set_cc_op(s, CC_OP_MULB + ot);
5245        break;
5246    case 0x1c0:
5247    case 0x1c1: /* xadd Ev, Gv */
5248        ot = mo_b_d(b, dflag);
5249        modrm = x86_ldub_code(env, s);
5250        reg = ((modrm >> 3) & 7) | rex_r;
5251        mod = (modrm >> 6) & 3;
5252        gen_op_mov_v_reg(s, ot, s->T0, reg);
5253        if (mod == 3) {
5254            rm = (modrm & 7) | REX_B(s);
5255            gen_op_mov_v_reg(s, ot, s->T1, rm);
5256            tcg_gen_add_tl(s->T0, s->T0, s->T1);
5257            gen_op_mov_reg_v(s, ot, reg, s->T1);
5258            gen_op_mov_reg_v(s, ot, rm, s->T0);
5259        } else {
5260            gen_lea_modrm(env, s, modrm);
5261            if (s->prefix & PREFIX_LOCK) {
5262                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5263                                            s->mem_index, ot | MO_LE);
5264                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5265            } else {
5266                gen_op_ld_v(s, ot, s->T1, s->A0);
5267                tcg_gen_add_tl(s->T0, s->T0, s->T1);
5268                gen_op_st_v(s, ot, s->T0, s->A0);
5269            }
5270            gen_op_mov_reg_v(s, ot, reg, s->T1);
5271        }
5272        gen_op_update2_cc(s);
5273        set_cc_op(s, CC_OP_ADDB + ot);
5274        break;
5275    case 0x1b0:
5276    case 0x1b1: /* cmpxchg Ev, Gv */
5277        {
5278            TCGv oldv, newv, cmpv;
5279
5280            ot = mo_b_d(b, dflag);
5281            modrm = x86_ldub_code(env, s);
5282            reg = ((modrm >> 3) & 7) | rex_r;
5283            mod = (modrm >> 6) & 3;
5284            oldv = tcg_temp_new();
5285            newv = tcg_temp_new();
5286            cmpv = tcg_temp_new();
5287            gen_op_mov_v_reg(s, ot, newv, reg);
5288            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5289
5290            if (s->prefix & PREFIX_LOCK) {
5291                if (mod == 3) {
5292                    goto illegal_op;
5293                }
5294                gen_lea_modrm(env, s, modrm);
5295                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5296                                          s->mem_index, ot | MO_LE);
5297                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5298            } else {
5299                if (mod == 3) {
5300                    rm = (modrm & 7) | REX_B(s);
5301                    gen_op_mov_v_reg(s, ot, oldv, rm);
5302                } else {
5303                    gen_lea_modrm(env, s, modrm);
5304                    gen_op_ld_v(s, ot, oldv, s->A0);
5305                    rm = 0; /* avoid warning */
5306                }
5307                gen_extu(ot, oldv);
5308                gen_extu(ot, cmpv);
5309                /* store value = (old == cmp ? new : old);  */
5310                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5311                if (mod == 3) {
5312                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5313                    gen_op_mov_reg_v(s, ot, rm, newv);
5314                } else {
5315                    /* Perform an unconditional store cycle, as a physical
5316                       CPU does; it must happen before the accumulator is
5317                       updated so the insn remains idempotent if the store
5318                       faults and the instruction is restarted */
5319                    gen_op_st_v(s, ot, newv, s->A0);
5320                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5321                }
5322            }
5323            tcg_gen_mov_tl(cpu_cc_src, oldv);
5324            tcg_gen_mov_tl(s->cc_srcT, cmpv);
5325            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5326            set_cc_op(s, CC_OP_SUBB + ot);
5327            tcg_temp_free(oldv);
5328            tcg_temp_free(newv);
5329            tcg_temp_free(cmpv);
5330        }
5331        break;
5332    case 0x1c7: /* cmpxchg8b */
5333        modrm = x86_ldub_code(env, s);
5334        mod = (modrm >> 6) & 3;
5335        switch ((modrm >> 3) & 7) {
5336        case 1: /* CMPXCHG8, CMPXCHG16 */
5337            if (mod == 3) {
5338                goto illegal_op;
5339            }
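            /* cmpxchg8b/16b live in helpers; with a LOCK prefix and other
               vCPUs running in parallel (CF_PARALLEL) the locked variant
               performs the update atomically, otherwise the unlocked
               variant suffices.  */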
5340#ifdef TARGET_X86_64
5341            if (dflag == MO_64) {
5342                if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5343                    goto illegal_op;
5344                }
5345                gen_lea_modrm(env, s, modrm);
5346                if ((s->prefix & PREFIX_LOCK) &&
5347                    (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5348                    gen_helper_cmpxchg16b(cpu_env, s->A0);
5349                } else {
5350                    gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5351                }
5352                set_cc_op(s, CC_OP_EFLAGS);
5353                break;
5354            }
5355#endif
5356            if (!(s->cpuid_features & CPUID_CX8)) {
5357                goto illegal_op;
5358            }
5359            gen_lea_modrm(env, s, modrm);
5360            if ((s->prefix & PREFIX_LOCK) &&
5361                (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5362                gen_helper_cmpxchg8b(cpu_env, s->A0);
5363            } else {
5364                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5365            }
5366            set_cc_op(s, CC_OP_EFLAGS);
5367            break;
5368
5369        case 7: /* RDSEED */
5370        case 6: /* RDRAND */
5371            if (mod != 3 ||
5372                (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5373                !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5374                goto illegal_op;
5375            }
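            /* Under icount, the randomness source behaves like I/O so
               that record/replay stays deterministic: run the helper
               after gen_io_start() and end the TB afterwards.  */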
5376            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5377                gen_io_start();
5378            }
5379            gen_helper_rdrand(s->T0, cpu_env);
5380            rm = (modrm & 7) | REX_B(s);
5381            gen_op_mov_reg_v(s, dflag, rm, s->T0);
5382            set_cc_op(s, CC_OP_EFLAGS);
5383            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5384                gen_jmp(s, s->pc - s->cs_base);
5385            }
5386            break;
5387
5388        default:
5389            goto illegal_op;
5390        }
5391        break;
5392
5393        /**************************/
5394        /* push/pop */
5395    case 0x50 ... 0x57: /* push */
5396        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5397        gen_push_v(s, s->T0);
5398        break;
5399    case 0x58 ... 0x5f: /* pop */
5400        ot = gen_pop_T0(s);
5401        /* NOTE: order is important for pop %sp */
5402        gen_pop_update(s, ot);
5403        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5404        break;
5405    case 0x60: /* pusha */
5406        if (CODE64(s))
5407            goto illegal_op;
5408        gen_pusha(s);
5409        break;
5410    case 0x61: /* popa */
5411        if (CODE64(s))
5412            goto illegal_op;
5413        gen_popa(s);
5414        break;
5415    case 0x68: /* push Iv */
5416    case 0x6a:
5417        ot = mo_pushpop(s, dflag);
5418        if (b == 0x68)
5419            val = insn_get(env, s, ot);
5420        else
5421            val = (int8_t)insn_get(env, s, MO_8);
5422        tcg_gen_movi_tl(s->T0, val);
5423        gen_push_v(s, s->T0);
5424        break;
5425    case 0x8f: /* pop Ev */
5426        modrm = x86_ldub_code(env, s);
5427        mod = (modrm >> 6) & 3;
5428        ot = gen_pop_T0(s);
5429        if (mod == 3) {
5430            /* NOTE: order is important for pop %sp */
5431            gen_pop_update(s, ot);
5432            rm = (modrm & 7) | REX_B(s);
5433            gen_op_mov_reg_v(s, ot, rm, s->T0);
5434        } else {
5435            /* NOTE: order is important too for MMU exceptions */
5436            s->popl_esp_hack = 1 << ot;
5437            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5438            s->popl_esp_hack = 0;
5439            gen_pop_update(s, ot);
5440        }
5441        break;
5442    case 0xc8: /* enter */
5443        {
5444            int level;
5445            val = x86_lduw_code(env, s);
5446            level = x86_ldub_code(env, s);
5447            gen_enter(s, val, level);
5448        }
5449        break;
5450    case 0xc9: /* leave */
5451        gen_leave(s);
5452        break;
5453    case 0x06: /* push es */
5454    case 0x0e: /* push cs */
5455    case 0x16: /* push ss */
5456    case 0x1e: /* push ds */
5457        if (CODE64(s))
5458            goto illegal_op;
5459        gen_op_movl_T0_seg(s, b >> 3);
5460        gen_push_v(s, s->T0);
5461        break;
5462    case 0x1a0: /* push fs */
5463    case 0x1a8: /* push gs */
5464        gen_op_movl_T0_seg(s, (b >> 3) & 7);
5465        gen_push_v(s, s->T0);
5466        break;
5467    case 0x07: /* pop es */
5468    case 0x17: /* pop ss */
5469    case 0x1f: /* pop ds */
5470        if (CODE64(s))
5471            goto illegal_op;
5472        reg = b >> 3;
5473        ot = gen_pop_T0(s);
5474        gen_movl_seg_T0(s, reg);
5475        gen_pop_update(s, ot);
5476        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5477        if (s->base.is_jmp) {
5478            gen_jmp_im(s, s->pc - s->cs_base);
5479            if (reg == R_SS) {
5480                s->tf = 0;
5481                gen_eob_inhibit_irq(s, true);
5482            } else {
5483                gen_eob(s);
5484            }
5485        }
5486        break;
5487    case 0x1a1: /* pop fs */
5488    case 0x1a9: /* pop gs */
5489        ot = gen_pop_T0(s);
5490        gen_movl_seg_T0(s, (b >> 3) & 7);
5491        gen_pop_update(s, ot);
5492        if (s->base.is_jmp) {
5493            gen_jmp_im(s, s->pc - s->cs_base);
5494            gen_eob(s);
5495        }
5496        break;
5497
5498        /**************************/
5499        /* mov */
5500    case 0x88:
5501    case 0x89: /* mov Gv, Ev */
5502        ot = mo_b_d(b, dflag);
5503        modrm = x86_ldub_code(env, s);
5504        reg = ((modrm >> 3) & 7) | rex_r;
5505
5506        /* generate a generic store */
5507        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5508        break;
5509    case 0xc6:
5510    case 0xc7: /* mov Ev, Iv */
5511        ot = mo_b_d(b, dflag);
5512        modrm = x86_ldub_code(env, s);
5513        mod = (modrm >> 6) & 3;
5514        if (mod != 3) {
5515            s->rip_offset = insn_const_size(ot);
5516            gen_lea_modrm(env, s, modrm);
5517        }
5518        val = insn_get(env, s, ot);
5519        tcg_gen_movi_tl(s->T0, val);
5520        if (mod != 3) {
5521            gen_op_st_v(s, ot, s->T0, s->A0);
5522        } else {
5523            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5524        }
5525        break;
5526    case 0x8a:
5527    case 0x8b: /* mov Ev, Gv */
5528        ot = mo_b_d(b, dflag);
5529        modrm = x86_ldub_code(env, s);
5530        reg = ((modrm >> 3) & 7) | rex_r;
5531
5532        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5533        gen_op_mov_reg_v(s, ot, reg, s->T0);
5534        break;
5535    case 0x8e: /* mov seg, Gv */
5536        modrm = x86_ldub_code(env, s);
5537        reg = (modrm >> 3) & 7;
5538        if (reg >= 6 || reg == R_CS)
5539            goto illegal_op;
5540        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5541        gen_movl_seg_T0(s, reg);
5542        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5543        if (s->base.is_jmp) {
5544            gen_jmp_im(s, s->pc - s->cs_base);
5545            if (reg == R_SS) {
5546                s->tf = 0;
5547                gen_eob_inhibit_irq(s, true);
5548            } else {
5549                gen_eob(s);
5550            }
5551        }
5552        break;
5553    case 0x8c: /* mov Gv, seg */
5554        modrm = x86_ldub_code(env, s);
5555        reg = (modrm >> 3) & 7;
5556        mod = (modrm >> 6) & 3;
5557        if (reg >= 6)
5558            goto illegal_op;
5559        gen_op_movl_T0_seg(s, reg);
5560        ot = mod == 3 ? dflag : MO_16;
5561        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5562        break;
5563
5564    case 0x1b6: /* movzbS Gv, Eb */
5565    case 0x1b7: /* movzwS Gv, Eb */
5566    case 0x1be: /* movsbS Gv, Eb */
5567    case 0x1bf: /* movswS Gv, Eb */
5568        {
5569            MemOp d_ot;
5570            MemOp s_ot;
5571
5572            /* d_ot is the size of the destination */
5573            d_ot = dflag;
5574            /* ot is the size of the source */
5575            ot = (b & 1) + MO_8;
5576            /* s_ot is the sign+size of the source */
5577            s_ot = b & 8 ? MO_SIGN | ot : ot;
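            /* e.g. b == 0x1be (movsbS) has bit 0 clear and bit 3 set,
               giving ot = MO_8 and s_ot = MO_SIGN | MO_8, while 0x1b7
               (movzwS) gives an unsigned MO_16 source.  */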
5578
5579            modrm = x86_ldub_code(env, s);
5580            reg = ((modrm >> 3) & 7) | rex_r;
5581            mod = (modrm >> 6) & 3;
5582            rm = (modrm & 7) | REX_B(s);
5583
5584            if (mod == 3) {
5585                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5586                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5587                } else {
5588                    gen_op_mov_v_reg(s, ot, s->T0, rm);
5589                    switch (s_ot) {
5590                    case MO_UB:
5591                        tcg_gen_ext8u_tl(s->T0, s->T0);
5592                        break;
5593                    case MO_SB:
5594                        tcg_gen_ext8s_tl(s->T0, s->T0);
5595                        break;
5596                    case MO_UW:
5597                        tcg_gen_ext16u_tl(s->T0, s->T0);
5598                        break;
5599                    default:
5600                    case MO_SW:
5601                        tcg_gen_ext16s_tl(s->T0, s->T0);
5602                        break;
5603                    }
5604                }
5605                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5606            } else {
5607                gen_lea_modrm(env, s, modrm);
5608                gen_op_ld_v(s, s_ot, s->T0, s->A0);
5609                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5610            }
5611        }
5612        break;
5613
5614    case 0x8d: /* lea */
5615        modrm = x86_ldub_code(env, s);
5616        mod = (modrm >> 6) & 3;
5617        if (mod == 3)
5618            goto illegal_op;
5619        reg = ((modrm >> 3) & 7) | rex_r;
5620        {
5621            AddressParts a = gen_lea_modrm_0(env, s, modrm);
5622            TCGv ea = gen_lea_modrm_1(s, a);
5623            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5624            gen_op_mov_reg_v(s, dflag, reg, s->A0);
5625        }
5626        break;
5627
5628    case 0xa0: /* mov EAX, Ov */
5629    case 0xa1:
5630    case 0xa2: /* mov Ov, EAX */
5631    case 0xa3:
5632        {
5633            target_ulong offset_addr;
5634
5635            ot = mo_b_d(b, dflag);
5636            switch (s->aflag) {
5637#ifdef TARGET_X86_64
5638            case MO_64:
5639                offset_addr = x86_ldq_code(env, s);
5640                break;
5641#endif
5642            default:
5643                offset_addr = insn_get(env, s, s->aflag);
5644                break;
5645            }
5646            tcg_gen_movi_tl(s->A0, offset_addr);
5647            gen_add_A0_ds_seg(s);
5648            if ((b & 2) == 0) {
5649                gen_op_ld_v(s, ot, s->T0, s->A0);
5650                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5651            } else {
5652                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5653                gen_op_st_v(s, ot, s->T0, s->A0);
5654            }
5655        }
5656        break;
5657    case 0xd7: /* xlat */
5658        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5659        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5660        tcg_gen_add_tl(s->A0, s->A0, s->T0);
5661        gen_extu(s->aflag, s->A0);
5662        gen_add_A0_ds_seg(s);
5663        gen_op_ld_v(s, MO_8, s->T0, s->A0);
5664        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5665        break;
5666    case 0xb0 ... 0xb7: /* mov R, Ib */
5667        val = insn_get(env, s, MO_8);
5668        tcg_gen_movi_tl(s->T0, val);
5669        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5670        break;
5671    case 0xb8 ... 0xbf: /* mov R, Iv */
5672#ifdef TARGET_X86_64
5673        if (dflag == MO_64) {
5674            uint64_t tmp;
5675            /* 64 bit case */
5676            tmp = x86_ldq_code(env, s);
5677            reg = (b & 7) | REX_B(s);
5678            tcg_gen_movi_tl(s->T0, tmp);
5679            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5680        } else
5681#endif
5682        {
5683            ot = dflag;
5684            val = insn_get(env, s, ot);
5685            reg = (b & 7) | REX_B(s);
5686            tcg_gen_movi_tl(s->T0, val);
5687            gen_op_mov_reg_v(s, ot, reg, s->T0);
5688        }
5689        break;
5690
5691    case 0x91 ... 0x97: /* xchg R, EAX */
5692    do_xchg_reg_eax:
5693        ot = dflag;
5694        reg = (b & 7) | REX_B(s);
5695        rm = R_EAX;
5696        goto do_xchg_reg;
5697    case 0x86:
5698    case 0x87: /* xchg Ev, Gv */
5699        ot = mo_b_d(b, dflag);
5700        modrm = x86_ldub_code(env, s);
5701        reg = ((modrm >> 3) & 7) | rex_r;
5702        mod = (modrm >> 6) & 3;
5703        if (mod == 3) {
5704            rm = (modrm & 7) | REX_B(s);
5705        do_xchg_reg:
5706            gen_op_mov_v_reg(s, ot, s->T0, reg);
5707            gen_op_mov_v_reg(s, ot, s->T1, rm);
5708            gen_op_mov_reg_v(s, ot, rm, s->T0);
5709            gen_op_mov_reg_v(s, ot, reg, s->T1);
5710        } else {
5711            gen_lea_modrm(env, s, modrm);
5712            gen_op_mov_v_reg(s, ot, s->T0, reg);
5713            /* for xchg, lock is implicit */
5714            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5715                                   s->mem_index, ot | MO_LE);
5716            gen_op_mov_reg_v(s, ot, reg, s->T1);
5717        }
5718        break;
5719    case 0xc4: /* les Gv */
5720        /* In CODE64 this is VEX3; see above.  */
5721        op = R_ES;
5722        goto do_lxx;
5723    case 0xc5: /* lds Gv */
5724        /* In CODE64 this is VEX2; see above.  */
5725        op = R_DS;
5726        goto do_lxx;
5727    case 0x1b2: /* lss Gv */
5728        op = R_SS;
5729        goto do_lxx;
5730    case 0x1b4: /* lfs Gv */
5731        op = R_FS;
5732        goto do_lxx;
5733    case 0x1b5: /* lgs Gv */
5734        op = R_GS;
5735    do_lxx:
5736        ot = dflag != MO_16 ? MO_32 : MO_16;
5737        modrm = x86_ldub_code(env, s);
5738        reg = ((modrm >> 3) & 7) | rex_r;
5739        mod = (modrm >> 6) & 3;
5740        if (mod == 3)
5741            goto illegal_op;
5742        gen_lea_modrm(env, s, modrm);
5743        gen_op_ld_v(s, ot, s->T1, s->A0);
5744        gen_add_A0_im(s, 1 << ot);
5745        /* load the segment first to handle exceptions properly */
5746        gen_op_ld_v(s, MO_16, s->T0, s->A0);
5747        gen_movl_seg_T0(s, op);
5748        /* then put the data */
5749        gen_op_mov_reg_v(s, ot, reg, s->T1);
5750        if (s->base.is_jmp) {
5751            gen_jmp_im(s, s->pc - s->cs_base);
5752            gen_eob(s);
5753        }
5754        break;
5755
5756        /************************/
5757        /* shifts */
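    /* All of grp2 funnels into one handler; 'shift' selects the count
       operand: 0 = CL, 1 = the constant 1, 2 = an immediate byte (which
       follows the ModRM bytes, hence rip_offset = 1 below).  */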
5758    case 0xc0:
5759    case 0xc1:
5760        /* shift Ev,Ib */
5761        shift = 2;
5762    grp2:
5763        {
5764            ot = mo_b_d(b, dflag);
5765            modrm = x86_ldub_code(env, s);
5766            mod = (modrm >> 6) & 3;
5767            op = (modrm >> 3) & 7;
5768
5769            if (mod != 3) {
5770                if (shift == 2) {
5771                    s->rip_offset = 1;
5772                }
5773                gen_lea_modrm(env, s, modrm);
5774                opreg = OR_TMP0;
5775            } else {
5776                opreg = (modrm & 7) | REX_B(s);
5777            }
5778
5779            /* simpler op */
5780            if (shift == 0) {
5781                gen_shift(s, op, ot, opreg, OR_ECX);
5782            } else {
5783                if (shift == 2) {
5784                    shift = x86_ldub_code(env, s);
5785                }
5786                gen_shifti(s, op, ot, opreg, shift);
5787            }
5788        }
5789        break;
5790    case 0xd0:
5791    case 0xd1:
5792        /* shift Ev,1 */
5793        shift = 1;
5794        goto grp2;
5795    case 0xd2:
5796    case 0xd3:
5797        /* shift Ev,cl */
5798        shift = 0;
5799        goto grp2;
5800
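    /* Double-precision shifts: 'op' selects the direction (0 = SHLD,
       1 = SHRD) and 'shift' the count source (1 = imm8, 0 = CL).  */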
5801    case 0x1a4: /* shld imm */
5802        op = 0;
5803        shift = 1;
5804        goto do_shiftd;
5805    case 0x1a5: /* shld cl */
5806        op = 0;
5807        shift = 0;
5808        goto do_shiftd;
5809    case 0x1ac: /* shrd imm */
5810        op = 1;
5811        shift = 1;
5812        goto do_shiftd;
5813    case 0x1ad: /* shrd cl */
5814        op = 1;
5815        shift = 0;
5816    do_shiftd:
5817        ot = dflag;
5818        modrm = x86_ldub_code(env, s);
5819        mod = (modrm >> 6) & 3;
5820        rm = (modrm & 7) | REX_B(s);
5821        reg = ((modrm >> 3) & 7) | rex_r;
5822        if (mod != 3) {
5823            gen_lea_modrm(env, s, modrm);
5824            opreg = OR_TMP0;
5825        } else {
5826            opreg = rm;
5827        }
5828        gen_op_mov_v_reg(s, ot, s->T1, reg);
5829
5830        if (shift) {
5831            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5832            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5833            tcg_temp_free(imm);
5834        } else {
5835            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5836        }
5837        break;
5838
5839        /************************/
5840        /* floats */
5841    case 0xd8 ... 0xdf:
5842        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5843            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5844            /* XXX: what to do if illegal op? */
5845            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5846            break;
5847        }
5848        modrm = x86_ldub_code(env, s);
5849        mod = (modrm >> 6) & 3;
5850        rm = modrm & 7;
5851        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
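        /* op packs the low three opcode bits (d8..df) with the ModRM reg
           field into a 6-bit index 0x00..0x3f selecting the x87 op.  */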
5852        if (mod != 3) {
5853            /* memory op */
5854            gen_lea_modrm(env, s, modrm);
5855            switch(op) {
5856            case 0x00 ... 0x07: /* fxxxs */
5857            case 0x10 ... 0x17: /* fixxxl */
5858            case 0x20 ... 0x27: /* fxxxl */
5859            case 0x30 ... 0x37: /* fixxx */
5860                {
5861                    int op1;
5862                    op1 = op & 7;
5863
5864                    switch(op >> 4) {
5865                    case 0:
5866                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5867                                            s->mem_index, MO_LEUL);
5868                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5869                        break;
5870                    case 1:
5871                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5872                                            s->mem_index, MO_LEUL);
5873                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5874                        break;
5875                    case 2:
5876                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5877                                            s->mem_index, MO_LEQ);
5878                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5879                        break;
5880                    case 3:
5881                    default:
5882                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5883                                            s->mem_index, MO_LESW);
5884                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5885                        break;
5886                    }
5887
5888                    gen_helper_fp_arith_ST0_FT0(op1);
5889                    if (op1 == 3) {
5890                        /* fcomp needs pop */
5891                        gen_helper_fpop(cpu_env);
5892                    }
5893                }
5894                break;
5895            case 0x08: /* flds */
5896            case 0x0a: /* fsts */
5897            case 0x0b: /* fstps */
5898            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5899            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5900            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5901                switch(op & 7) {
5902                case 0:
5903                    switch(op >> 4) {
5904                    case 0:
5905                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5906                                            s->mem_index, MO_LEUL);
5907                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5908                        break;
5909                    case 1:
5910                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5911                                            s->mem_index, MO_LEUL);
5912                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5913                        break;
5914                    case 2:
5915                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5916                                            s->mem_index, MO_LEQ);
5917                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5918                        break;
5919                    case 3:
5920                    default:
5921                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5922                                            s->mem_index, MO_LESW);
5923                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5924                        break;
5925                    }
5926                    break;
5927                case 1:
5928                    /* XXX: the SSE3 CPUID bit (FISTTP) must be tested! */
5929                    switch(op >> 4) {
5930                    case 1:
5931                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5932                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5933                                            s->mem_index, MO_LEUL);
5934                        break;
5935                    case 2:
5936                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5937                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5938                                            s->mem_index, MO_LEQ);
5939                        break;
5940                    case 3:
5941                    default:
5942                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5943                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5944                                            s->mem_index, MO_LEUW);
5945                        break;
5946                    }
5947                    gen_helper_fpop(cpu_env);
5948                    break;
5949                default:
5950                    switch(op >> 4) {
5951                    case 0:
5952                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5953                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5954                                            s->mem_index, MO_LEUL);
5955                        break;
5956                    case 1:
5957                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5958                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5959                                            s->mem_index, MO_LEUL);
5960                        break;
5961                    case 2:
5962                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
5963                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5964                                            s->mem_index, MO_LEQ);
5965                        break;
5966                    case 3:
5967                    default:
5968                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
5969                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5970                                            s->mem_index, MO_LEUW);
5971                        break;
5972                    }
5973                    if ((op & 7) == 3)
5974                        gen_helper_fpop(cpu_env);
5975                    break;
5976                }
5977                break;
5978            case 0x0c: /* fldenv mem */
5979                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5980                break;
5981            case 0x0d: /* fldcw mem */
5982                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5983                                    s->mem_index, MO_LEUW);
5984                gen_helper_fldcw(cpu_env, s->tmp2_i32);
5985                break;
5986            case 0x0e: /* fnstenv mem */
5987                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5988                break;
5989            case 0x0f: /* fnstcw mem */
5990                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
5991                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5992                                    s->mem_index, MO_LEUW);
5993                break;
5994            case 0x1d: /* fldt mem */
5995                gen_helper_fldt_ST0(cpu_env, s->A0);
5996                break;
5997            case 0x1f: /* fstpt mem */
5998                gen_helper_fstt_ST0(cpu_env, s->A0);
5999                gen_helper_fpop(cpu_env);
6000                break;
6001            case 0x2c: /* frstor mem */
6002                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6003                break;
6004            case 0x2e: /* fnsave mem */
6005                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6006                break;
6007            case 0x2f: /* fnstsw mem */
6008                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6009                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6010                                    s->mem_index, MO_LEUW);
6011                break;
6012            case 0x3c: /* fbld */
6013                gen_helper_fbld_ST0(cpu_env, s->A0);
6014                break;
6015            case 0x3e: /* fbstp */
6016                gen_helper_fbst_ST0(cpu_env, s->A0);
6017                gen_helper_fpop(cpu_env);
6018                break;
6019            case 0x3d: /* fildll */
6020                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6021                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6022                break;
6023            case 0x3f: /* fistpll */
6024                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6025                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6026                gen_helper_fpop(cpu_env);
6027                break;
6028            default:
6029                goto unknown_op;
6030            }
6031        } else {
6032            /* register float ops */
6033            opreg = rm;
6034
6035            switch(op) {
6036            case 0x08: /* fld sti */
6037                gen_helper_fpush(cpu_env);
6038                gen_helper_fmov_ST0_STN(cpu_env,
6039                                        tcg_const_i32((opreg + 1) & 7));
6040                break;
6041            case 0x09: /* fxchg sti */
6042            case 0x29: /* fxchg4 sti, undocumented op */
6043            case 0x39: /* fxchg7 sti, undocumented op */
6044                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6045                break;
6046            case 0x0a: /* grp d9/2 */
6047                switch(rm) {
6048                case 0: /* fnop */
6049                    /* check exceptions (FreeBSD FPU probe) */
6050                    gen_helper_fwait(cpu_env);
6051                    break;
6052                default:
6053                    goto unknown_op;
6054                }
6055                break;
6056            case 0x0c: /* grp d9/4 */
6057                switch(rm) {
6058                case 0: /* fchs */
6059                    gen_helper_fchs_ST0(cpu_env);
6060                    break;
6061                case 1: /* fabs */
6062                    gen_helper_fabs_ST0(cpu_env);
6063                    break;
6064                case 4: /* ftst */
6065                    gen_helper_fldz_FT0(cpu_env);
6066                    gen_helper_fcom_ST0_FT0(cpu_env);
6067                    break;
6068                case 5: /* fxam */
6069                    gen_helper_fxam_ST0(cpu_env);
6070                    break;
6071                default:
6072                    goto unknown_op;
6073                }
6074                break;
6075            case 0x0d: /* grp d9/5 */
6076                {
6077                    switch(rm) {
6078                    case 0:
6079                        gen_helper_fpush(cpu_env);
6080                        gen_helper_fld1_ST0(cpu_env);
6081                        break;
6082                    case 1:
6083                        gen_helper_fpush(cpu_env);
6084                        gen_helper_fldl2t_ST0(cpu_env);
6085                        break;
6086                    case 2:
6087                        gen_helper_fpush(cpu_env);
6088                        gen_helper_fldl2e_ST0(cpu_env);
6089                        break;
6090                    case 3:
6091                        gen_helper_fpush(cpu_env);
6092                        gen_helper_fldpi_ST0(cpu_env);
6093                        break;
6094                    case 4:
6095                        gen_helper_fpush(cpu_env);
6096                        gen_helper_fldlg2_ST0(cpu_env);
6097                        break;
6098                    case 5:
6099                        gen_helper_fpush(cpu_env);
6100                        gen_helper_fldln2_ST0(cpu_env);
6101                        break;
6102                    case 6:
6103                        gen_helper_fpush(cpu_env);
6104                        gen_helper_fldz_ST0(cpu_env);
6105                        break;
6106                    default:
6107                        goto unknown_op;
6108                    }
6109                }
6110                break;
6111            case 0x0e: /* grp d9/6 */
6112                switch(rm) {
6113                case 0: /* f2xm1 */
6114                    gen_helper_f2xm1(cpu_env);
6115                    break;
6116                case 1: /* fyl2x */
6117                    gen_helper_fyl2x(cpu_env);
6118                    break;
6119                case 2: /* fptan */
6120                    gen_helper_fptan(cpu_env);
6121                    break;
6122                case 3: /* fpatan */
6123                    gen_helper_fpatan(cpu_env);
6124                    break;
6125                case 4: /* fxtract */
6126                    gen_helper_fxtract(cpu_env);
6127                    break;
6128                case 5: /* fprem1 */
6129                    gen_helper_fprem1(cpu_env);
6130                    break;
6131                case 6: /* fdecstp */
6132                    gen_helper_fdecstp(cpu_env);
6133                    break;
6134                default:
6135                case 7: /* fincstp */
6136                    gen_helper_fincstp(cpu_env);
6137                    break;
6138                }
6139                break;
6140            case 0x0f: /* grp d9/7 */
6141                switch(rm) {
6142                case 0: /* fprem */
6143                    gen_helper_fprem(cpu_env);
6144                    break;
6145                case 1: /* fyl2xp1 */
6146                    gen_helper_fyl2xp1(cpu_env);
6147                    break;
6148                case 2: /* fsqrt */
6149                    gen_helper_fsqrt(cpu_env);
6150                    break;
6151                case 3: /* fsincos */
6152                    gen_helper_fsincos(cpu_env);
6153                    break;
6154                case 5: /* fscale */
6155                    gen_helper_fscale(cpu_env);
6156                    break;
6157                case 4: /* frndint */
6158                    gen_helper_frndint(cpu_env);
6159                    break;
6160                case 6: /* fsin */
6161                    gen_helper_fsin(cpu_env);
6162                    break;
6163                default:
6164                case 7: /* fcos */
6165                    gen_helper_fcos(cpu_env);
6166                    break;
6167                }
6168                break;
6169            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6170            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6171            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
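                    /* A sketch of the encoding, as decoded below: op & 7
                       selects the arithmetic op (fadd, fmul, fsub, fsubr,
                       fdiv, fdivr; fcom/fcomp are handled separately below).
                       The 0x20 forms reverse the operands so ST(i) is the
                       destination, and the 0x30 forms additionally pop.  */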
6172                {
6173                    int op1;
6174
6175                    op1 = op & 7;
6176                    if (op >= 0x20) {
6177                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6178                        if (op >= 0x30)
6179                            gen_helper_fpop(cpu_env);
6180                    } else {
6181                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6182                        gen_helper_fp_arith_ST0_FT0(op1);
6183                    }
6184                }
6185                break;
6186            case 0x02: /* fcom */
6187            case 0x22: /* fcom2, undocumented op */
6188                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6189                gen_helper_fcom_ST0_FT0(cpu_env);
6190                break;
6191            case 0x03: /* fcomp */
6192            case 0x23: /* fcomp3, undocumented op */
6193            case 0x32: /* fcomp5, undocumented op */
6194                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6195                gen_helper_fcom_ST0_FT0(cpu_env);
6196                gen_helper_fpop(cpu_env);
6197                break;
6198            case 0x15: /* da/5 */
6199                switch(rm) {
6200                case 1: /* fucompp */
6201                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6202                    gen_helper_fucom_ST0_FT0(cpu_env);
6203                    gen_helper_fpop(cpu_env);
6204                    gen_helper_fpop(cpu_env);
6205                    break;
6206                default:
6207                    goto unknown_op;
6208                }
6209                break;
6210            case 0x1c:
6211                switch(rm) {
6212                case 0: /* feni (287 only, just do nop here) */
6213                    break;
6214                case 1: /* fdisi (287 only, just do nop here) */
6215                    break;
6216                case 2: /* fclex */
6217                    gen_helper_fclex(cpu_env);
6218                    break;
6219                case 3: /* fninit */
6220                    gen_helper_fninit(cpu_env);
6221                    break;
6222                case 4: /* fsetpm (287 only, just do nop here) */
6223                    break;
6224                default:
6225                    goto unknown_op;
6226                }
6227                break;
6228            case 0x1d: /* fucomi */
6229                if (!(s->cpuid_features & CPUID_CMOV)) {
6230                    goto illegal_op;
6231                }
6232                gen_update_cc_op(s);
6233                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6234                gen_helper_fucomi_ST0_FT0(cpu_env);
6235                set_cc_op(s, CC_OP_EFLAGS);
6236                break;
6237            case 0x1e: /* fcomi */
6238                if (!(s->cpuid_features & CPUID_CMOV)) {
6239                    goto illegal_op;
6240                }
6241                gen_update_cc_op(s);
6242                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6243                gen_helper_fcomi_ST0_FT0(cpu_env);
6244                set_cc_op(s, CC_OP_EFLAGS);
6245                break;
6246            case 0x28: /* ffree sti */
6247                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6248                break;
6249            case 0x2a: /* fst sti */
6250                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6251                break;
6252            case 0x2b: /* fstp sti */
6253            case 0x0b: /* fstp1 sti, undocumented op */
6254            case 0x3a: /* fstp8 sti, undocumented op */
6255            case 0x3b: /* fstp9 sti, undocumented op */
6256                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6257                gen_helper_fpop(cpu_env);
6258                break;
6259            case 0x2c: /* fucom st(i) */
6260                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6261                gen_helper_fucom_ST0_FT0(cpu_env);
6262                break;
6263            case 0x2d: /* fucomp st(i) */
6264                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6265                gen_helper_fucom_ST0_FT0(cpu_env);
6266                gen_helper_fpop(cpu_env);
6267                break;
6268            case 0x33: /* de/3 */
6269                switch(rm) {
6270                case 1: /* fcompp */
6271                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6272                    gen_helper_fcom_ST0_FT0(cpu_env);
6273                    gen_helper_fpop(cpu_env);
6274                    gen_helper_fpop(cpu_env);
6275                    break;
6276                default:
6277                    goto unknown_op;
6278                }
6279                break;
6280            case 0x38: /* ffreep sti, undocumented op */
6281                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6282                gen_helper_fpop(cpu_env);
6283                break;
6284            case 0x3c: /* df/4 */
6285                switch(rm) {
6286                case 0:
6287                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6288                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6289                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6290                    break;
6291                default:
6292                    goto unknown_op;
6293                }
6294                break;
6295            case 0x3d: /* fucomip */
6296                if (!(s->cpuid_features & CPUID_CMOV)) {
6297                    goto illegal_op;
6298                }
6299                gen_update_cc_op(s);
6300                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6301                gen_helper_fucomi_ST0_FT0(cpu_env);
6302                gen_helper_fpop(cpu_env);
6303                set_cc_op(s, CC_OP_EFLAGS);
6304                break;
6305            case 0x3e: /* fcomip */
6306                if (!(s->cpuid_features & CPUID_CMOV)) {
6307                    goto illegal_op;
6308                }
6309                gen_update_cc_op(s);
6310                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6311                gen_helper_fcomi_ST0_FT0(cpu_env);
6312                gen_helper_fpop(cpu_env);
6313                set_cc_op(s, CC_OP_EFLAGS);
6314                break;
6315            case 0x10 ... 0x13: /* fcmovxx */
6316            case 0x18 ... 0x1b:
6317                {
6318                    int op1;
6319                    TCGLabel *l1;
6320                    static const uint8_t fcmov_cc[8] = {
6321                        (JCC_B << 1),
6322                        (JCC_Z << 1),
6323                        (JCC_BE << 1),
6324                        (JCC_P << 1),
6325                    };
6326
6327                    if (!(s->cpuid_features & CPUID_CMOV)) {
6328                        goto illegal_op;
6329                    }
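                        /* The low two opcode bits index fcmov_cc (B/Z/BE/P);
                           bit 3 selects between the DA (move if cc) and DB
                           (move if not cc) forms.  op1 is the inverted
                           condition, so the branch below skips the fmov
                           exactly when the move should not happen.  */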
6330                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6331                    l1 = gen_new_label();
6332                    gen_jcc1_noeob(s, op1, l1);
6333                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6334                    gen_set_label(l1);
6335                }
6336                break;
6337            default:
6338                goto unknown_op;
6339            }
6340        }
6341        break;
6342        /************************/
6343        /* string ops */
6344
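        /* The low opcode bit selects byte (even) vs word/dword (odd)
           operands via mo_b_d().  With a REP/REPZ/REPNZ prefix the
           gen_repz_* helpers are used instead; they take the insn's start
           and end EIP so the iteration can stop and resume precisely.  */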
6345    case 0xa4: /* movsS */
6346    case 0xa5:
6347        ot = mo_b_d(b, dflag);
6348        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6349            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6350        } else {
6351            gen_movs(s, ot);
6352        }
6353        break;
6354
6355    case 0xaa: /* stosS */
6356    case 0xab:
6357        ot = mo_b_d(b, dflag);
6358        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6359            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6360        } else {
6361            gen_stos(s, ot);
6362        }
6363        break;
6364    case 0xac: /* lodsS */
6365    case 0xad:
6366        ot = mo_b_d(b, dflag);
6367        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6368            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6369        } else {
6370            gen_lods(s, ot);
6371        }
6372        break;
6373    case 0xae: /* scasS */
6374    case 0xaf:
6375        ot = mo_b_d(b, dflag);
6376        if (prefixes & PREFIX_REPNZ) {
6377            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6378        } else if (prefixes & PREFIX_REPZ) {
6379            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6380        } else {
6381            gen_scas(s, ot);
6382        }
6383        break;
6384
6385    case 0xa6: /* cmpsS */
6386    case 0xa7:
6387        ot = mo_b_d(b, dflag);
6388        if (prefixes & PREFIX_REPNZ) {
6389            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6390        } else if (prefixes & PREFIX_REPZ) {
6391            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6392        } else {
6393            gen_cmps(s, ot);
6394        }
6395        break;
6396    case 0x6c: /* insS */
6397    case 0x6d:
6398        ot = mo_b_d32(b, dflag);
6399        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6400        gen_check_io(s, ot, pc_start - s->cs_base,
6401                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6402        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6403            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6404        } else {
6405            gen_ins(s, ot);
6406            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6407                gen_jmp(s, s->pc - s->cs_base);
6408            }
6409        }
6410        break;
6411    case 0x6e: /* outsS */
6412    case 0x6f:
6413        ot = mo_b_d32(b, dflag);
6414        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6415        gen_check_io(s, ot, pc_start - s->cs_base,
6416                     svm_is_rep(prefixes) | 4);
6417        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6418            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6419        } else {
6420            gen_outs(s, ot);
6421            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6422                gen_jmp(s, s->pc - s->cs_base);
6423            }
6424        }
6425        break;
6426
6427        /************************/
6428        /* port I/O */
6429
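        /* With icount enabled, a port access may touch an I/O device, so
           gen_io_start() is emitted before the access and the TB is ended
           just after it (gen_jmp to the next insn), keeping the access at
           a deterministic instruction boundary.  */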
6430    case 0xe4:
6431    case 0xe5:
6432        ot = mo_b_d32(b, dflag);
6433        val = x86_ldub_code(env, s);
6434        tcg_gen_movi_tl(s->T0, val);
6435        gen_check_io(s, ot, pc_start - s->cs_base,
6436                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6437        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6438            gen_io_start();
6439        }
6440        tcg_gen_movi_i32(s->tmp2_i32, val);
6441        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6442        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6443        gen_bpt_io(s, s->tmp2_i32, ot);
6444        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6445            gen_jmp(s, s->pc - s->cs_base);
6446        }
6447        break;
6448    case 0xe6:
6449    case 0xe7:
6450        ot = mo_b_d32(b, dflag);
6451        val = x86_ldub_code(env, s);
6452        tcg_gen_movi_tl(s->T0, val);
6453        gen_check_io(s, ot, pc_start - s->cs_base,
6454                     svm_is_rep(prefixes));
6455        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6456
6457        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6458            gen_io_start();
6459        }
6460        tcg_gen_movi_i32(s->tmp2_i32, val);
6461        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6462        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6463        gen_bpt_io(s, s->tmp2_i32, ot);
6464        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6465            gen_jmp(s, s->pc - s->cs_base);
6466        }
6467        break;
6468    case 0xec:
6469    case 0xed:
6470        ot = mo_b_d32(b, dflag);
6471        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6472        gen_check_io(s, ot, pc_start - s->cs_base,
6473                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6474        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6475            gen_io_start();
6476        }
6477        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6478        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6479        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6480        gen_bpt_io(s, s->tmp2_i32, ot);
6481        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6482            gen_jmp(s, s->pc - s->cs_base);
6483        }
6484        break;
6485    case 0xee:
6486    case 0xef:
6487        ot = mo_b_d32(b, dflag);
6488        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6489        gen_check_io(s, ot, pc_start - s->cs_base,
6490                     svm_is_rep(prefixes));
6491        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6492
6493        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6494            gen_io_start();
6495        }
6496        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6497        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6498        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6499        gen_bpt_io(s, s->tmp2_i32, ot);
6500        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6501            gen_jmp(s, s->pc - s->cs_base);
6502        }
6503        break;
6504
6505        /************************/
6506        /* control */
6507    case 0xc2: /* ret im */
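            /* RET imm16: pop the return address, then release imm16 extra
               bytes of stack (callee-cleanup calling conventions).  */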
6508        val = x86_ldsw_code(env, s);
6509        ot = gen_pop_T0(s);
6510        gen_stack_update(s, val + (1 << ot));
6511        /* Note that gen_pop_T0 uses a zero-extending load.  */
6512        gen_op_jmp_v(s->T0);
6513        gen_bnd_jmp(s);
6514        gen_jr(s, s->T0);
6515        break;
6516    case 0xc3: /* ret */
6517        ot = gen_pop_T0(s);
6518        gen_pop_update(s, ot);
6519        /* Note that gen_pop_T0 uses a zero-extending load.  */
6520        gen_op_jmp_v(s->T0);
6521        gen_bnd_jmp(s);
6522        gen_jr(s, s->T0);
6523        break;
6524    case 0xca: /* lret im */
6525        val = x86_ldsw_code(env, s);
6526    do_lret:
6527        if (s->pe && !s->vm86) {
6528            gen_update_cc_op(s);
6529            gen_jmp_im(s, pc_start - s->cs_base);
6530            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6531                                      tcg_const_i32(val));
6532        } else {
6533            gen_stack_A0(s);
6534            /* pop offset */
6535            gen_op_ld_v(s, dflag, s->T0, s->A0);
6536            /* NOTE: keeping EIP updated early is not a problem even if
6537               an exception occurs */
6538            gen_op_jmp_v(s->T0);
6539            /* pop selector */
6540            gen_add_A0_im(s, 1 << dflag);
6541            gen_op_ld_v(s, dflag, s->T0, s->A0);
6542            gen_op_movl_seg_T0_vm(s, R_CS);
6543            /* add stack offset */
6544            gen_stack_update(s, val + (2 << dflag));
6545        }
6546        gen_eob(s);
6547        break;
6548    case 0xcb: /* lret */
6549        val = 0;
6550        goto do_lret;
6551    case 0xcf: /* iret */
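            /* Three paths: real mode uses the plain helper, vm86 mode
               additionally requires IOPL == 3, and protected mode goes
               through the full privilege and task checks in the helper.  */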
6552        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6553        if (!s->pe) {
6554            /* real mode */
6555            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6556            set_cc_op(s, CC_OP_EFLAGS);
6557        } else if (s->vm86) {
6558            if (s->iopl != 3) {
6559                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6560            } else {
6561                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6562                set_cc_op(s, CC_OP_EFLAGS);
6563            }
6564        } else {
6565            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6566                                      tcg_const_i32(s->pc - s->cs_base));
6567            set_cc_op(s, CC_OP_EFLAGS);
6568        }
6569        gen_eob(s);
6570        break;
6571    case 0xe8: /* call im */
6572        {
6573            if (dflag != MO_16) {
6574                tval = (int32_t)insn_get(env, s, MO_32);
6575            } else {
6576                tval = (int16_t)insn_get(env, s, MO_16);
6577            }
6578            next_eip = s->pc - s->cs_base;
6579            tval += next_eip;
6580            if (dflag == MO_16) {
6581                tval &= 0xffff;
6582            } else if (!CODE64(s)) {
6583                tval &= 0xffffffff;
6584            }
6585            tcg_gen_movi_tl(s->T0, next_eip);
6586            gen_push_v(s, s->T0);
6587            gen_bnd_jmp(s);
6588            gen_jmp(s, tval);
6589        }
6590        break;
6591    case 0x9a: /* lcall im */
6592        {
6593            unsigned int selector, offset;
6594
6595            if (CODE64(s))
6596                goto illegal_op;
6597            ot = dflag;
6598            offset = insn_get(env, s, ot);
6599            selector = insn_get(env, s, MO_16);
6600
6601            tcg_gen_movi_tl(s->T0, selector);
6602            tcg_gen_movi_tl(s->T1, offset);
6603        }
6604        goto do_lcall;
6605    case 0xe9: /* jmp im */
6606        if (dflag != MO_16) {
6607            tval = (int32_t)insn_get(env, s, MO_32);
6608        } else {
6609            tval = (int16_t)insn_get(env, s, MO_16);
6610        }
6611        tval += s->pc - s->cs_base;
6612        if (dflag == MO_16) {
6613            tval &= 0xffff;
6614        } else if (!CODE64(s)) {
6615            tval &= 0xffffffff;
6616        }
6617        gen_bnd_jmp(s);
6618        gen_jmp(s, tval);
6619        break;
6620    case 0xea: /* ljmp im */
6621        {
6622            unsigned int selector, offset;
6623
6624            if (CODE64(s))
6625                goto illegal_op;
6626            ot = dflag;
6627            offset = insn_get(env, s, ot);
6628            selector = insn_get(env, s, MO_16);
6629
6630            tcg_gen_movi_tl(s->T0, selector);
6631            tcg_gen_movi_tl(s->T1, offset);
6632        }
6633        goto do_ljmp;
6634    case 0xeb: /* jmp Jb */
6635        tval = (int8_t)insn_get(env, s, MO_8);
6636        tval += s->pc - s->cs_base;
6637        if (dflag == MO_16) {
6638            tval &= 0xffff;
6639        }
6640        gen_jmp(s, tval);
6641        break;
6642    case 0x70 ... 0x7f: /* jcc Jb */
6643        tval = (int8_t)insn_get(env, s, MO_8);
6644        goto do_jcc;
6645    case 0x180 ... 0x18f: /* jcc Jv */
6646        if (dflag != MO_16) {
6647            tval = (int32_t)insn_get(env, s, MO_32);
6648        } else {
6649            tval = (int16_t)insn_get(env, s, MO_16);
6650        }
6651    do_jcc:
6652        next_eip = s->pc - s->cs_base;
6653        tval += next_eip;
6654        if (dflag == MO_16) {
6655            tval &= 0xffff;
6656        }
6657        gen_bnd_jmp(s);
6658        gen_jcc(s, b, tval, next_eip);
6659        break;
6660
6661    case 0x190 ... 0x19f: /* setcc Gv */
6662        modrm = x86_ldub_code(env, s);
6663        gen_setcc1(s, b, s->T0);
6664        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6665        break;
6666    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6667        if (!(s->cpuid_features & CPUID_CMOV)) {
6668            goto illegal_op;
6669        }
6670        ot = dflag;
6671        modrm = x86_ldub_code(env, s);
6672        reg = ((modrm >> 3) & 7) | rex_r;
6673        gen_cmovcc1(env, s, ot, b, modrm, reg);
6674        break;
6675
6676        /************************/
6677        /* flags */
6678    case 0x9c: /* pushf */
6679        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6680        if (s->vm86 && s->iopl != 3) {
6681            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6682        } else {
6683            gen_update_cc_op(s);
6684            gen_helper_read_eflags(s->T0, cpu_env);
6685            gen_push_v(s, s->T0);
6686        }
6687        break;
6688    case 0x9d: /* popf */
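            /* The set of EFLAGS bits POPF may modify depends on privilege:
               CPL 0 may also change IF and IOPL, CPL <= IOPL may change IF
               but not IOPL, and otherwise neither may change.  */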
6689        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6690        if (s->vm86 && s->iopl != 3) {
6691            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6692        } else {
6693            ot = gen_pop_T0(s);
6694            if (s->cpl == 0) {
6695                if (dflag != MO_16) {
6696                    gen_helper_write_eflags(cpu_env, s->T0,
6697                                            tcg_const_i32((TF_MASK | AC_MASK |
6698                                                           ID_MASK | NT_MASK |
6699                                                           IF_MASK |
6700                                                           IOPL_MASK)));
6701                } else {
6702                    gen_helper_write_eflags(cpu_env, s->T0,
6703                                            tcg_const_i32((TF_MASK | AC_MASK |
6704                                                           ID_MASK | NT_MASK |
6705                                                           IF_MASK | IOPL_MASK)
6706                                                          & 0xffff));
6707                }
6708            } else {
6709                if (s->cpl <= s->iopl) {
6710                    if (dflag != MO_16) {
6711                        gen_helper_write_eflags(cpu_env, s->T0,
6712                                                tcg_const_i32((TF_MASK |
6713                                                               AC_MASK |
6714                                                               ID_MASK |
6715                                                               NT_MASK |
6716                                                               IF_MASK)));
6717                    } else {
6718                        gen_helper_write_eflags(cpu_env, s->T0,
6719                                                tcg_const_i32((TF_MASK |
6720                                                               AC_MASK |
6721                                                               ID_MASK |
6722                                                               NT_MASK |
6723                                                               IF_MASK)
6724                                                              & 0xffff));
6725                    }
6726                } else {
6727                    if (dflag != MO_16) {
6728                        gen_helper_write_eflags(cpu_env, s->T0,
6729                                           tcg_const_i32((TF_MASK | AC_MASK |
6730                                                          ID_MASK | NT_MASK)));
6731                    } else {
6732                        gen_helper_write_eflags(cpu_env, s->T0,
6733                                           tcg_const_i32((TF_MASK | AC_MASK |
6734                                                          ID_MASK | NT_MASK)
6735                                                         & 0xffff));
6736                    }
6737                }
6738            }
6739            gen_pop_update(s, ot);
6740            set_cc_op(s, CC_OP_EFLAGS);
6741            /* abort translation because the TF/AC flags may change */
6742            gen_jmp_im(s, s->pc - s->cs_base);
6743            gen_eob(s);
6744        }
6745        break;
6746    case 0x9e: /* sahf */
6747        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6748            goto illegal_op;
6749        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6750        gen_compute_eflags(s);
6751        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6752        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6753        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6754        break;
6755    case 0x9f: /* lahf */
6756        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6757            goto illegal_op;
6758        gen_compute_eflags(s);
6759        /* Note: gen_compute_eflags() only gives the condition codes */
6760        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6761        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6762        break;
6763    case 0xf5: /* cmc */
6764        gen_compute_eflags(s);
6765        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6766        break;
6767    case 0xf8: /* clc */
6768        gen_compute_eflags(s);
6769        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6770        break;
6771    case 0xf9: /* stc */
6772        gen_compute_eflags(s);
6773        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6774        break;
6775    case 0xfc: /* cld */
6776        tcg_gen_movi_i32(s->tmp2_i32, 1);
6777        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6778        break;
6779    case 0xfd: /* std */
6780        tcg_gen_movi_i32(s->tmp2_i32, -1);
6781        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6782        break;
6783
6784        /************************/
6785        /* bit operations */
6786    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6787        ot = dflag;
6788        modrm = x86_ldub_code(env, s);
6789        op = (modrm >> 3) & 7;
6790        mod = (modrm >> 6) & 3;
6791        rm = (modrm & 7) | REX_B(s);
6792        if (mod != 3) {
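                /* One immediate byte still follows the ModRM bytes; record
                   this so RIP-relative addressing is computed relative to
                   the end of the whole insn.  */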
6793            s->rip_offset = 1;
6794            gen_lea_modrm(env, s, modrm);
6795            if (!(s->prefix & PREFIX_LOCK)) {
6796                gen_op_ld_v(s, ot, s->T0, s->A0);
6797            }
6798        } else {
6799            gen_op_mov_v_reg(s, ot, s->T0, rm);
6800        }
6801        /* load shift */
6802        val = x86_ldub_code(env, s);
6803        tcg_gen_movi_tl(s->T1, val);
6804        if (op < 4)
6805            goto unknown_op;
6806        op -= 4;
6807        goto bt_op;
6808    case 0x1a3: /* bt Gv, Ev */
6809        op = 0;
6810        goto do_btx;
6811    case 0x1ab: /* bts */
6812        op = 1;
6813        goto do_btx;
6814    case 0x1b3: /* btr */
6815        op = 2;
6816        goto do_btx;
6817    case 0x1bb: /* btc */
6818        op = 3;
6819    do_btx:
6820        ot = dflag;
6821        modrm = x86_ldub_code(env, s);
6822        reg = ((modrm >> 3) & 7) | rex_r;
6823        mod = (modrm >> 6) & 3;
6824        rm = (modrm & 7) | REX_B(s);
6825        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6826        if (mod != 3) {
6827            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6828            /* The register operand is a bit index that can reach beyond
                   the addressed word: fold (index >> (3 + ot)) << ot bytes
                   into the address so A0 points at the word with the bit.  */
6829            gen_exts(ot, s->T1);
6830            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6831            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6832            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6833            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6834            if (!(s->prefix & PREFIX_LOCK)) {
6835                gen_op_ld_v(s, ot, s->T0, s->A0);
6836            }
6837        } else {
6838            gen_op_mov_v_reg(s, ot, s->T0, rm);
6839        }
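        /* Common tail for bt/bts/btr/btc: T0 holds the operand, T1 the bit
           index.  Reduce the index modulo the operand width, then build the
           single-bit mask in tmp0.  */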
6840    bt_op:
6841        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6842        tcg_gen_movi_tl(s->tmp0, 1);
6843        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6844        if (s->prefix & PREFIX_LOCK) {
6845            switch (op) {
6846            case 0: /* bt */
6847                /* Needs no atomic ops; we suppressed the normal
6848                   memory load for LOCK above, so do it now.  */
6849                gen_op_ld_v(s, ot, s->T0, s->A0);
6850                break;
6851            case 1: /* bts */
6852                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6853                                           s->mem_index, ot | MO_LE);
6854                break;
6855            case 2: /* btr */
6856                tcg_gen_not_tl(s->tmp0, s->tmp0);
6857                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6858                                            s->mem_index, ot | MO_LE);
6859                break;
6860            default:
6861            case 3: /* btc */
6862                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6863                                            s->mem_index, ot | MO_LE);
6864                break;
6865            }
6866            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6867        } else {
6868            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6869            switch (op) {
6870            case 0: /* bt */
6871                /* Data already loaded; nothing to do.  */
6872                break;
6873            case 1: /* bts */
6874                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6875                break;
6876            case 2: /* btr */
6877                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6878                break;
6879            default:
6880            case 3: /* btc */
6881                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6882                break;
6883            }
6884            if (op != 0) {
6885                if (mod != 3) {
6886                    gen_op_st_v(s, ot, s->T0, s->A0);
6887                } else {
6888                    gen_op_mov_reg_v(s, ot, rm, s->T0);
6889                }
6890            }
6891        }
6892
6893        /* Delay all CC updates until after the store above.  Note that
6894           C is the result of the test, Z is unchanged, and the others
6895           are all undefined.  */
6896        switch (s->cc_op) {
6897        case CC_OP_MULB ... CC_OP_MULQ:
6898        case CC_OP_ADDB ... CC_OP_ADDQ:
6899        case CC_OP_ADCB ... CC_OP_ADCQ:
6900        case CC_OP_SUBB ... CC_OP_SUBQ:
6901        case CC_OP_SBBB ... CC_OP_SBBQ:
6902        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6903        case CC_OP_INCB ... CC_OP_INCQ:
6904        case CC_OP_DECB ... CC_OP_DECQ:
6905        case CC_OP_SHLB ... CC_OP_SHLQ:
6906        case CC_OP_SARB ... CC_OP_SARQ:
6907        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6908            /* Z was going to be computed from the non-zero status of CC_DST.
6909               We can get that same Z value (and the new C value) by leaving
6910               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6911               same width.  */
6912            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6913            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6914            break;
6915        default:
6916            /* Otherwise, generate EFLAGS and replace the C bit.  */
6917            gen_compute_eflags(s);
6918            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
6919                               ctz32(CC_C), 1);
6920            break;
6921        }
6922        break;
6923    case 0x1bc: /* bsf / tzcnt */
6924    case 0x1bd: /* bsr / lzcnt */
6925        ot = dflag;
6926        modrm = x86_ldub_code(env, s);
6927        reg = ((modrm >> 3) & 7) | rex_r;
6928        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6929        gen_extu(ot, s->T0);
6930
6931        /* Note that lzcnt and tzcnt are in different extensions.  */
6932        if ((prefixes & PREFIX_REPZ)
6933            && (b & 1
6934                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6935                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6936            int size = 8 << ot;
6937            /* For lzcnt/tzcnt, the C bit is defined by the input:
                   it is set iff the source is zero.  */
6938            tcg_gen_mov_tl(cpu_cc_src, s->T0);
6939            if (b & 1) {
6940                /* For lzcnt, reduce the target_ulong result by the
6941                   number of zeros that we expect to find at the top.  */
6942                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
6943                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
6944            } else {
6945                /* For tzcnt, a zero input must return the operand size.  */
6946                tcg_gen_ctzi_tl(s->T0, s->T0, size);
6947            }
6948            /* For lzcnt/tzcnt, the Z bit is defined by the result:
                   it is set iff the result is zero.  */
6949            gen_op_update1_cc(s);
6950            set_cc_op(s, CC_OP_BMILGB + ot);
6951        } else {
6952            /* For bsr/bsf, only the Z bit is defined and it is related
6953               to the input and not the result.  */
6954            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
6955            set_cc_op(s, CC_OP_LOGICB + ot);
6956
6957            /* ??? The manual says that the output is undefined when the
6958               input is zero, but real hardware leaves it unchanged, and
6959               real programs appear to depend on that.  Accomplish this
6960               by passing the output as the value to return upon zero.  */
6961            if (b & 1) {
6962                /* For bsr, return the bit index of the first 1 bit,
6963                   not the count of leading zeros.  */
6964                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6965                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
6966                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
6967            } else {
6968                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
6969            }
6970        }
6971        gen_op_mov_reg_v(s, ot, reg, s->T0);
6972        break;
6973        /************************/
6974        /* bcd */
6975    case 0x27: /* daa */
6976        if (CODE64(s))
6977            goto illegal_op;
6978        gen_update_cc_op(s);
6979        gen_helper_daa(cpu_env);
6980        set_cc_op(s, CC_OP_EFLAGS);
6981        break;
6982    case 0x2f: /* das */
6983        if (CODE64(s))
6984            goto illegal_op;
6985        gen_update_cc_op(s);
6986        gen_helper_das(cpu_env);
6987        set_cc_op(s, CC_OP_EFLAGS);
6988        break;
6989    case 0x37: /* aaa */
6990        if (CODE64(s))
6991            goto illegal_op;
6992        gen_update_cc_op(s);
6993        gen_helper_aaa(cpu_env);
6994        set_cc_op(s, CC_OP_EFLAGS);
6995        break;
6996    case 0x3f: /* aas */
6997        if (CODE64(s))
6998            goto illegal_op;
6999        gen_update_cc_op(s);
7000        gen_helper_aas(cpu_env);
7001        set_cc_op(s, CC_OP_EFLAGS);
7002        break;
7003    case 0xd4: /* aam */
7004        if (CODE64(s))
7005            goto illegal_op;
7006        val = x86_ldub_code(env, s);
7007        if (val == 0) {
7008            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7009        } else {
7010            gen_helper_aam(cpu_env, tcg_const_i32(val));
7011            set_cc_op(s, CC_OP_LOGICB);
7012        }
7013        break;
7014    case 0xd5: /* aad */
7015        if (CODE64(s))
7016            goto illegal_op;
7017        val = x86_ldub_code(env, s);
7018        gen_helper_aad(cpu_env, tcg_const_i32(val));
7019        set_cc_op(s, CC_OP_LOGICB);
7020        break;
7021        /************************/
7022        /* misc */
7023    case 0x90: /* nop */
7024        /* XXX: a correct LOCK-prefix test is still needed for all insns */
7025        if (prefixes & PREFIX_LOCK) {
7026            goto illegal_op;
7027        }
7028        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7029        if (REX_B(s)) {
7030            goto do_xchg_reg_eax;
7031        }
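        /* F3 90 (REP NOP) is PAUSE: the helper exits the CPU loop so a
           spinning vCPU can yield, hence the TB ends here.  */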
7032        if (prefixes & PREFIX_REPZ) {
7033            gen_update_cc_op(s);
7034            gen_jmp_im(s, pc_start - s->cs_base);
7035            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7036            s->base.is_jmp = DISAS_NORETURN;
7037        }
7038        break;
7039    case 0x9b: /* fwait */
7040        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7041            (HF_MP_MASK | HF_TS_MASK)) {
7042            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7043        } else {
7044            gen_helper_fwait(cpu_env);
7045        }
7046        break;
7047    case 0xcc: /* int3 */
7048        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7049        break;
7050    case 0xcd: /* int N */
7051        val = x86_ldub_code(env, s);
7052        if (s->vm86 && s->iopl != 3) {
7053            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7054        } else {
7055            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7056        }
7057        break;
7058    case 0xce: /* into */
7059        if (CODE64(s))
7060            goto illegal_op;
7061        gen_update_cc_op(s);
7062        gen_jmp_im(s, pc_start - s->cs_base);
7063        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7064        break;
7065#ifdef WANT_ICEBP
7066    case 0xf1: /* icebp (undocumented, exits to external debugger) */
7067        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7068        gen_debug(s, pc_start - s->cs_base);
7069        break;
7070#endif
7071    case 0xfa: /* cli */
7072        if (!s->vm86) {
7073            if (s->cpl <= s->iopl) {
7074                gen_helper_cli(cpu_env);
7075            } else {
7076                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7077            }
7078        } else {
7079            if (s->iopl == 3) {
7080                gen_helper_cli(cpu_env);
7081            } else {
7082                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7083            }
7084        }
7085        break;
7086    case 0xfb: /* sti */
7087        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7088            gen_helper_sti(cpu_env);
7089            /* interrupt delivery is inhibited until the insn after STI completes */
7090            gen_jmp_im(s, s->pc - s->cs_base);
7091            gen_eob_inhibit_irq(s, true);
7092        } else {
7093            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7094        }
7095        break;
7096    case 0x62: /* bound */
7097        if (CODE64(s))
7098            goto illegal_op;
7099        ot = dflag;
7100        modrm = x86_ldub_code(env, s);
7101        reg = (modrm >> 3) & 7;
7102        mod = (modrm >> 6) & 3;
7103        if (mod == 3)
7104            goto illegal_op;
7105        gen_op_mov_v_reg(s, ot, s->T0, reg);
7106        gen_lea_modrm(env, s, modrm);
7107        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7108        if (ot == MO_16) {
7109            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7110        } else {
7111            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7112        }
7113        break;
7114    case 0x1c8 ... 0x1cf: /* bswap reg */
7115        reg = (b & 7) | REX_B(s);
7116#ifdef TARGET_X86_64
7117        if (dflag == MO_64) {
7118            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
7119            tcg_gen_bswap64_i64(s->T0, s->T0);
7120            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
7121        } else
7122#endif
7123        {
7124            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
7125            tcg_gen_ext32u_tl(s->T0, s->T0);
7126            tcg_gen_bswap32_tl(s->T0, s->T0);
7127            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
7128        }
7129        break;
7130    case 0xd6: /* salc */
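            /* Undocumented: set AL to 0xff if CF is set, else to 0x00.
               The carry is computed into T0 as 0/1 and negated to 0/-1.  */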
7131        if (CODE64(s))
7132            goto illegal_op;
7133        gen_compute_eflags_c(s, s->T0);
7134        tcg_gen_neg_tl(s->T0, s->T0);
7135        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7136        break;
7137    case 0xe0: /* loopnz */
7138    case 0xe1: /* loopz */
7139    case 0xe2: /* loop */
7140    case 0xe3: /* jecxz */
7141        {
7142            TCGLabel *l1, *l2, *l3;
7143
7144            tval = (int8_t)insn_get(env, s, MO_8);
7145            next_eip = s->pc - s->cs_base;
7146            tval += next_eip;
7147            if (dflag == MO_16) {
7148                tval &= 0xffff;
7149            }
7150
7151            l1 = gen_new_label();
7152            l2 = gen_new_label();
7153            l3 = gen_new_label();
7154            b &= 3;
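                /* l1: branch target when the loop condition holds;
                   l3: early exit to the next insn for loopnz/loopz;
                   l2: common join point once EIP has been set.  */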
7155            switch(b) {
7156            case 0: /* loopnz */
7157            case 1: /* loopz */
7158                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7159                gen_op_jz_ecx(s, s->aflag, l3);
7160                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7161                break;
7162            case 2: /* loop */
7163                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7164                gen_op_jnz_ecx(s, s->aflag, l1);
7165                break;
7166            default:
7167            case 3: /* jcxz */
7168                gen_op_jz_ecx(s, s->aflag, l1);
7169                break;
7170            }
7171
7172            gen_set_label(l3);
7173            gen_jmp_im(s, next_eip);
7174            tcg_gen_br(l2);
7175
7176            gen_set_label(l1);
7177            gen_jmp_im(s, tval);
7178            gen_set_label(l2);
7179            gen_eob(s);
7180        }
7181        break;
7182    case 0x130: /* wrmsr */
7183    case 0x132: /* rdmsr */
7184        if (s->cpl != 0) {
7185            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7186        } else {
7187            gen_update_cc_op(s);
7188            gen_jmp_im(s, pc_start - s->cs_base);
7189            if (b & 2) {
7190                gen_helper_rdmsr(cpu_env);
7191            } else {
7192                gen_helper_wrmsr(cpu_env);
7193            }
7194        }
7195        break;
7196    case 0x131: /* rdtsc */
7197        gen_update_cc_op(s);
7198        gen_jmp_im(s, pc_start - s->cs_base);
7199        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7200            gen_io_start();
7201        }
7202        gen_helper_rdtsc(cpu_env);
7203        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7204            gen_jmp(s, s->pc - s->cs_base);
7205        }
7206        break;
7207    case 0x133: /* rdpmc */
7208        gen_update_cc_op(s);
7209        gen_jmp_im(s, pc_start - s->cs_base);
7210        gen_helper_rdpmc(cpu_env);
7211        break;
7212    case 0x134: /* sysenter */
7213        /* On Intel CPUs, SYSENTER is valid in 64-bit mode */
7214        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7215            goto illegal_op;
7216        if (!s->pe) {
7217            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7218        } else {
7219            gen_helper_sysenter(cpu_env);
7220            gen_eob(s);
7221        }
7222        break;
7223    case 0x135: /* sysexit */
7224        /* On Intel CPUs, SYSEXIT is valid in 64-bit mode */
7225        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7226            goto illegal_op;
7227        if (!s->pe) {
7228            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7229        } else {
7230            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7231            gen_eob(s);
7232        }
7233        break;
7234#ifdef TARGET_X86_64
7235    case 0x105: /* syscall */
7236        /* XXX: is it usable in real mode? */
7237        gen_update_cc_op(s);
7238        gen_jmp_im(s, pc_start - s->cs_base);
7239        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7240        /* TF handling for the syscall insn is different.  The TF bit is checked
7241           after the syscall insn completes.  This allows #DB not to be
7242           generated after one has entered CPL0 if TF is set in FMASK.  */
7243        gen_eob_worker(s, false, true);
7244        break;
7245    case 0x107: /* sysret */
7246        if (!s->pe) {
7247            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7248        } else {
7249            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7250            /* condition codes are modified only in long mode */
7251            if (s->lma) {
7252                set_cc_op(s, CC_OP_EFLAGS);
7253            }
7254            /* TF handling for the sysret insn is different.  The TF bit is
7255               checked after the sysret insn completes.  This allows #DB to be
7256               generated "as if" the syscall insn in userspace had just
7257               completed.  */
7258            gen_eob_worker(s, false, true);
7259        }
7260        break;
7261#endif
7262    case 0x1a2: /* cpuid */
7263        gen_update_cc_op(s);
7264        gen_jmp_im(s, pc_start - s->cs_base);
7265        gen_helper_cpuid(cpu_env);
7266        break;
7267    case 0xf4: /* hlt */
7268        if (s->cpl != 0) {
7269            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7270        } else {
7271            gen_update_cc_op(s);
7272            gen_jmp_im(s, pc_start - s->cs_base);
7273            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7274            s->base.is_jmp = DISAS_NORETURN;
7275        }
7276        break;
7277    case 0x100:
7278        modrm = x86_ldub_code(env, s);
7279        mod = (modrm >> 6) & 3;
7280        op = (modrm >> 3) & 7;
7281        switch(op) {
7282        case 0: /* sldt */
7283            if (!s->pe || s->vm86)
7284                goto illegal_op;
7285            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7286            tcg_gen_ld32u_tl(s->T0, cpu_env,
7287                             offsetof(CPUX86State, ldt.selector));
7288            ot = mod == 3 ? dflag : MO_16;
7289            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7290            break;
7291        case 2: /* lldt */
7292            if (!s->pe || s->vm86)
7293                goto illegal_op;
7294            if (s->cpl != 0) {
7295                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7296            } else {
7297                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7298                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7299                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7300                gen_helper_lldt(cpu_env, s->tmp2_i32);
7301            }
7302            break;
7303        case 1: /* str */
7304            if (!s->pe || s->vm86)
7305                goto illegal_op;
7306            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7307            tcg_gen_ld32u_tl(s->T0, cpu_env,
7308                             offsetof(CPUX86State, tr.selector));
7309            ot = mod == 3 ? dflag : MO_16;
7310            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7311            break;
7312        case 3: /* ltr */
7313            if (!s->pe || s->vm86)
7314                goto illegal_op;
7315            if (s->cpl != 0) {
7316                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7317            } else {
7318                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7319                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7320                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7321                gen_helper_ltr(cpu_env, s->tmp2_i32);
7322            }
7323            break;
7324        case 4: /* verr */
7325        case 5: /* verw */
7326            if (!s->pe || s->vm86)
7327                goto illegal_op;
7328            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7329            gen_update_cc_op(s);
7330            if (op == 4) {
7331                gen_helper_verr(cpu_env, s->T0);
7332            } else {
7333                gen_helper_verw(cpu_env, s->T0);
7334            }
7335            set_cc_op(s, CC_OP_EFLAGS);
7336            break;
7337        default:
7338            goto unknown_op;
7339        }
7340        break;
7341
7342    case 0x101:
7343        modrm = x86_ldub_code(env, s);
7344        switch (modrm) {
7345        CASE_MODRM_MEM_OP(0): /* sgdt */
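                /* Store the pseudo-descriptor: 16-bit limit, then the base
                   (64 bits in long mode, else 32 bits; only 24 base bits
                   are stored when the operand size is 16 bits).  */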
7346            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7347            gen_lea_modrm(env, s, modrm);
7348            tcg_gen_ld32u_tl(s->T0,
7349                             cpu_env, offsetof(CPUX86State, gdt.limit));
7350            gen_op_st_v(s, MO_16, s->T0, s->A0);
7351            gen_add_A0_im(s, 2);
7352            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7353            if (dflag == MO_16) {
7354                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7355            }
7356            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7357            break;
7358
7359        case 0xc8: /* monitor */
7360            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7361                goto illegal_op;
7362            }
7363            gen_update_cc_op(s);
7364            gen_jmp_im(s, pc_start - s->cs_base);
7365            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7366            gen_extu(s->aflag, s->A0);
7367            gen_add_A0_ds_seg(s);
7368            gen_helper_monitor(cpu_env, s->A0);
7369            break;
7370
7371        case 0xc9: /* mwait */
7372            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7373                goto illegal_op;
7374            }
7375            gen_update_cc_op(s);
7376            gen_jmp_im(s, pc_start - s->cs_base);
7377            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7378            gen_eob(s);
7379            break;
7380
7381        case 0xca: /* clac */
7382            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7383                || s->cpl != 0) {
7384                goto illegal_op;
7385            }
7386            gen_helper_clac(cpu_env);
7387            gen_jmp_im(s, s->pc - s->cs_base);
7388            gen_eob(s);
7389            break;
7390
7391        case 0xcb: /* stac */
7392            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7393                || s->cpl != 0) {
7394                goto illegal_op;
7395            }
7396            gen_helper_stac(cpu_env);
7397            gen_jmp_im(s, s->pc - s->cs_base);
7398            gen_eob(s);
7399            break;
7400
7401        CASE_MODRM_MEM_OP(1): /* sidt */
7402            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7403            gen_lea_modrm(env, s, modrm);
7404            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7405            gen_op_st_v(s, MO_16, s->T0, s->A0);
7406            gen_add_A0_im(s, 2);
7407            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7408            if (dflag == MO_16) {
7409                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7410            }
7411            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7412            break;
7413
7414        case 0xd0: /* xgetbv */
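                /* XGETBV: read the 64-bit extended control register
                   selected by ECX and return it split across EDX:EAX.  */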
7415            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7416                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7417                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7418                goto illegal_op;
7419            }
7420            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7421            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7422            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7423            break;
7424
7425        case 0xd1: /* xsetbv */
7426            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7427                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7428                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7429                goto illegal_op;
7430            }
7431            if (s->cpl != 0) {
7432                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7433                break;
7434            }
7435            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7436                                  cpu_regs[R_EDX]);
7437            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7438            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7439            /* End TB because translation flags may change.  */
7440            gen_jmp_im(s, s->pc - s->cs_base);
7441            gen_eob(s);
7442            break;
7443
7444        case 0xd8: /* VMRUN */
7445            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7446                goto illegal_op;
7447            }
7448            if (s->cpl != 0) {
7449                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7450                break;
7451            }
7452            gen_update_cc_op(s);
7453            gen_jmp_im(s, pc_start - s->cs_base);
7454            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7455                             tcg_const_i32(s->pc - pc_start));
7456            tcg_gen_exit_tb(NULL, 0);
7457            s->base.is_jmp = DISAS_NORETURN;
7458            break;
7459
7460        case 0xd9: /* VMMCALL */
7461            if (!(s->flags & HF_SVME_MASK)) {
7462                goto illegal_op;
7463            }
7464            gen_update_cc_op(s);
7465            gen_jmp_im(s, pc_start - s->cs_base);
7466            gen_helper_vmmcall(cpu_env);
7467            break;
7468
7469        case 0xda: /* VMLOAD */
7470            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7471                goto illegal_op;
7472            }
7473            if (s->cpl != 0) {
7474                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7475                break;
7476            }
7477            gen_update_cc_op(s);
7478            gen_jmp_im(s, pc_start - s->cs_base);
7479            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7480            break;
7481
7482        case 0xdb: /* VMSAVE */
7483            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7484                goto illegal_op;
7485            }
7486            if (s->cpl != 0) {
7487                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7488                break;
7489            }
7490            gen_update_cc_op(s);
7491            gen_jmp_im(s, pc_start - s->cs_base);
7492            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7493            break;
7494
7495        case 0xdc: /* STGI */
7496            if ((!(s->flags & HF_SVME_MASK)
7497                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7498                || !s->pe) {
7499                goto illegal_op;
7500            }
7501            if (s->cpl != 0) {
7502                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7503                break;
7504            }
7505            gen_update_cc_op(s);
7506            gen_helper_stgi(cpu_env);
7507            gen_jmp_im(s, s->pc - s->cs_base);
7508            gen_eob(s);
7509            break;
7510
7511        case 0xdd: /* CLGI */
7512            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7513                goto illegal_op;
7514            }
7515            if (s->cpl != 0) {
7516                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7517                break;
7518            }
7519            gen_update_cc_op(s);
7520            gen_jmp_im(s, pc_start - s->cs_base);
7521            gen_helper_clgi(cpu_env);
7522            break;
7523
7524        case 0xde: /* SKINIT */
7525            if ((!(s->flags & HF_SVME_MASK)
7526                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7527                || !s->pe) {
7528                goto illegal_op;
7529            }
7530            gen_update_cc_op(s);
7531            gen_jmp_im(s, pc_start - s->cs_base);
7532            gen_helper_skinit(cpu_env);
7533            break;
7534
7535        case 0xdf: /* INVLPGA */
7536            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7537                goto illegal_op;
7538            }
7539            if (s->cpl != 0) {
7540                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7541                break;
7542            }
7543            gen_update_cc_op(s);
7544            gen_jmp_im(s, pc_start - s->cs_base);
7545            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7546            break;
7547
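            /* LGDT/LIDT load a 16-bit limit followed by the base;
               as with SIDT above, a 16-bit operand size truncates
               the base to 24 bits.  */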
7548        CASE_MODRM_MEM_OP(2): /* lgdt */
7549            if (s->cpl != 0) {
7550                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7551                break;
7552            }
7553            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7554            gen_lea_modrm(env, s, modrm);
7555            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7556            gen_add_A0_im(s, 2);
7557            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7558            if (dflag == MO_16) {
7559                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7560            }
7561            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7562            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7563            break;
7564
7565        CASE_MODRM_MEM_OP(3): /* lidt */
7566            if (s->cpl != 0) {
7567                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7568                break;
7569            }
7570            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7571            gen_lea_modrm(env, s, modrm);
7572            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7573            gen_add_A0_im(s, 2);
7574            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7575            if (dflag == MO_16) {
7576                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7577            }
7578            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7579            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7580            break;
7581
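            /* SMSW returns the low word of CR0; in 64-bit mode a
               register destination uses the full operand size.  */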
7582        CASE_MODRM_OP(4): /* smsw */
7583            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7584            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7585            if (CODE64(s)) {
7586                mod = (modrm >> 6) & 3;
7587                ot = (mod != 3 ? MO_16 : s->dflag);
7588            } else {
7589                ot = MO_16;
7590            }
7591            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7592            break;
7593        case 0xee: /* rdpkru */
7594            if (prefixes & PREFIX_LOCK) {
7595                goto illegal_op;
7596            }
7597            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7598            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7599            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7600            break;
7601        case 0xef: /* wrpkru */
7602            if (prefixes & PREFIX_LOCK) {
7603                goto illegal_op;
7604            }
7605            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7606                                  cpu_regs[R_EDX]);
7607            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7608            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7609            break;
7610        CASE_MODRM_OP(6): /* lmsw */
7611            if (s->cpl != 0) {
7612                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7613                break;
7614            }
7615            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7616            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7617            gen_helper_lmsw(cpu_env, s->T0);
7618            gen_jmp_im(s, s->pc - s->cs_base);
7619            gen_eob(s);
7620            break;
7621
7622        CASE_MODRM_MEM_OP(7): /* invlpg */
7623            if (s->cpl != 0) {
7624                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7625                break;
7626            }
7627            gen_update_cc_op(s);
7628            gen_jmp_im(s, pc_start - s->cs_base);
7629            gen_lea_modrm(env, s, modrm);
7630            gen_helper_invlpg(cpu_env, s->A0);
7631            gen_jmp_im(s, s->pc - s->cs_base);
7632            gen_eob(s);
7633            break;
7634
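            /* SWAPGS (64-bit only, CPL 0) exchanges the GS base with
               the kernelgsbase value held in CPUX86State.  */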
7635        case 0xf8: /* swapgs */
7636#ifdef TARGET_X86_64
7637            if (CODE64(s)) {
7638                if (s->cpl != 0) {
7639                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7640                } else {
7641                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7642                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7643                                  offsetof(CPUX86State, kernelgsbase));
7644                    tcg_gen_st_tl(s->T0, cpu_env,
7645                                  offsetof(CPUX86State, kernelgsbase));
7646                }
7647                break;
7648            }
7649#endif
7650            goto illegal_op;
7651
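            /* RDTSCP counts as an I/O operation for icount purposes,
               so gen_io_start is emitted before the helper and the
               block ends with a jump when icount is in use.  */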
7652        case 0xf9: /* rdtscp */
7653            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7654                goto illegal_op;
7655            }
7656            gen_update_cc_op(s);
7657            gen_jmp_im(s, pc_start - s->cs_base);
7658            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7659                gen_io_start();
7660            }
7661            gen_helper_rdtscp(cpu_env);
7662            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7663                gen_jmp(s, s->pc - s->cs_base);
7664            }
7665            break;
7666
7667        default:
7668            goto unknown_op;
7669        }
7670        break;
7671
7672    case 0x108: /* invd */
7673    case 0x109: /* wbinvd */
7674        if (s->cpl != 0) {
7675            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7676        } else {
7677            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7678            /* nothing to do */
7679        }
7680        break;
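    /* Opcode 0x63 is MOVSXD (sign-extending 32->64-bit move) in
       64-bit mode and ARPL elsewhere; ARPL raises the RPL field
       (low two bits) of a selector to match the source and sets
       ZF if an adjustment was made.  */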
7681    case 0x63: /* arpl or movslS (x86_64) */
7682#ifdef TARGET_X86_64
7683        if (CODE64(s)) {
7684            int d_ot;
7685            /* d_ot is the size of the destination */
7686            d_ot = dflag;
7687
7688            modrm = x86_ldub_code(env, s);
7689            reg = ((modrm >> 3) & 7) | rex_r;
7690            mod = (modrm >> 6) & 3;
7691            rm = (modrm & 7) | REX_B(s);
7692
7693            if (mod == 3) {
7694                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7695                /* sign extend */
7696                if (d_ot == MO_64) {
7697                    tcg_gen_ext32s_tl(s->T0, s->T0);
7698                }
7699                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7700            } else {
7701                gen_lea_modrm(env, s, modrm);
7702                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7703                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7704            }
7705        } else
7706#endif
7707        {
7708            TCGLabel *label1;
7709            TCGv t0, t1, t2, a0;
7710
7711            if (!s->pe || s->vm86)
7712                goto illegal_op;
7713            t0 = tcg_temp_local_new();
7714            t1 = tcg_temp_local_new();
7715            t2 = tcg_temp_local_new();
7716            ot = MO_16;
7717            modrm = x86_ldub_code(env, s);
7718            reg = (modrm >> 3) & 7;
7719            mod = (modrm >> 6) & 3;
7720            rm = modrm & 7;
7721            if (mod != 3) {
7722                gen_lea_modrm(env, s, modrm);
7723                gen_op_ld_v(s, ot, t0, s->A0);
7724                a0 = tcg_temp_local_new();
7725                tcg_gen_mov_tl(a0, s->A0);
7726            } else {
7727                gen_op_mov_v_reg(s, ot, t0, rm);
7728                a0 = NULL;
7729            }
7730            gen_op_mov_v_reg(s, ot, t1, reg);
7731            tcg_gen_andi_tl(s->tmp0, t0, 3);
7732            tcg_gen_andi_tl(t1, t1, 3);
7733            tcg_gen_movi_tl(t2, 0);
7734            label1 = gen_new_label();
7735            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7736            tcg_gen_andi_tl(t0, t0, ~3);
7737            tcg_gen_or_tl(t0, t0, t1);
7738            tcg_gen_movi_tl(t2, CC_Z);
7739            gen_set_label(label1);
7740            if (mod != 3) {
7741                gen_op_st_v(s, ot, t0, a0);
7742                tcg_temp_free(a0);
7743            } else {
7744                gen_op_mov_reg_v(s, ot, rm, t0);
7745            }
7746            gen_compute_eflags(s);
7747            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7748            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7749            tcg_temp_free(t0);
7750            tcg_temp_free(t1);
7751            tcg_temp_free(t2);
7752        }
7753        break;
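    /* LAR/LSL load the access rights or the segment limit of a
       selector; the helpers report success through CC_Z in cc_src,
       and the destination is only written when ZF is set.  */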
7754    case 0x102: /* lar */
7755    case 0x103: /* lsl */
7756        {
7757            TCGLabel *label1;
7758            TCGv t0;
7759            if (!s->pe || s->vm86)
7760                goto illegal_op;
7761            ot = dflag != MO_16 ? MO_32 : MO_16;
7762            modrm = x86_ldub_code(env, s);
7763            reg = ((modrm >> 3) & 7) | rex_r;
7764            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7765            t0 = tcg_temp_local_new();
7766            gen_update_cc_op(s);
7767            if (b == 0x102) {
7768                gen_helper_lar(t0, cpu_env, s->T0);
7769            } else {
7770                gen_helper_lsl(t0, cpu_env, s->T0);
7771            }
7772            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7773            label1 = gen_new_label();
7774            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7775            gen_op_mov_reg_v(s, ot, reg, t0);
7776            gen_set_label(label1);
7777            set_cc_op(s, CC_OP_EFLAGS);
7778            tcg_temp_free(t0);
7779        }
7780        break;
7781    case 0x118:
7782        modrm = x86_ldub_code(env, s);
7783        mod = (modrm >> 6) & 3;
7784        op = (modrm >> 3) & 7;
7785        switch(op) {
7786        case 0: /* prefetchnta */
7787        case 1: /* prefetcht0 */
7788        case 2: /* prefetcht1 */
7789        case 3: /* prefetcht2 */
7790            if (mod == 3)
7791                goto illegal_op;
7792            gen_nop_modrm(env, s, modrm);
7793            /* nothing more to do */
7794            break;
7795        default: /* nop (multi byte) */
7796            gen_nop_modrm(env, s, modrm);
7797            break;
7798        }
7799        break;
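    /* 0F 1A and 0F 1B are the MPX bnd* instructions when MPX is
       enabled; any form not recognized below falls through to
       gen_nop_modrm, matching hardware where these opcodes are
       otherwise multi-byte NOPs.  */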
7800    case 0x11a:
7801        modrm = x86_ldub_code(env, s);
7802        if (s->flags & HF_MPX_EN_MASK) {
7803            mod = (modrm >> 6) & 3;
7804            reg = ((modrm >> 3) & 7) | rex_r;
7805            if (prefixes & PREFIX_REPZ) {
7806                /* bndcl */
7807                if (reg >= 4
7808                    || (prefixes & PREFIX_LOCK)
7809                    || s->aflag == MO_16) {
7810                    goto illegal_op;
7811                }
7812                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7813            } else if (prefixes & PREFIX_REPNZ) {
7814                /* bndcu */
7815                if (reg >= 4
7816                    || (prefixes & PREFIX_LOCK)
7817                    || s->aflag == MO_16) {
7818                    goto illegal_op;
7819                }
7820                TCGv_i64 notu = tcg_temp_new_i64();
7821                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7822                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7823                tcg_temp_free_i64(notu);
7824            } else if (prefixes & PREFIX_DATA) {
7825                /* bndmov -- from reg/mem */
7826                if (reg >= 4 || s->aflag == MO_16) {
7827                    goto illegal_op;
7828                }
7829                if (mod == 3) {
7830                    int reg2 = (modrm & 7) | REX_B(s);
7831                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7832                        goto illegal_op;
7833                    }
7834                    if (s->flags & HF_MPX_IU_MASK) {
7835                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7836                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7837                    }
7838                } else {
7839                    gen_lea_modrm(env, s, modrm);
7840                    if (CODE64(s)) {
7841                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7842                                            s->mem_index, MO_LEQ);
7843                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7844                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7845                                            s->mem_index, MO_LEQ);
7846                    } else {
7847                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7848                                            s->mem_index, MO_LEUL);
7849                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7850                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7851                                            s->mem_index, MO_LEUL);
7852                    }
7853                    /* bnd registers are now in use */
7854                    gen_set_hflag(s, HF_MPX_IU_MASK);
7855                }
7856            } else if (mod != 3) {
7857                /* bndldx */
7858                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7859                if (reg >= 4
7860                    || (prefixes & PREFIX_LOCK)
7861                    || s->aflag == MO_16
7862                    || a.base < -1) {
7863                    goto illegal_op;
7864                }
7865                if (a.base >= 0) {
7866                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7867                } else {
7868                    tcg_gen_movi_tl(s->A0, 0);
7869                }
7870                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7871                if (a.index >= 0) {
7872                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7873                } else {
7874                    tcg_gen_movi_tl(s->T0, 0);
7875                }
7876                if (CODE64(s)) {
7877                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7878                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7879                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7880                } else {
7881                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7882                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7883                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7884                }
7885                gen_set_hflag(s, HF_MPX_IU_MASK);
7886            }
7887        }
7888        gen_nop_modrm(env, s, modrm);
7889        break;
7890    case 0x11b:
7891        modrm = x86_ldub_code(env, s);
7892        if (s->flags & HF_MPX_EN_MASK) {
7893            mod = (modrm >> 6) & 3;
7894            reg = ((modrm >> 3) & 7) | rex_r;
7895            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7896                /* bndmk */
7897                if (reg >= 4
7898                    || (prefixes & PREFIX_LOCK)
7899                    || s->aflag == MO_16) {
7900                    goto illegal_op;
7901                }
7902                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7903                if (a.base >= 0) {
7904                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7905                    if (!CODE64(s)) {
7906                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7907                    }
7908                } else if (a.base == -1) {
7909                    /* no base register: the lower bound is 0 */
7910                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7911                } else {
7912                    /* rip-relative generates #ud */
7913                    goto illegal_op;
7914                }
7915                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7916                if (!CODE64(s)) {
7917                    tcg_gen_ext32u_tl(s->A0, s->A0);
7918                }
7919                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7920                /* bnd registers are now in use */
7921                gen_set_hflag(s, HF_MPX_IU_MASK);
7922                break;
7923            } else if (prefixes & PREFIX_REPNZ) {
7924                /* bndcn */
7925                if (reg >= 4
7926                    || (prefixes & PREFIX_LOCK)
7927                    || s->aflag == MO_16) {
7928                    goto illegal_op;
7929                }
7930                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7931            } else if (prefixes & PREFIX_DATA) {
7932                /* bndmov -- to reg/mem */
7933                if (reg >= 4 || s->aflag == MO_16) {
7934                    goto illegal_op;
7935                }
7936                if (mod == 3) {
7937                    int reg2 = (modrm & 7) | REX_B(s);
7938                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7939                        goto illegal_op;
7940                    }
7941                    if (s->flags & HF_MPX_IU_MASK) {
7942                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7943                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7944                    }
7945                } else {
7946                    gen_lea_modrm(env, s, modrm);
7947                    if (CODE64(s)) {
7948                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7949                                            s->mem_index, MO_LEQ);
7950                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7951                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7952                                            s->mem_index, MO_LEQ);
7953                    } else {
7954                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7955                                            s->mem_index, MO_LEUL);
7956                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7957                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7958                                            s->mem_index, MO_LEUL);
7959                    }
7960                }
7961            } else if (mod != 3) {
7962                /* bndstx */
7963                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7964                if (reg >= 4
7965                    || (prefixes & PREFIX_LOCK)
7966                    || s->aflag == MO_16
7967                    || a.base < -1) {
7968                    goto illegal_op;
7969                }
7970                if (a.base >= 0) {
7971                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7972                } else {
7973                    tcg_gen_movi_tl(s->A0, 0);
7974                }
7975                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7976                if (a.index >= 0) {
7977                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7978                } else {
7979                    tcg_gen_movi_tl(s->T0, 0);
7980                }
7981                if (CODE64(s)) {
7982                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7983                                        cpu_bndl[reg], cpu_bndu[reg]);
7984                } else {
7985                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7986                                        cpu_bndl[reg], cpu_bndu[reg]);
7987                }
7988            }
7989        }
7990        gen_nop_modrm(env, s, modrm);
7991        break;
7992    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7993        modrm = x86_ldub_code(env, s);
7994        gen_nop_modrm(env, s, modrm);
7995        break;
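    /* MOV to/from control registers.  CR8 is reached through a LOCK
       prefix on CR0 when the CR8-legacy CPUID bit is set; writes end
       the TB because they may change translation-relevant state.  */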
7996    case 0x120: /* mov reg, crN */
7997    case 0x122: /* mov crN, reg */
7998        if (s->cpl != 0) {
7999            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8000        } else {
8001            modrm = x86_ldub_code(env, s);
8002            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8003             * AMD documentation (24594.pdf) and testing of
8004             * Intel 386 and 486 processors all show that the mod bits
8005             * are assumed to be 1's, regardless of actual values.
8006             */
8007            rm = (modrm & 7) | REX_B(s);
8008            reg = ((modrm >> 3) & 7) | rex_r;
8009            if (CODE64(s))
8010                ot = MO_64;
8011            else
8012                ot = MO_32;
8013            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
8014                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8015                reg = 8;
8016            }
8017            switch(reg) {
8018            case 0:
8019            case 2:
8020            case 3:
8021            case 4:
8022            case 8:
8023                gen_update_cc_op(s);
8024                gen_jmp_im(s, pc_start - s->cs_base);
8025                if (b & 2) {
8026                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8027                        gen_io_start();
8028                    }
8029                    gen_op_mov_v_reg(s, ot, s->T0, rm);
8030                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8031                                         s->T0);
8032                    gen_jmp_im(s, s->pc - s->cs_base);
8033                    gen_eob(s);
8034                } else {
8035                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8036                        gen_io_start();
8037                    }
8038                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8039                    gen_op_mov_reg_v(s, ot, rm, s->T0);
8040                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8041                        gen_io_end();
8042                    }
8043                }
8044                break;
8045            default:
8046                goto unknown_op;
8047            }
8048        }
8049        break;
8050    case 0x121: /* mov reg, drN */
8051    case 0x123: /* mov drN, reg */
8052        if (s->cpl != 0) {
8053            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8054        } else {
8055            modrm = x86_ldub_code(env, s);
8056            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8057             * AMD documentation (24594.pdf) and testing of
8058             * Intel 386 and 486 processors all show that the mod bits
8059             * are assumed to be 1's, regardless of actual values.
8060             */
8061            rm = (modrm & 7) | REX_B(s);
8062            reg = ((modrm >> 3) & 7) | rex_r;
8063            if (CODE64(s))
8064                ot = MO_64;
8065            else
8066                ot = MO_32;
8067            if (reg >= 8) {
8068                goto illegal_op;
8069            }
8070            if (b & 2) {
8071                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8072                gen_op_mov_v_reg(s, ot, s->T0, rm);
8073                tcg_gen_movi_i32(s->tmp2_i32, reg);
8074                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8075                gen_jmp_im(s, s->pc - s->cs_base);
8076                gen_eob(s);
8077            } else {
8078                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8079                tcg_gen_movi_i32(s->tmp2_i32, reg);
8080                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8081                gen_op_mov_reg_v(s, ot, rm, s->T0);
8082            }
8083        }
8084        break;
8085    case 0x106: /* clts */
8086        if (s->cpl != 0) {
8087            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8088        } else {
8089            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8090            gen_helper_clts(cpu_env);
8091            /* abort block because static cpu state changed */
8092            gen_jmp_im(s, s->pc - s->cs_base);
8093            gen_eob(s);
8094        }
8095        break;
8096    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8097    case 0x1c3: /* MOVNTI reg, mem */
8098        if (!(s->cpuid_features & CPUID_SSE2))
8099            goto illegal_op;
8100        ot = mo_64_32(dflag);
8101        modrm = x86_ldub_code(env, s);
8102        mod = (modrm >> 6) & 3;
8103        if (mod == 3)
8104            goto illegal_op;
8105        reg = ((modrm >> 3) & 7) | rex_r;
8106        /* generate a generic store */
8107        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8108        break;
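    /* The 0F AE group dispatches on the whole modrm byte: the memory
       forms are fxsave/fxrstor, ldmxcsr/stmxcsr, xsave/xrstor,
       xsaveopt/clwb and clflush/clflushopt; the register forms are
       the rd/wr fs/gsbase instructions and the lfence, mfence and
       sfence barriers.  */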
8109    case 0x1ae:
8110        modrm = x86_ldub_code(env, s);
8111        switch (modrm) {
8112        CASE_MODRM_MEM_OP(0): /* fxsave */
8113            if (!(s->cpuid_features & CPUID_FXSR)
8114                || (prefixes & PREFIX_LOCK)) {
8115                goto illegal_op;
8116            }
8117            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8118                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8119                break;
8120            }
8121            gen_lea_modrm(env, s, modrm);
8122            gen_helper_fxsave(cpu_env, s->A0);
8123            break;
8124
8125        CASE_MODRM_MEM_OP(1): /* fxrstor */
8126            if (!(s->cpuid_features & CPUID_FXSR)
8127                || (prefixes & PREFIX_LOCK)) {
8128                goto illegal_op;
8129            }
8130            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8131                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8132                break;
8133            }
8134            gen_lea_modrm(env, s, modrm);
8135            gen_helper_fxrstor(cpu_env, s->A0);
8136            break;
8137
8138        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8139            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8140                goto illegal_op;
8141            }
8142            if (s->flags & HF_TS_MASK) {
8143                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8144                break;
8145            }
8146            gen_lea_modrm(env, s, modrm);
8147            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8148            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8149            break;
8150
8151        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8152            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8153                goto illegal_op;
8154            }
8155            if (s->flags & HF_TS_MASK) {
8156                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8157                break;
8158            }
8159            gen_lea_modrm(env, s, modrm);
8160            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8161            gen_op_st_v(s, MO_32, s->T0, s->A0);
8162            break;
8163
8164        CASE_MODRM_MEM_OP(4): /* xsave */
8165            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8166                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8167                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8168                goto illegal_op;
8169            }
8170            gen_lea_modrm(env, s, modrm);
8171            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8172                                  cpu_regs[R_EDX]);
8173            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8174            break;
8175
8176        CASE_MODRM_MEM_OP(5): /* xrstor */
8177            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8178                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8179                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8180                goto illegal_op;
8181            }
8182            gen_lea_modrm(env, s, modrm);
8183            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8184                                  cpu_regs[R_EDX]);
8185            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8186            /* XRSTOR is how MPX is enabled, which changes how
8187               we translate.  Thus we need to end the TB.  */
8188            gen_update_cc_op(s);
8189            gen_jmp_im(s, s->pc - s->cs_base);
8190            gen_eob(s);
8191            break;
8192
8193        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8194            if (prefixes & PREFIX_LOCK) {
8195                goto illegal_op;
8196            }
8197            if (prefixes & PREFIX_DATA) {
8198                /* clwb */
8199                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8200                    goto illegal_op;
8201                }
8202                gen_nop_modrm(env, s, modrm);
8203            } else {
8204                /* xsaveopt */
8205                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8206                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8207                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8208                    goto illegal_op;
8209                }
8210                gen_lea_modrm(env, s, modrm);
8211                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8212                                      cpu_regs[R_EDX]);
8213                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8214            }
8215            break;
8216
8217        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8218            if (prefixes & PREFIX_LOCK) {
8219                goto illegal_op;
8220            }
8221            if (prefixes & PREFIX_DATA) {
8222                /* clflushopt */
8223                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8224                    goto illegal_op;
8225                }
8226            } else {
8227                /* clflush */
8228                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8229                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8230                    goto illegal_op;
8231                }
8232            }
8233            gen_nop_modrm(env, s, modrm);
8234            break;
8235
8236        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8237        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8238        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8239        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8240            if (CODE64(s)
8241                && (prefixes & PREFIX_REPZ)
8242                && !(prefixes & PREFIX_LOCK)
8243                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8244                TCGv base, treg, src, dst;
8245
8246                /* Preserve hflags bits by testing CR4 at runtime.  */
8247                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8248                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8249
8250                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8251                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8252
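                    /* Bit 3 of modrm selected GS vs FS above; bit 4
                       selects the write form (wr*base) over the read
                       form (rd*base).  */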
8253                if (modrm & 0x10) {
8254                    /* wr*base */
8255                    dst = base, src = treg;
8256                } else {
8257                    /* rd*base */
8258                    dst = treg, src = base;
8259                }
8260
8261                if (s->dflag == MO_32) {
8262                    tcg_gen_ext32u_tl(dst, src);
8263                } else {
8264                    tcg_gen_mov_tl(dst, src);
8265                }
8266                break;
8267            }
8268            goto unknown_op;
8269
8270        case 0xf8: /* sfence / pcommit */
8271            if (prefixes & PREFIX_DATA) {
8272                /* pcommit */
8273                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8274                    || (prefixes & PREFIX_LOCK)) {
8275                    goto illegal_op;
8276                }
8277                break;
8278            }
8279            /* fallthru */
8280        case 0xf9 ... 0xff: /* sfence */
8281            if (!(s->cpuid_features & CPUID_SSE)
8282                || (prefixes & PREFIX_LOCK)) {
8283                goto illegal_op;
8284            }
8285            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8286            break;
8287        case 0xe8 ... 0xef: /* lfence */
8288            if (!(s->cpuid_features & CPUID_SSE)
8289                || (prefixes & PREFIX_LOCK)) {
8290                goto illegal_op;
8291            }
8292            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8293            break;
8294        case 0xf0 ... 0xf7: /* mfence */
8295            if (!(s->cpuid_features & CPUID_SSE2)
8296                || (prefixes & PREFIX_LOCK)) {
8297                goto illegal_op;
8298            }
8299            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8300            break;
8301
8302        default:
8303            goto unknown_op;
8304        }
8305        break;
8306
8307    case 0x10d: /* 3DNow! prefetch(w) */
8308        modrm = x86_ldub_code(env, s);
8309        mod = (modrm >> 6) & 3;
8310        if (mod == 3)
8311            goto illegal_op;
8312        gen_nop_modrm(env, s, modrm);
8313        break;
8314    case 0x1aa: /* rsm */
8315        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8316        if (!(s->flags & HF_SMM_MASK))
8317            goto illegal_op;
8318        gen_update_cc_op(s);
8319        gen_jmp_im(s, s->pc - s->cs_base);
8320        gen_helper_rsm(cpu_env);
8321        gen_eob(s);
8322        break;
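    /* POPCNT is encoded F3 0F B8, so exactly the REPZ prefix is
       required; the population count is computed with
       tcg_gen_ctpop_tl and the flags use the dedicated
       CC_OP_POPCNT mode.  */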
8323    case 0x1b8: /* SSE4.2 popcnt */
8324        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8325             PREFIX_REPZ)
8326            goto illegal_op;
8327        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8328            goto illegal_op;
8329
8330        modrm = x86_ldub_code(env, s);
8331        reg = ((modrm >> 3) & 7) | rex_r;
8332
8333        if (s->prefix & PREFIX_DATA) {
8334            ot = MO_16;
8335        } else {
8336            ot = mo_64_32(dflag);
8337        }
8338
8339        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8340        gen_extu(ot, s->T0);
8341        tcg_gen_mov_tl(cpu_cc_src, s->T0);
8342        tcg_gen_ctpop_tl(s->T0, s->T0);
8343        gen_op_mov_reg_v(s, ot, reg, s->T0);
8344
8345        set_cc_op(s, CC_OP_POPCNT);
8346        break;
8347    case 0x10e ... 0x10f:
8348        /* 3DNow! instructions, ignore prefixes */
8349        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8350        /* fall through */
8351    case 0x110 ... 0x117:
8352    case 0x128 ... 0x12f:
8353    case 0x138 ... 0x13a:
8354    case 0x150 ... 0x179:
8355    case 0x17c ... 0x17f:
8356    case 0x1c2:
8357    case 0x1c4 ... 0x1c6:
8358    case 0x1d0 ... 0x1fe:
8359        gen_sse(env, s, b, pc_start, rex_r);
8360        break;
8361    default:
8362        goto unknown_op;
8363    }
8364    return s->pc;
8365 illegal_op:
8366    gen_illegal_opcode(s);
8367    return s->pc;
8368 unknown_op:
8369    gen_unknown_opcode(env, s);
8370    return s->pc;
8371}
8372
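    /* Allocate the TCG globals that mirror CPUX86State fields: the
       condition-code variables, the general-purpose registers, the
       segment bases and the MPX bound registers.  */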
8373void tcg_x86_init(void)
8374{
8375    static const char reg_names[CPU_NB_REGS][4] = {
8376#ifdef TARGET_X86_64
8377        [R_EAX] = "rax",
8378        [R_EBX] = "rbx",
8379        [R_ECX] = "rcx",
8380        [R_EDX] = "rdx",
8381        [R_ESI] = "rsi",
8382        [R_EDI] = "rdi",
8383        [R_EBP] = "rbp",
8384        [R_ESP] = "rsp",
8385        [8]  = "r8",
8386        [9]  = "r9",
8387        [10] = "r10",
8388        [11] = "r11",
8389        [12] = "r12",
8390        [13] = "r13",
8391        [14] = "r14",
8392        [15] = "r15",
8393#else
8394        [R_EAX] = "eax",
8395        [R_EBX] = "ebx",
8396        [R_ECX] = "ecx",
8397        [R_EDX] = "edx",
8398        [R_ESI] = "esi",
8399        [R_EDI] = "edi",
8400        [R_EBP] = "ebp",
8401        [R_ESP] = "esp",
8402#endif
8403    };
8404    static const char seg_base_names[6][8] = {
8405        [R_CS] = "cs_base",
8406        [R_DS] = "ds_base",
8407        [R_ES] = "es_base",
8408        [R_FS] = "fs_base",
8409        [R_GS] = "gs_base",
8410        [R_SS] = "ss_base",
8411    };
8412    static const char bnd_regl_names[4][8] = {
8413        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8414    };
8415    static const char bnd_regu_names[4][8] = {
8416        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8417    };
8418    int i;
8419
8420    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8421                                       offsetof(CPUX86State, cc_op), "cc_op");
8422    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8423                                    "cc_dst");
8424    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8425                                    "cc_src");
8426    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8427                                     "cc_src2");
8428
8429    for (i = 0; i < CPU_NB_REGS; ++i) {
8430        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8431                                         offsetof(CPUX86State, regs[i]),
8432                                         reg_names[i]);
8433    }
8434
8435    for (i = 0; i < 6; ++i) {
8436        cpu_seg_base[i]
8437            = tcg_global_mem_new(cpu_env,
8438                                 offsetof(CPUX86State, segs[i].base),
8439                                 seg_base_names[i]);
8440    }
8441
8442    for (i = 0; i < 4; ++i) {
8443        cpu_bndl[i]
8444            = tcg_global_mem_new_i64(cpu_env,
8445                                     offsetof(CPUX86State, bnd_regs[i].lb),
8446                                     bnd_regl_names[i]);
8447        cpu_bndu[i]
8448            = tcg_global_mem_new_i64(cpu_env,
8449                                     offsetof(CPUX86State, bnd_regs[i].ub),
8450                                     bnd_regu_names[i]);
8451    }
8452}
8453
8454static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8455{
8456    DisasContext *dc = container_of(dcbase, DisasContext, base);
8457    CPUX86State *env = cpu->env_ptr;
8458    uint32_t flags = dc->base.tb->flags;
8459    target_ulong cs_base = dc->base.tb->cs_base;
8460
8461    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8462    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8463    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8464    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8465    dc->f_st = 0;
8466    dc->vm86 = (flags >> VM_SHIFT) & 1;
8467    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8468    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8469    dc->tf = (flags >> TF_SHIFT) & 1;
8470    dc->cc_op = CC_OP_DYNAMIC;
8471    dc->cc_op_dirty = false;
8472    dc->cs_base = cs_base;
8473    dc->popl_esp_hack = 0;
8474    /* select memory access functions */
8475    dc->mem_index = 0;
8476#ifdef CONFIG_SOFTMMU
8477    dc->mem_index = cpu_mmu_index(env, false);
8478#endif
8479    dc->cpuid_features = env->features[FEAT_1_EDX];
8480    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8481    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8482    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8483    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8484    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8485#ifdef TARGET_X86_64
8486    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8487    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8488#endif
8489    dc->flags = flags;
8490    dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8491                    (flags & HF_INHIBIT_IRQ_MASK));
8492    /* Do not optimize repz jumps at all in icount mode, because
8493       rep movsS instructions are executed with different paths
8494       in the !repz_opt and repz_opt modes.  The first one is used
8495       always, except in single step mode.  This setting disables
8496       the jump optimization so that the control paths become
8497       equivalent in run and single step modes.
8498       As a result there is no jump optimization for repz in
8499       record/replay modes, and there is always an additional
8500       step for ecx=0 when icount is enabled.
8501     */
8502    dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8503#if 0
8504    /* check addseg logic */
8505    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8506        printf("ERROR addseg\n");
8507#endif
8508
8509    dc->T0 = tcg_temp_new();
8510    dc->T1 = tcg_temp_new();
8511    dc->A0 = tcg_temp_new();
8512
8513    dc->tmp0 = tcg_temp_new();
8514    dc->tmp1_i64 = tcg_temp_new_i64();
8515    dc->tmp2_i32 = tcg_temp_new_i32();
8516    dc->tmp3_i32 = tcg_temp_new_i32();
8517    dc->tmp4 = tcg_temp_new();
8518    dc->ptr0 = tcg_temp_new_ptr();
8519    dc->ptr1 = tcg_temp_new_ptr();
8520    dc->cc_srcT = tcg_temp_local_new();
8521}
8522
8523static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8524{
8525}
8526
8527static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8528{
8529    DisasContext *dc = container_of(dcbase, DisasContext, base);
8530
8531    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8532}
8533
8534static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8535                                     const CPUBreakpoint *bp)
8536{
8537    DisasContext *dc = container_of(dcbase, DisasContext, base);
8538    /* If RF is set, suppress an internally generated breakpoint.  */
8539    int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8540    if (bp->flags & flags) {
8541        gen_debug(dc, dc->base.pc_next - dc->cs_base);
8542        dc->base.is_jmp = DISAS_NORETURN;
8543        /* The address covered by the breakpoint must be included in
8544           [tb->pc, tb->pc + tb->size) in order for it to be
8545           properly cleared -- thus we increment the PC here so that
8546           the generic logic setting tb->size later does the right thing.  */
8547        dc->base.pc_next += 1;
8548        return true;
8549    } else {
8550        return false;
8551    }
8552}
8553
8554static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8555{
8556    DisasContext *dc = container_of(dcbase, DisasContext, base);
8557    target_ulong pc_next = disas_insn(dc, cpu);
8558
8559    if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8560        /* In single step mode, we generate only one instruction and
8561           then generate an exception.  */
8562        /* If irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8563           the flag and abort the translation to give the irqs a
8564           chance to happen.  */
8565        dc->base.is_jmp = DISAS_TOO_MANY;
8566    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8567               && ((pc_next & TARGET_PAGE_MASK)
8568                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8569                       & TARGET_PAGE_MASK)
8570                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8571        /* Do not cross a page boundary in icount mode, since doing
8572           so can cause an exception.  Stop only when the boundary
8573           would be crossed by the first instruction in the block.
8574           If the current instruction has already crossed it, that
8575           is fine, because an exception has not stopped this code.
8576         */
8577        dc->base.is_jmp = DISAS_TOO_MANY;
8578    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8579        dc->base.is_jmp = DISAS_TOO_MANY;
8580    }
8581
8582    dc->base.pc_next = pc_next;
8583}
8584
8585static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8586{
8587    DisasContext *dc = container_of(dcbase, DisasContext, base);
8588
8589    if (dc->base.is_jmp == DISAS_TOO_MANY) {
8590        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8591        gen_eob(dc);
8592    }
8593}
8594
8595static void i386_tr_disas_log(const DisasContextBase *dcbase,
8596                              CPUState *cpu)
8597{
8598    DisasContext *dc = container_of(dcbase, DisasContext, base);
8599
8600    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8601    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8602}
8603
8604static const TranslatorOps i386_tr_ops = {
8605    .init_disas_context = i386_tr_init_disas_context,
8606    .tb_start           = i386_tr_tb_start,
8607    .insn_start         = i386_tr_insn_start,
8608    .breakpoint_check   = i386_tr_breakpoint_check,
8609    .translate_insn     = i386_tr_translate_insn,
8610    .tb_stop            = i386_tr_tb_stop,
8611    .disas_log          = i386_tr_disas_log,
8612};
8613
8614/* generate intermediate code for basic block 'tb'.  */
8615void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8616{
8617    DisasContext dc;
8618
8619    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8620}
8621
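    /* data[] comes from tcg_gen_insn_start in i386_tr_insn_start:
       data[0] is the pc (eip + cs_base) and data[1] the cc_op at
       that insn.  */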
8622void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8623                          target_ulong *data)
8624{
8625    int cc_op = data[1];
8626    env->eip = data[0] - tb->cs_base;
8627    if (cc_op != CC_OP_DYNAMIC) {
8628        env->cc_op = cc_op;
8629    }
8630}
8631