/* qemu/target/i386/translate.c */
   1/*
   2 *  i386 translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20
  21#include "qemu/host-utils.h"
  22#include "cpu.h"
  23#include "disas/disas.h"
  24#include "exec/exec-all.h"
  25#include "tcg-op.h"
  26#include "exec/cpu_ldst.h"
  27#include "exec/translator.h"
  28
  29#include "exec/helper-proto.h"
  30#include "exec/helper-gen.h"
  31
  32#include "trace-tcg.h"
  33#include "exec/log.h"
  34
  35#define PREFIX_REPZ   0x01
  36#define PREFIX_REPNZ  0x02
  37#define PREFIX_LOCK   0x04
  38#define PREFIX_DATA   0x08
  39#define PREFIX_ADR    0x10
  40#define PREFIX_VEX    0x20
  41
  42#ifdef TARGET_X86_64
  43#define CODE64(s) ((s)->code64)
  44#define REX_X(s) ((s)->rex_x)
  45#define REX_B(s) ((s)->rex_b)
  46#else
  47#define CODE64(s) 0
  48#define REX_X(s) 0
  49#define REX_B(s) 0
  50#endif
  51
  52#ifdef TARGET_X86_64
  53# define ctztl  ctz64
  54# define clztl  clz64
  55#else
  56# define ctztl  ctz32
  57# define clztl  clz32
  58#endif
  59
  60/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
  61#define CASE_MODRM_MEM_OP(OP) \
  62    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
  63    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
  64    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
  65
  66#define CASE_MODRM_OP(OP) \
  67    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
  68    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
  69    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
  70    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
  71
  72//#define MACRO_TEST   1
  73
  74/* global register indexes */
  75static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
  76static TCGv_i32 cpu_cc_op;
  77static TCGv cpu_regs[CPU_NB_REGS];
  78static TCGv cpu_seg_base[6];
  79static TCGv_i64 cpu_bndl[4];
  80static TCGv_i64 cpu_bndu[4];
  81
  82#include "exec/gen-icount.h"
  83
/* Per-translation-block decoder state for the x86 front end.  One of
   these lives for the duration of translating a single TB; it carries
   the instruction context (prefixes, operand/address sizes), the CPU
   mode flags sampled at TB start, and the translator's TCG temporaries. */
typedef struct DisasContext {
    DisasContextBase base;      /* common translator state (pc_first, is_jmp, ...) */

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;   /* PREFIX_* bits seen on the current insn */
    TCGMemOp aflag;             /* address size of current insn */
    TCGMemOp dflag;             /* data size of current insn */
    target_ulong pc_start;      /* address of the first byte of current insn */
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;           /* REX.X / REX.B prefix bits */
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;           /* true if cc_op must be written back to env */
#ifdef TARGET_X86_64
    bool x86_64_hregs;          /* REX seen: regs 4..7 are SPL..DIL, not AH..BH */
#endif
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;    /* current privilege level */
    int iopl;   /* I/O privilege level from EFLAGS */
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;         /* cached CPUID feature words for decode checks */
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;          /* longjmp target on decode overrun */
} DisasContext;
 147
 148static void gen_eob(DisasContext *s);
 149static void gen_jr(DisasContext *s, TCGv dest);
 150static void gen_jmp(DisasContext *s, target_ulong eip);
 151static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
 152static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
 153
/* i386 arith/logic operations -- order matches the opcode /r group encoding */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};
 165
/* i386 shift ops -- order matches the shift-group opcode encoding */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7, /* shares encoding slot 7 with the undocumented SHL alias */
};
 177
/* Condition codes for Jcc/SETcc, bits 3:1 of the opcode; bit 0 inverts. */
enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};
 188
enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    /* pseudo-register indexes beyond the architectural set */
    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};
 204
/* Bitmask of which lazy-flags globals a CC_OP value reads (see cc_op_live). */
enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};
 211
/* Bit set if the global variable is live after setting CC_OP to X.
   Indexed by CC_OP value; consumed by set_cc_op() to discard dead
   flag sources and by gen_compute_eflags() to avoid reading them. */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};
 233
/* Switch the translator's current lazy-flags operation to OP.  Any
   flag-source global that the new op no longer reads is discarded so
   TCG can treat it as dead; the dirty flag tracks whether cc_op must
   still be stored back to env before a helper or TB exit.  */
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}
 270
 271static void gen_update_cc_op(DisasContext *s)
 272{
 273    if (s->cc_op_dirty) {
 274        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
 275        s->cc_op_dirty = false;
 276    }
 277}
 278
 279#ifdef TARGET_X86_64
 280
 281#define NB_OP_SIZES 4
 282
 283#else /* !TARGET_X86_64 */
 284
 285#define NB_OP_SIZES 3
 286
 287#endif /* !TARGET_X86_64 */
 288
 289#if defined(HOST_WORDS_BIGENDIAN)
 290#define REG_B_OFFSET (sizeof(target_ulong) - 1)
 291#define REG_H_OFFSET (sizeof(target_ulong) - 2)
 292#define REG_W_OFFSET (sizeof(target_ulong) - 2)
 293#define REG_L_OFFSET (sizeof(target_ulong) - 4)
 294#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
 295#else
 296#define REG_B_OFFSET 0
 297#define REG_H_OFFSET 1
 298#define REG_W_OFFSET 0
 299#define REG_L_OFFSET 0
 300#define REG_LH_OFFSET 4
 301#endif
 302
 303/* In instruction encodings for byte register accesses the
 304 * register number usually indicates "low 8 bits of register N";
 305 * however there are some special cases where N 4..7 indicates
 306 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 307 * true for this special case, false otherwise.
 308 */
 309static inline bool byte_reg_is_xH(DisasContext *s, int reg)
 310{
 311    if (reg < 4) {
 312        return false;
 313    }
 314#ifdef TARGET_X86_64
 315    if (reg >= 8 || s->x86_64_hregs) {
 316        return false;
 317    }
 318#endif
 319    return true;
 320}
 321
 322/* Select the size of a push/pop operation.  */
 323static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
 324{
 325    if (CODE64(s)) {
 326        return ot == MO_16 ? MO_16 : MO_64;
 327    } else {
 328        return ot;
 329    }
 330}
 331
 332/* Select the size of the stack pointer.  */
 333static inline TCGMemOp mo_stacksize(DisasContext *s)
 334{
 335    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
 336}
 337
 338/* Select only size 64 else 32.  Used for SSE operand sizes.  */
 339static inline TCGMemOp mo_64_32(TCGMemOp ot)
 340{
 341#ifdef TARGET_X86_64
 342    return ot == MO_64 ? MO_64 : MO_32;
 343#else
 344    return MO_32;
 345#endif
 346}
 347
 348/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
 349   byte vs word opcodes.  */
 350static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
 351{
 352    return b & 1 ? ot : MO_8;
 353}
 354
 355/* Select size 8 if lsb of B is clear, else OT capped at 32.
 356   Used for decoding operand size of port opcodes.  */
 357static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
 358{
 359    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
 360}
 361
/* Store T0 into architectural register REG with operand size OT,
   preserving the untouched bits of the register as the ISA requires
   (8/16-bit writes merge; 32-bit writes zero-extend on x86_64).  */
static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            /* low byte: merge into bits 7..0 */
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            /* AH/CH/DH/BH: merge into bits 15..8 of reg - 4 */
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}
 389
 390static inline
 391void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg)
 392{
 393    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
 394        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
 395    } else {
 396        tcg_gen_mov_tl(t0, cpu_regs[reg]);
 397    }
 398}
 399
 400static void gen_add_A0_im(DisasContext *s, int val)
 401{
 402    tcg_gen_addi_tl(s->A0, s->A0, val);
 403    if (!CODE64(s)) {
 404        tcg_gen_ext32u_tl(s->A0, s->A0);
 405    }
 406}
 407
/* Store DEST into env->eip (the architectural instruction pointer).  */
static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}
 412
/* REG += VAL with operand size SIZE (merging per gen_op_mov_reg_v).  */
static inline
void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}
 419
/* REG += T0 with operand size SIZE (used e.g. to advance ESI/EDI in
   string operations).  */
static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}
 425
/* Little-endian guest memory load of size IDX from address A0 into T0.  */
static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}
 430
/* Little-endian guest memory store of size IDX of T0 to address A0.  */
static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}
 435
 436static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 437{
 438    if (d == OR_TMP0) {
 439        gen_op_st_v(s, idx, s->T0, s->A0);
 440    } else {
 441        gen_op_mov_reg_v(s, idx, d, s->T0);
 442    }
 443}
 444
/* Set env->eip to the constant PC (an eip value, relative to cs_base).  */
static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}
 450
/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    /* First normalize A0 to the address size and decide whether a
       segment base must be added at all; each case may return early
       when no base is needed.  */
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            /* 64-bit addressing ignores DS/ES/SS bases entirely.  */
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            /* some segment base is nonzero: must add the default base */
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            /* 32-bit address in 64-bit code: zero-extend, then add base */
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            /* legacy modes: add base, then wrap to 32 bits */
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
 506
/* A0 = seg:ESI for string ops; default segment DS, override honored.  */
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}
 511
/* A0 = ES:EDI for string ops; ES cannot be overridden (hence -1).  */
static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}
 516
 517static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot)
 518{
 519    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
 520    tcg_gen_shli_tl(s->T0, s->T0, ot);
 521};
 522
/* Sign- or zero-extend SRC of size SIZE into DST and return DST.
   When no extension is needed (full target width), SRC is returned
   unchanged and DST is not written -- callers must use the return
   value, not assume DST was touched.  */
static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        /* MO_32 on i386 / MO_64 on x86_64: already full width.  */
        return src;
    }
}
 553
/* Zero-extend REG in place from size OT.  */
static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}
 558
/* Sign-extend REG in place from size OT.  */
static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}
 563
/* Branch to LABEL1 if ECX (truncated to address size SIZE) is nonzero.  */
static inline
void gen_op_jnz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}
 571
/* Branch to LABEL1 if ECX (truncated to address size SIZE) is zero.  */
static inline
void gen_op_jz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}
 579
/* Emit the IN helper call matching operand size OT: V = in(port N).  */
static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        /* 64-bit port I/O does not exist */
        tcg_abort();
    }
}
 596
/* Emit the OUT helper call matching operand size OT: out(port N) = V.  */
static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        /* 64-bit port I/O does not exist */
        tcg_abort();
    }
}
 613
 614static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
 615                         uint32_t svm_flags)
 616{
 617    target_ulong next_eip;
 618
 619    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
 620        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 621        switch (ot) {
 622        case MO_8:
 623            gen_helper_check_iob(cpu_env, s->tmp2_i32);
 624            break;
 625        case MO_16:
 626            gen_helper_check_iow(cpu_env, s->tmp2_i32);
 627            break;
 628        case MO_32:
 629            gen_helper_check_iol(cpu_env, s->tmp2_i32);
 630            break;
 631        default:
 632            tcg_abort();
 633        }
 634    }
 635    if(s->flags & HF_GUEST_MASK) {
 636        gen_update_cc_op(s);
 637        gen_jmp_im(s, cur_eip);
 638        svm_flags |= (1 << (4 + ot));
 639        next_eip = s->pc - s->cs_base;
 640        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 641        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
 642                                tcg_const_i32(svm_flags),
 643                                tcg_const_i32(next_eip - cur_eip));
 644    }
 645}
 646
/* Emit one MOVS iteration of size OT: load [seg:ESI], store [ES:EDI],
   then advance ESI and EDI by +/-(1 << ot) according to DF.  */
static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
 657
/* Record T0 as the lazy-flags destination operand (cc_dst).  */
static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}
 662
/* Record T1/T0 as the lazy-flags source/destination operands.  */
static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}
 668
/* Record REG/T1/T0 as the three lazy-flags operands (src2/src/dst).  */
static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}
 675
/* cc_dst = T0 & T1, the lazy-flags operand for TEST.  */
static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}
 680
/* Set up lazy flags for NEG: result in cc_dst, and model it as
   0 - T0 by storing -T0 in cc_src and 0 in cc_srcT.  */
static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}
 687
/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        /* flags already materialized in cc_src */
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        /* result of xor reg,reg etc: only Z and P are set */
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        /* substitute a shared zero for each dead helper argument */
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    /* cc_op must be up to date in env before calling the helper */
    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}
 732
/* Description of how to test one condition: the condition COND holds
   iff "(reg & mask) cond (use_reg2 ? reg2 : imm)".  no_setcond means
   reg already holds a 0/1 (or !=0) value and a setcond is unnecessary. */
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;     /* -1 means no masking needed */
    bool use_reg2;         /* compare against reg2 instead of imm */
    bool no_setcond;
} CCPrepare;
 742
/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* these ops always clear C */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        /* INC/DEC preserve C; it was saved in cc_src */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        /* C set iff the high part of the product (cc_src) is nonzero */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        /* BMI "logic" ops: C set iff the source was zero */
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        /* C carried directly in cc_dst */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}
 816
/* compute eflags.P to reg -- no lazy fast path; always materializes
   all flags first */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}
 824
/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        /* S is available as a bit of cc_src */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* these ops always clear S */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            /* S is the sign bit of the (size-extended) result in cc_dst */
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}
 849
/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        /* O carried directly in cc_src2 */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* these ops always clear O */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}
 867
/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        /* Z is always set after a clearing op */
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        /* Z set iff the popcount source (saved in cc_src) was zero */
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            /* Z set iff the (size-extended) result in cc_dst is zero */
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}
 894
/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranted not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;            /* low bit of the opcode inverts the condition */
    jcc_op = (b >> 1) & 7;  /* JCC_* selector */

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            /* unsigned srcT <= src comparison on the saved cmp operands */
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            /* signed comparison on the saved cmp operands */
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            /* L = S != O, tested as (CC_O >> 4) ^ CC_S */
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            /* LE = Z || (S != O) */
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}
 992
/* Store into REG the 0/1 value of condition B (SETcc semantics).  */
static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        /* cc.reg already holds the truth value; EQ means "invert it" */
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        /* single-bit mask tested for non-zero: shift the bit down */
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}
1022
/* Compute the carry flag into 'reg' by evaluating the "below"
   condition (JCC_B), which on x86 tests exactly CF.  */
static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}
1027
/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   Unlike gen_jcc1 below, this variant does not touch cc_op, so it is
   for use when the translation block does not end at the branch.  */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}
1044
/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    /* Flush the lazy cc state and mark cc_op dynamic: after the branch
       the two paths may disagree about how flags were computed.  */
    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}
1064
/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
/* Emit the "exit when ECX == 0" test for REP-prefixed string ops:
   if ECX != 0, fall through past the jump to next_eip.  Returns the
   label l2 placed just before that jump, so the caller can branch
   back to it to leave the loop.  */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}
1077
/* STOS: store AL/AX/EAX/RAX at ES:[EDI], then step EDI by the
   direction-flag shift.  Note EAX is read with MO_32 regardless of
   'ot'; the store itself truncates to the operand size.  */
static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1086
/* LODS: load from DS:[ESI] into AL/AX/EAX/RAX, then step ESI by the
   direction-flag shift.  */
static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}
1095
/* SCAS: compare the accumulator with the value at ES:[EDI] (flags
   only, via OP_CMPL), then step EDI.  */
static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1104
/* CMPS: compare the value at DS:[ESI] with the one at ES:[EDI]
   (flags only), then step both index registers.  The EDI operand is
   loaded into T1 first; the ESI operand goes through the OR_TMP0
   memory path of gen_op.  */
static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1115
/* If I/O breakpoints are active (HF_IOBPT_MASK), call the bpt_io
   helper with the accessed port, the access size in bytes (1 << ot)
   and the EIP of the next instruction.  No-op otherwise.  */
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}
1127
1128
/* INS: read from the I/O port in DX (low 16 bits) and store at
   ES:[EDI], then step EDI.  Bracketed with gen_io_start/gen_io_end
   when icount is in use, since port I/O can affect virtual time.  */
static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}
1150
/* OUTS: load from DS:[ESI] and write it to the I/O port in DX
   (low 16 bits), then step ESI.  Bracketed with gen_io_start/
   gen_io_end when icount is in use.  */
static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
        gen_io_end();
    }
}
1170
/* same method as Valgrind : we generate jumps to current or next
   instruction */
/* Expand to gen_repz_<op>(): one iteration of a REP-prefixed string
   instruction.  The loop is formed by jumping back to cur_eip after
   each iteration; the ECX == 0 exit to next_eip is emitted by
   gen_jz_ecx_string(), and the optional post-decrement re-test
   (s->repz_opt) avoids a redundant trip through the loop head.  */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}
1188
/* Like GEN_REPZ, but for scas/cmps: additionally tests ZF after each
   iteration via gen_jcc1, exiting the loop when it no longer matches
   the REPZ/REPNZ sense selected by 'nz'.  */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}
1206
/* Instantiate the REP loop wrappers for each string instruction.  */
GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
1214
1215static void gen_helper_fp_arith_ST0_FT0(int op)
1216{
1217    switch (op) {
1218    case 0:
1219        gen_helper_fadd_ST0_FT0(cpu_env);
1220        break;
1221    case 1:
1222        gen_helper_fmul_ST0_FT0(cpu_env);
1223        break;
1224    case 2:
1225        gen_helper_fcom_ST0_FT0(cpu_env);
1226        break;
1227    case 3:
1228        gen_helper_fcom_ST0_FT0(cpu_env);
1229        break;
1230    case 4:
1231        gen_helper_fsub_ST0_FT0(cpu_env);
1232        break;
1233    case 5:
1234        gen_helper_fsubr_ST0_FT0(cpu_env);
1235        break;
1236    case 6:
1237        gen_helper_fdiv_ST0_FT0(cpu_env);
1238        break;
1239    case 7:
1240        gen_helper_fdivr_ST0_FT0(cpu_env);
1241        break;
1242    }
1243}
1244
1245/* NOTE the exception in "r" op ordering */
1246static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1247{
1248    TCGv_i32 tmp = tcg_const_i32(opreg);
1249    switch (op) {
1250    case 0:
1251        gen_helper_fadd_STN_ST0(cpu_env, tmp);
1252        break;
1253    case 1:
1254        gen_helper_fmul_STN_ST0(cpu_env, tmp);
1255        break;
1256    case 4:
1257        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1258        break;
1259    case 5:
1260        gen_helper_fsub_STN_ST0(cpu_env, tmp);
1261        break;
1262    case 6:
1263        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1264        break;
1265    case 7:
1266        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1267        break;
1268    }
1269}
1270
/* Raise exception 'trapno' at guest EIP 'cur_eip': flush the lazy cc
   state, reload EIP, and call the raise_exception helper.  Marks the
   TB as ended (DISAS_NORETURN).  */
static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}
1278
/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.
   The fault address is the start of the instruction (pc_start).  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}
1285
/* if d == OR_TMP0, it means memory operand (address in A0) */
/* Emit one ALU operation 'op' of size 'ot': destination 'd' (register
   index, or OR_TMP0 for the memory operand at A0), source in T1.
   The LOCK prefix is implemented with TCG atomic ops and is #UD with
   a register destination.  Updates the lazy flags state (cc_op and
   the CC_* globals) for the operation performed.  */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        /* In the locked case the atomic op below does the load.  */
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch(op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            /* mem += T1 + carry, atomically; new value in T0.  */
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            /* mem += -(T1 + carry), i.e. mem -= T1 + carry.  */
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            /* fetch_add of -T1 returns the old value, which is the
               subtraction source operand needed in cc_srcT.  */
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    /* Unknown op values are routed to the AND case via 'default'.  */
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        /* Compare: updates flags state only, no writeback.  */
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
1396
/* if d == OR_TMP0, it means memory operand (address in A0) */
/* Emit INC (c > 0) or DEC (c < 0) of size 'ot' on destination 'd'.
   LOCK is only legal with a memory destination (#UD otherwise).
   INC/DEC do not modify CF, so the current carry is saved into
   cpu_cc_src before switching to the INC/DEC cc_op.  */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}
1423
/* Install the flags state after a shift by a possibly-zero 'count'.
   x86 leaves all flags unchanged when the count is 0, so the new
   CC_DST/CC_SRC/cc_op values are selected with movcond keyed on
   count != 0.  'result' is the shifted value; 'shm1' is the value
   shifted by count-1, which becomes CC_SRC.  */
static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1468
/* Variable-count shift (SHL/SHR/SAR) of size 'ot' on operand 'op1'
   (register index, or OR_TMP0 for memory at A0); count in T1.
   'is_arith' selects SAR over SHR for right shifts.  */
static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    /* tmp0 = value shifted by count-1, used for the carry-out; when
       the count is 0 gen_shift_flags discards it via movcond.  */
    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}
1504
1505static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1506                            int is_right, int is_arith)
1507{
1508    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1509
1510    /* load */
1511    if (op1 == OR_TMP0)
1512        gen_op_ld_v(s, ot, s->T0, s->A0);
1513    else
1514        gen_op_mov_v_reg(s, ot, s->T0, op1);
1515
1516    op2 &= mask;
1517    if (op2 != 0) {
1518        if (is_right) {
1519            if (is_arith) {
1520                gen_exts(ot, s->T0);
1521                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1522                tcg_gen_sari_tl(s->T0, s->T0, op2);
1523            } else {
1524                gen_extu(ot, s->T0);
1525                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1526                tcg_gen_shri_tl(s->T0, s->T0, op2);
1527            }
1528        } else {
1529            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1530            tcg_gen_shli_tl(s->T0, s->T0, op2);
1531        }
1532    }
1533
1534    /* store */
1535    gen_op_st_rm_T0_A0(s, ot, op1);
1536
1537    /* update eflags if non zero shift */
1538    if (op2 != 0) {
1539        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1540        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1541        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1542    }
1543}
1544
/* Variable-count rotate (ROL/ROR) of size 'ot' on operand 'op1'
   (register index, or OR_TMP0 for memory at A0); count in T1.  */
static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1629
/* Immediate-count rotate (ROL/ROR); operands as in gen_rot_rm_T1.
   Note the switch intentionally places 'default' between the
   64-bit-only MO_32 case and the MO_8/MO_16 cases; all sizes are
   still covered.  */
static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            if (is_right) {
                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(s->T0, s->T0, op2);
            } else {
                tcg_gen_rotli_tl(s->T0, s->T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            /* Narrow rotate built from two shifts; a right rotate by
               'shift' is a left rotate by width - shift.  */
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, s->T0);
            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}
1706
1707/* XXX: add faster immediate = 1 case */
1708static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1709                           int is_right)
1710{
1711    gen_compute_eflags(s);
1712    assert(s->cc_op == CC_OP_EFLAGS);
1713
1714    /* load */
1715    if (op1 == OR_TMP0)
1716        gen_op_ld_v(s, ot, s->T0, s->A0);
1717    else
1718        gen_op_mov_v_reg(s, ot, s->T0, op1);
1719    
1720    if (is_right) {
1721        switch (ot) {
1722        case MO_8:
1723            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1724            break;
1725        case MO_16:
1726            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1727            break;
1728        case MO_32:
1729            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1730            break;
1731#ifdef TARGET_X86_64
1732        case MO_64:
1733            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1734            break;
1735#endif
1736        default:
1737            tcg_abort();
1738        }
1739    } else {
1740        switch (ot) {
1741        case MO_8:
1742            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1743            break;
1744        case MO_16:
1745            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1746            break;
1747        case MO_32:
1748            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1749            break;
1750#ifdef TARGET_X86_64
1751        case MO_64:
1752            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1753            break;
1754#endif
1755        default:
1756            tcg_abort();
1757        }
1758    }
1759    /* store */
1760    gen_op_st_rm_T0_A0(s, ot, op1);
1761}
1762
/* XXX: add faster immediate case */
/* Double-precision shift (SHLD/SHRD): shift the operand 'op1'
   (register index, or OR_TMP0 for memory at A0) of size 'ot' by
   count_in, filling the vacated bits from T1.  With TARGET_X86_64,
   MO_16 falls through into the MO_32 widening path; without it,
   MO_16 falls into the generic default case.  */
static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                             bool is_right, TCGv count_in)
{
    target_ulong mask = (ot == MO_64 ? 63 : 31);
    TCGv count;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    count = tcg_temp_new();
    tcg_gen_andi_tl(count, count_in, mask);

    switch (ot) {
    case MO_16:
        /* Note: we implement the Intel behaviour for shift count > 16.
           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
           portion by constructing it as a 32-bit value.  */
        if (is_right) {
            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
            tcg_gen_mov_tl(s->T1, s->T0);
            tcg_gen_mov_tl(s->T0, s->tmp0);
        } else {
            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
        }
        /* FALLTHRU */
#ifdef TARGET_X86_64
    case MO_32:
        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
        tcg_gen_subi_tl(s->tmp0, count, 1);
        if (is_right) {
            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_i64(s->T0, s->T0, count);
        } else {
            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shl_i64(s->T0, s->T0, count);
            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
            tcg_gen_shri_i64(s->T0, s->T0, 32);
        }
        break;
#endif
    default:
        /* Generic path: shift T0, then merge in the bits of T1 that
           enter from the other side (T1 shifted by width - count).  */
        tcg_gen_subi_tl(s->tmp0, count, 1);
        if (is_right) {
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);

            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
            tcg_gen_shr_tl(s->T0, s->T0, count);
            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
        } else {
            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
            if (ot == MO_16) {
                /* Only needed if count > 16, for Intel behaviour.  */
                tcg_gen_subfi_tl(s->tmp4, 33, count);
                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
            }

            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
            tcg_gen_shl_tl(s->T0, s->T0, count);
            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
        }
        /* Zero T1 when count == 0 so the OR below is a no-op.  */
        tcg_gen_movi_tl(s->tmp4, 0);
        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
                           s->tmp4, s->T1);
        tcg_gen_or_tl(s->T0, s->T0, s->T1);
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
    tcg_temp_free(count);
}
1844
1845static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1846{
1847    if (s != OR_TMP1)
1848        gen_op_mov_v_reg(s1, ot, s1->T1, s);
1849    switch(op) {
1850    case OP_ROL:
1851        gen_rot_rm_T1(s1, ot, d, 0);
1852        break;
1853    case OP_ROR:
1854        gen_rot_rm_T1(s1, ot, d, 1);
1855        break;
1856    case OP_SHL:
1857    case OP_SHL1:
1858        gen_shift_rm_T1(s1, ot, d, 0, 0);
1859        break;
1860    case OP_SHR:
1861        gen_shift_rm_T1(s1, ot, d, 1, 0);
1862        break;
1863    case OP_SAR:
1864        gen_shift_rm_T1(s1, ot, d, 1, 1);
1865        break;
1866    case OP_RCL:
1867        gen_rotc_rm_T1(s1, ot, d, 0);
1868        break;
1869    case OP_RCR:
1870        gen_rotc_rm_T1(s1, ot, d, 1);
1871        break;
1872    }
1873}
1874
/* Emit an immediate-count shift/rotate 'op' of size 'ot' on
   destination 'd' with count 'c'.  RCL/RCR have no immediate fast
   path and go through the variable-count code via the default case. */
static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
{
    switch(op) {
    case OP_ROL:
        gen_rot_rm_im(s1, ot, d, c, 0);
        break;
    case OP_ROR:
        gen_rot_rm_im(s1, ot, d, c, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_im(s1, ot, d, c, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_im(s1, ot, d, c, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_im(s1, ot, d, c, 1, 1);
        break;
    default:
        /* currently not optimized */
        tcg_gen_movi_tl(s1->T1, c);
        gen_shift(s1, op, ot, d, OR_TMP1);
        break;
    }
}
1901
/* Architectural limit: an x86 instruction is at most 15 bytes long.  */
#define X86_MAX_INSN_LENGTH 15
1903
/* Advance s->pc by num_bytes and return the old value.  If the
   instruction being decoded exceeds the architectural 15-byte limit,
   abandon decoding via siglongjmp to s->jmpbuf (the fault itself is
   raised by the translation loop that set up the jmpbuf).  */
static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
{
    uint64_t pc = s->pc;

    s->pc += num_bytes;
    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
        /* If the instruction's 16th byte is on a different page than the 1st, a
         * page fault on the second page wins over the general protection fault
         * caused by the instruction being too long.
         * This can happen even if the operand is only one byte long!
         */
        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
            /* 'volatile' keeps this probing load from being elided.  */
            volatile uint8_t unused =
                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
            (void) unused;
        }
        siglongjmp(s->jmpbuf, 1);
    }

    return pc;
}
1925
/* Fetch one unsigned code byte at the current decode position.  */
static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
{
    return cpu_ldub_code(env, advance_pc(env, s, 1));
}
1930
/* Fetch a signed 16-bit code word at the current decode position.  */
static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
{
    return cpu_ldsw_code(env, advance_pc(env, s, 2));
}
1935
/* Fetch an unsigned 16-bit code word at the current decode position.  */
static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
{
    return cpu_lduw_code(env, advance_pc(env, s, 2));
}
1940
/* Fetch a 32-bit code word at the current decode position.  */
static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
{
    return cpu_ldl_code(env, advance_pc(env, s, 4));
}
1945
#ifdef TARGET_X86_64
/* Fetch a 64-bit code word at the current decode position (x86-64 only).  */
static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
{
    return cpu_ldq_code(env, advance_pc(env, s, 8));
}
#endif
1952
/* Decompose an address.  */

/* Decoded effective address: disp(base, index * (1 << scale)) with a
 * default segment.  base/index are GPR numbers, or -1 when absent;
 * base == -2 marks x86-64 RIP-relative addressing (disp is absolute).
 */
typedef struct AddressParts {
    int def_seg;        /* default segment register (R_DS or R_SS) */
    int base;           /* base register, -1 if none, -2 if RIP-relative */
    int index;          /* index register, -1 if none */
    int scale;          /* index scaled by 1 << scale */
    target_long disp;   /* constant displacement */
} AddressParts;
1962
/* Decode the ModRM byte MODRM (plus any following SIB byte and
 * displacement, which are consumed from the instruction stream) into
 * an AddressParts description of the effective address.
 */
static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
                                    int modrm)
{
    int def_seg, base, index, scale, mod, rm;
    target_long disp;
    bool havesib;

    def_seg = R_DS;
    index = -1;
    scale = 0;
    disp = 0;

    mod = (modrm >> 6) & 3;
    rm = modrm & 7;
    base = rm | REX_B(s);

    if (mod == 3) {
        /* Normally filtered out earlier, but including this path
           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
        goto done;
    }

    switch (s->aflag) {
    case MO_64:
    case MO_32:
        havesib = 0;
        if (rm == 4) {
            /* RM == 4 means a SIB byte follows the ModRM byte.  */
            int code = x86_ldub_code(env, s);
            scale = (code >> 6) & 3;
            index = ((code >> 3) & 7) | REX_X(s);
            if (index == 4) {
                index = -1;  /* no index */
            }
            base = (code & 7) | REX_B(s);
            havesib = 1;
        }

        switch (mod) {
        case 0:
            if ((base & 7) == 5) {
                /* No base, disp32 -- or RIP-relative in 64-bit mode.  */
                base = -1;
                disp = (int32_t)x86_ldl_code(env, s);
                if (CODE64(s) && !havesib) {
                    base = -2;
                    disp += s->pc + s->rip_offset;
                }
            }
            break;
        case 1:
            disp = (int8_t)x86_ldub_code(env, s);
            break;
        default:
        case 2:
            disp = (int32_t)x86_ldl_code(env, s);
            break;
        }

        /* For correct popl handling with esp.  */
        if (base == R_ESP && s->popl_esp_hack) {
            disp += s->popl_esp_hack;
        }
        /* EBP/ESP-based addresses default to the stack segment.  */
        if (base == R_EBP || base == R_ESP) {
            def_seg = R_SS;
        }
        break;

    case MO_16:
        if (mod == 0) {
            if (rm == 6) {
                /* Special case: disp16 with no base register.  */
                base = -1;
                disp = x86_lduw_code(env, s);
                break;
            }
        } else if (mod == 1) {
            disp = (int8_t)x86_ldub_code(env, s);
        } else {
            disp = (int16_t)x86_lduw_code(env, s);
        }

        /* 16-bit addressing: base/index pairs are fixed by RM.  */
        switch (rm) {
        case 0:
            base = R_EBX;
            index = R_ESI;
            break;
        case 1:
            base = R_EBX;
            index = R_EDI;
            break;
        case 2:
            base = R_EBP;
            index = R_ESI;
            def_seg = R_SS;
            break;
        case 3:
            base = R_EBP;
            index = R_EDI;
            def_seg = R_SS;
            break;
        case 4:
            base = R_ESI;
            break;
        case 5:
            base = R_EDI;
            break;
        case 6:
            base = R_EBP;
            def_seg = R_SS;
            break;
        default:
        case 7:
            base = R_EBX;
            break;
        }
        break;

    default:
        tcg_abort();
    }

 done:
    return (AddressParts){ def_seg, base, index, scale, disp };
}
2085
/* Compute the address, with a minimum number of TCG ops.  Returns
 * either a CPU register directly (when the address is exactly one
 * register) or s->A0 holding the computed value.
 */
static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
{
    TCGv ea = NULL;

    if (a.index >= 0) {
        if (a.scale == 0) {
            /* Unscaled index: reuse the register value directly.  */
            ea = cpu_regs[a.index];
        } else {
            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
            ea = s->A0;
        }
        if (a.base >= 0) {
            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
            ea = s->A0;
        }
    } else if (a.base >= 0) {
        ea = cpu_regs[a.base];
    }
    if (!ea) {
        /* Neither base nor index: pure displacement.  */
        tcg_gen_movi_tl(s->A0, a.disp);
        ea = s->A0;
    } else if (a.disp != 0) {
        tcg_gen_addi_tl(s->A0, ea, a.disp);
        ea = s->A0;
    }

    return ea;
}
2115
/* Decode the ModRM operand and leave the segment-adjusted linear
 * address in s->A0.
 */
static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
{
    AddressParts a = gen_lea_modrm_0(env, s, modrm);
    TCGv ea = gen_lea_modrm_1(s, a);
    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
}
2122
/* Consume a ModRM operand (SIB/displacement bytes included) without
 * generating any code -- used for multi-byte NOPs.
 */
static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
{
    (void)gen_lea_modrm_0(env, s, modrm);
}
2127
/* Used for BNDCL, BNDCU, BNDCN.  Compares the effective address of the
 * ModRM operand against bound BNDV using COND and raises #BR via the
 * bndck helper when the condition holds.
 */
static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
                      TCGCond cond, TCGv_i64 bndv)
{
    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));

    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
    if (!CODE64(s)) {
        /* Outside 64-bit mode only the low 32 address bits matter.  */
        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
    }
    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
    gen_helper_bndck(cpu_env, s->tmp2_i32);
}
2142
/* used for LEA and MOV AX, mem: apply the (possibly overridden) DS
 * segment base to the address already in s->A0 */
static void gen_add_A0_ds_seg(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
}
2148
2149/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2150   OR_TMP0 */
2151static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2152                           TCGMemOp ot, int reg, int is_store)
2153{
2154    int mod, rm;
2155
2156    mod = (modrm >> 6) & 3;
2157    rm = (modrm & 7) | REX_B(s);
2158    if (mod == 3) {
2159        if (is_store) {
2160            if (reg != OR_TMP0)
2161                gen_op_mov_v_reg(s, ot, s->T0, reg);
2162            gen_op_mov_reg_v(s, ot, rm, s->T0);
2163        } else {
2164            gen_op_mov_v_reg(s, ot, s->T0, rm);
2165            if (reg != OR_TMP0)
2166                gen_op_mov_reg_v(s, ot, reg, s->T0);
2167        }
2168    } else {
2169        gen_lea_modrm(env, s, modrm);
2170        if (is_store) {
2171            if (reg != OR_TMP0)
2172                gen_op_mov_v_reg(s, ot, s->T0, reg);
2173            gen_op_st_v(s, ot, s->T0, s->A0);
2174        } else {
2175            gen_op_ld_v(s, ot, s->T0, s->A0);
2176            if (reg != OR_TMP0)
2177                gen_op_mov_reg_v(s, ot, reg, s->T0);
2178        }
2179    }
2180}
2181
/* Fetch an immediate operand of size OT from the instruction stream.
 * MO_64 immediates are truncated to 32 bits (x86 sign-extends them).
 */
static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
{
    uint32_t ret;

    switch (ot) {
    case MO_8:
        ret = x86_ldub_code(env, s);
        break;
    case MO_16:
        ret = x86_lduw_code(env, s);
        break;
    case MO_32:
#ifdef TARGET_X86_64
    case MO_64:
#endif
        ret = x86_ldl_code(env, s);
        break;
    default:
        tcg_abort();
    }
    return ret;
}
2204
2205static inline int insn_const_size(TCGMemOp ot)
2206{
2207    if (ot <= MO_32) {
2208        return 1 << ot;
2209    } else {
2210        return 4;
2211    }
2212}
2213
/* Return true if a direct (chained) jump to PC is permitted: the
 * target must lie on the same page as the TB start or the current
 * instruction.  Always allowed for user-mode emulation.
 */
static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
{
#ifndef CONFIG_USER_ONLY
    return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
#else
    return true;
#endif
}
2223
/* End the TB with a jump to EIP, using direct TB chaining (slot
 * TB_NUM) when allowed, otherwise an indirect jump.
 */
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
    target_ulong pc = s->cs_base + eip;

    if (use_goto_tb(s, pc))  {
        /* jump to same page: we can use a direct jump */
        tcg_gen_goto_tb(tb_num);
        gen_jmp_im(s, eip);
        tcg_gen_exit_tb(s->base.tb, tb_num);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        /* jump to another page */
        gen_jmp_im(s, eip);
        gen_jr(s, s->tmp0);
    }
}
2240
/* Generate a conditional jump: condition B taken to VAL, otherwise
 * fall through to NEXT_EIP.  With jmp_opt set, both edges use TB
 * chaining; otherwise the block ends with an explicit EIP update.
 */
static inline void gen_jcc(DisasContext *s, int b,
                           target_ulong val, target_ulong next_eip)
{
    TCGLabel *l1, *l2;

    if (s->jmp_opt) {
        l1 = gen_new_label();
        gen_jcc1(s, b, l1);

        /* Not-taken path.  */
        gen_goto_tb(s, 0, next_eip);

        gen_set_label(l1);
        gen_goto_tb(s, 1, val);
    } else {
        l1 = gen_new_label();
        l2 = gen_new_label();
        gen_jcc1(s, b, l1);

        gen_jmp_im(s, next_eip);
        tcg_gen_br(l2);

        gen_set_label(l1);
        gen_jmp_im(s, val);
        gen_set_label(l2);
        gen_eob(s);
    }
}
2268
/* Generate CMOVcc: load the r/m operand into T0, then move it into
 * REG only when condition B holds (otherwise keep REG's old value).
 */
static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
                        int modrm, int reg)
{
    CCPrepare cc;

    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

    cc = gen_prepare_cc(s, b, s->T1);
    if (cc.mask != -1) {
        /* Isolate the relevant flag bits before the comparison.  */
        TCGv t0 = tcg_temp_new();
        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
        cc.reg = t0;
    }
    if (!cc.use_reg2) {
        cc.reg2 = tcg_const_tl(cc.imm);
    }

    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
                       s->T0, cpu_regs[reg]);
    gen_op_mov_reg_v(s, ot, reg, s->T0);

    if (cc.mask != -1) {
        tcg_temp_free(cc.reg);
    }
    if (!cc.use_reg2) {
        tcg_temp_free(cc.reg2);
    }
}
2297
/* Load the selector of segment register SEG_REG into T0.  */
static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
{
    tcg_gen_ld32u_tl(s->T0, cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
}
2303
/* Real-mode/vm86 segment load: store the 16-bit selector from T0 and
 * set the segment base to selector << 4, with no descriptor checks.
 */
static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
{
    tcg_gen_ext16u_tl(s->T0, s->T0);
    tcg_gen_st32_tl(s->T0, cpu_env,
                    offsetof(CPUX86State,segs[seg_reg].selector));
    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
}
2311
/* move T0 to seg_reg and compute if the CPU state may change. Never
   call this function with seg_reg == R_CS */
static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
{
    if (s->pe && !s->vm86) {
        /* Protected mode: full descriptor load via helper.  */
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
        /* abort translation because the addseg value may change or
           because ss32 may change. For R_SS, translation must always
           stop as a special handling must be done to disable hardware
           interrupts for the next instruction */
        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
            s->base.is_jmp = DISAS_TOO_MANY;
        }
    } else {
        gen_op_movl_seg_T0_vm(s, seg_reg);
        if (seg_reg == R_SS) {
            s->base.is_jmp = DISAS_TOO_MANY;
        }
    }
}
2333
2334static inline int svm_is_rep(int prefixes)
2335{
2336    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2337}
2338
2339static inline void
2340gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2341                              uint32_t type, uint64_t param)
2342{
2343    /* no SVM activated; fast case */
2344    if (likely(!(s->flags & HF_GUEST_MASK)))
2345        return;
2346    gen_update_cc_op(s);
2347    gen_jmp_im(s, pc_start - s->cs_base);
2348    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2349                                         tcg_const_i64(param));
2350}
2351
/* As gen_svm_check_intercept_param, with exit-info 0.  */
static inline void
gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
{
    gen_svm_check_intercept_param(s, pc_start, type, 0);
}
2357
/* Add ADDEND to ESP, honoring the current stack-pointer size.  */
static inline void gen_stack_update(DisasContext *s, int addend)
{
    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
}
2362
/* Generate a push. It depends on ss32, addseg and dflag.  */
static void gen_push_v(DisasContext *s, TCGv val)
{
    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
    TCGMemOp a_ot = mo_stacksize(s);
    int size = 1 << d_ot;
    TCGv new_esp = s->A0;

    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);

    if (!CODE64(s)) {
        if (s->addseg) {
            /* A0 will be clobbered by the segment add; keep the raw
               new ESP value for the final register writeback.  */
            new_esp = s->tmp4;
            tcg_gen_mov_tl(new_esp, s->A0);
        }
        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
    }

    /* Store first, then update ESP, for precise exceptions.  */
    gen_op_st_v(s, d_ot, val, s->A0);
    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
}
2384
/* two step pop is necessary for precise exceptions: load the top of
   stack into T0 here; the caller updates ESP afterwards with
   gen_pop_update().  Returns the operand size used. */
static TCGMemOp gen_pop_T0(DisasContext *s)
{
    TCGMemOp d_ot = mo_pushpop(s, s->dflag);

    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
    gen_op_ld_v(s, d_ot, s->T0, s->A0);

    return d_ot;
}
2395
/* Second half of a pop: advance ESP past the popped operand.  */
static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
{
    gen_stack_update(s, 1 << ot);
}
2400
/* Load the SS-relative address of the stack top into A0.  */
static inline void gen_stack_A0(DisasContext *s)
{
    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
}
2405
/* PUSHA: store all eight GPRs below ESP (EAX at the highest address,
 * EDI at the lowest), then decrement ESP once by 8 * operand size.
 */
static void gen_pusha(DisasContext *s)
{
    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
    TCGMemOp d_ot = s->dflag;
    int size = 1 << d_ot;
    int i;

    for (i = 0; i < 8; i++) {
        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
    }

    gen_stack_update(s, -8 * size);
}
2421
/* POPA: reload the GPRs pushed by PUSHA, skipping ESP, then add
 * 8 * operand size to ESP.
 */
static void gen_popa(DisasContext *s)
{
    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
    TCGMemOp d_ot = s->dflag;
    int size = 1 << d_ot;
    int i;

    for (i = 0; i < 8; i++) {
        /* ESP is not reloaded */
        if (7 - i == R_ESP) {
            continue;
        }
        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
        gen_op_ld_v(s, d_ot, s->T0, s->A0);
        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
    }

    gen_stack_update(s, 8 * size);
}
2442
/* ENTER: push EBP, optionally copy LEVEL-1 frame pointers from the
 * previous frame plus the new frame pointer, set EBP to the new frame,
 * and reserve ESP_ADDEND bytes of locals.
 */
static void gen_enter(DisasContext *s, int esp_addend, int level)
{
    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
    int size = 1 << d_ot;

    /* Push BP; compute FrameTemp into T1.  */
    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);

    /* The architectural nesting level is taken modulo 32.  */
    level &= 31;
    if (level != 0) {
        int i;

        /* Copy level-1 pointers from the previous frame.  */
        for (i = 1; i < level; ++i) {
            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);

            tcg_gen_subi_tl(s->A0, s->T1, size * i);
            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
        }

        /* Push the current FrameTemp as the last level.  */
        tcg_gen_subi_tl(s->A0, s->T1, size * level);
        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
        gen_op_st_v(s, d_ot, s->T1, s->A0);
    }

    /* Copy the FrameTemp value to EBP.  */
    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);

    /* Compute the final value of ESP.  */
    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
}
2482
/* LEAVE: reload EBP from the saved value at [EBP] and point ESP just
 * above the popped slot.
 */
static void gen_leave(DisasContext *s)
{
    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
    TCGMemOp a_ot = mo_stacksize(s);

    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
    gen_op_ld_v(s, d_ot, s->T0, s->A0);

    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);

    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
}
2496
/* Similarly, except that the assumption here is that we don't decode
   the instruction at all -- either a missing opcode, an unimplemented
   feature, or just a bogus instruction stream.  Raises #UD and, with
   LOG_UNIMP enabled, dumps the undecoded bytes to the log. */
static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
{
    gen_illegal_opcode(s);

    if (qemu_loglevel_mask(LOG_UNIMP)) {
        target_ulong pc = s->pc_start, end = s->pc;
        qemu_log_lock();
        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
        for (; pc < end; ++pc) {
            qemu_log(" %02x", cpu_ldub_code(env, pc));
        }
        qemu_log("\n");
        qemu_log_unlock();
    }
}
2515
/* an interrupt is different from an exception because of the
   privilege checks.  CUR_EIP is the faulting instruction; NEXT_EIP
   lets the helper compute the instruction length for IRET. */
static void gen_interrupt(DisasContext *s, int intno,
                          target_ulong cur_eip, target_ulong next_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                               tcg_const_i32(next_eip - cur_eip));
    s->base.is_jmp = DISAS_NORETURN;
}
2527
/* Raise a debug exception at CUR_EIP and end the block.  */
static void gen_debug(DisasContext *s, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_debug(cpu_env);
    s->base.is_jmp = DISAS_NORETURN;
}
2535
/* Set MASK in env->hflags at runtime, and mirror it in s->flags so the
 * rest of this TB is translated with the new value (no-op if already set).
 */
static void gen_set_hflag(DisasContext *s, uint32_t mask)
{
    if ((s->flags & mask) == 0) {
        TCGv_i32 t = tcg_temp_new_i32();
        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
        tcg_gen_ori_i32(t, t, mask);
        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
        tcg_temp_free_i32(t);
        s->flags |= mask;
    }
}
2547
/* Clear MASK in env->hflags at runtime, mirroring it in s->flags
 * (no-op if already clear).
 */
static void gen_reset_hflag(DisasContext *s, uint32_t mask)
{
    if (s->flags & mask) {
        TCGv_i32 t = tcg_temp_new_i32();
        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
        tcg_gen_andi_i32(t, t, ~mask);
        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
        tcg_temp_free_i32(t);
        s->flags &= ~mask;
    }
}
2559
/* Clear BND registers during legacy branches.  */
static void gen_bnd_jmp(DisasContext *s)
{
    /* Clear the registers only if BND prefix is missing, MPX is enabled,
       and if the BNDREGs are known to be in use (non-zero) already.
       The helper itself will check BNDPRESERVE at runtime.  */
    if ((s->prefix & PREFIX_REPNZ) == 0
        && (s->flags & HF_MPX_EN_MASK) != 0
        && (s->flags & HF_MPX_IU_MASK) != 0) {
        gen_helper_bnd_jmp(cpu_env);
    }
}
2572
/* Generate an end of block. Trace exception is also generated if needed.
   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
   S->TF.  This is used by the syscall/sysret insns.
   If JR (and none of the debug cases apply), end with a
   lookup-and-goto-ptr so indirect jumps can chain. */
static void
do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
{
    gen_update_cc_op(s);

    /* If several instructions disable interrupts, only the first does it.  */
    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
    } else {
        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
    }

    if (s->base.tb->flags & HF_RF_MASK) {
        gen_helper_reset_rf(cpu_env);
    }
    if (s->base.singlestep_enabled) {
        gen_helper_debug(cpu_env);
    } else if (recheck_tf) {
        gen_helper_rechecking_single_step(cpu_env);
        tcg_gen_exit_tb(NULL, 0);
    } else if (s->tf) {
        gen_helper_single_step(cpu_env);
    } else if (jr) {
        tcg_gen_lookup_and_goto_ptr();
    } else {
        tcg_gen_exit_tb(NULL, 0);
    }
    s->base.is_jmp = DISAS_NORETURN;
}
2606
/* do_gen_eob_worker without the indirect-jump chaining variant.  */
static inline void
gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
{
    do_gen_eob_worker(s, inhibit, recheck_tf, false);
}
2612
/* End of block.
   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
{
    gen_eob_worker(s, inhibit, false);
}
2619
/* End of block, resetting the inhibit irq flag.  */
static void gen_eob(DisasContext *s)
{
    gen_eob_worker(s, false, false);
}
2625
/* Jump to register.  Callers must have stored the target in EIP
   already (e.g. via gen_jmp_im); DEST itself is currently unused. */
static void gen_jr(DisasContext *s, TCGv dest)
{
    do_gen_eob_worker(s, false, false, true);
}
2631
/* generate a jump to eip. No segment change must happen before as a
   direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
    gen_update_cc_op(s);
    set_cc_op(s, CC_OP_DYNAMIC);
    if (s->jmp_opt) {
        gen_goto_tb(s, tb_num, eip);
    } else {
        gen_jmp_im(s, eip);
        gen_eob(s);
    }
}
2645
/* Unconditional jump to EIP using chaining slot 0.  */
static void gen_jmp(DisasContext *s, target_ulong eip)
{
    gen_jmp_tb(s, eip, 0);
}
2650
/* Load a 64-bit value from [A0] into the CPUX86State field at OFFSET.  */
static inline void gen_ldq_env_A0(DisasContext *s, int offset)
{
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
}
2656
/* Store the 64-bit CPUX86State field at OFFSET to memory at [A0].  */
static inline void gen_stq_env_A0(DisasContext *s, int offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
}
2662
/* Load a 128-bit XMM value from [A0] into the ZMMReg at OFFSET, as two
 * 64-bit halves.
 */
static inline void gen_ldo_env_A0(DisasContext *s, int offset)
{
    int mem_index = s->mem_index;
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
}
2672
/* Store the 128-bit ZMMReg at OFFSET to memory at [A0], as two 64-bit
 * halves.
 */
static inline void gen_sto_env_A0(DisasContext *s, int offset)
{
    int mem_index = s->mem_index;
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
}
2682
/* Copy a 128-bit XMM register within env (S_OFFSET -> D_OFFSET).  */
static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
}
2690
/* Copy a 64-bit value within env (S_OFFSET -> D_OFFSET).  */
static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
}
2696
/* Copy a 32-bit value within env (S_OFFSET -> D_OFFSET).  */
static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
}
2702
/* Zero the 64-bit value at D_OFFSET within env.  */
static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
{
    tcg_gen_movi_i64(s->tmp1_i64, 0);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
}
2708
/* Helper signatures for the SSE/MMX dispatch tables below.  Suffix
 * letters name the argument kinds: e = env, p = register pointer,
 * i = i32, l = i64, t = target-sized value.
 */
typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv val);

/* Table sentinels: the opcode is decoded specially rather than through
 * a direct helper call.
 */
#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

/* Shorthand for {mmx, xmm} helper pairs and ps/pd/ss/sd quadruples.  */
#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2726
/* Main 0F-prefixed SSE/MMX dispatch table, indexed by opcode byte and
 * then by prefix variant: [0] none, [1] 66, [2] F3, [3] F2.
 */
static const SSEFunc_0_epp sse_op_table1[256][4] = {
    /* 3DNow! extensions */
    [0x0e] = { SSE_DUMMY }, /* femms */
    [0x0f] = { SSE_DUMMY }, /* pf... */
    /* pure SSE operations */
    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */

    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
    [0x51] = SSE_FOP(sqrt),
    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
    [0x58] = SSE_FOP(add),
    [0x59] = SSE_FOP(mul),
    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
    [0x5c] = SSE_FOP(sub),
    [0x5d] = SSE_FOP(min),
    [0x5e] = SSE_FOP(div),
    [0x5f] = SSE_FOP(max),

    [0xc2] = SSE_FOP(cmpeq),
    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */

    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },

    /* MMX ops and their SSE extensions */
    [0x60] = MMX_OP2(punpcklbw),
    [0x61] = MMX_OP2(punpcklwd),
    [0x62] = MMX_OP2(punpckldq),
    [0x63] = MMX_OP2(packsswb),
    [0x64] = MMX_OP2(pcmpgtb),
    [0x65] = MMX_OP2(pcmpgtw),
    [0x66] = MMX_OP2(pcmpgtl),
    [0x67] = MMX_OP2(packuswb),
    [0x68] = MMX_OP2(punpckhbw),
    [0x69] = MMX_OP2(punpckhwd),
    [0x6a] = MMX_OP2(punpckhdq),
    [0x6b] = MMX_OP2(packssdw),
    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */
    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
    [0x74] = MMX_OP2(pcmpeqb),
    [0x75] = MMX_OP2(pcmpeqw),
    [0x76] = MMX_OP2(pcmpeql),
    [0x77] = { SSE_DUMMY }, /* emms */
    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
    [0xd1] = MMX_OP2(psrlw),
    [0xd2] = MMX_OP2(psrld),
    [0xd3] = MMX_OP2(psrlq),
    [0xd4] = MMX_OP2(paddq),
    [0xd5] = MMX_OP2(pmullw),
    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
    [0xd8] = MMX_OP2(psubusb),
    [0xd9] = MMX_OP2(psubusw),
    [0xda] = MMX_OP2(pminub),
    [0xdb] = MMX_OP2(pand),
    [0xdc] = MMX_OP2(paddusb),
    [0xdd] = MMX_OP2(paddusw),
    [0xde] = MMX_OP2(pmaxub),
    [0xdf] = MMX_OP2(pandn),
    [0xe0] = MMX_OP2(pavgb),
    [0xe1] = MMX_OP2(psraw),
    [0xe2] = MMX_OP2(psrad),
    [0xe3] = MMX_OP2(pavgw),
    [0xe4] = MMX_OP2(pmulhuw),
    [0xe5] = MMX_OP2(pmulhw),
    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
    [0xe8] = MMX_OP2(psubsb),
    [0xe9] = MMX_OP2(psubsw),
    [0xea] = MMX_OP2(pminsw),
    [0xeb] = MMX_OP2(por),
    [0xec] = MMX_OP2(paddsb),
    [0xed] = MMX_OP2(paddsw),
    [0xee] = MMX_OP2(pmaxsw),
    [0xef] = MMX_OP2(pxor),
    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
    [0xf1] = MMX_OP2(psllw),
    [0xf2] = MMX_OP2(pslld),
    [0xf3] = MMX_OP2(psllq),
    [0xf4] = MMX_OP2(pmuludq),
    [0xf5] = MMX_OP2(pmaddwd),
    [0xf6] = MMX_OP2(psadbw),
    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
    [0xf8] = MMX_OP2(psubb),
    [0xf9] = MMX_OP2(psubw),
    [0xfa] = MMX_OP2(psubl),
    [0xfb] = MMX_OP2(psubq),
    [0xfc] = MMX_OP2(paddb),
    [0xfd] = MMX_OP2(paddw),
    [0xfe] = MMX_OP2(paddl),
};
2860
/*
 * Shift-by-immediate group (opcodes 0x71/0x72/0x73 with an imm8 count).
 * First index is ((b - 1) & 3) * 8 + the /reg field of the modrm byte
 * (see the 0x71..0x73 case in gen_sse): size class 0 = word shifts,
 * 1 = dword shifts, 2 = qword shifts.  Second index is b1 (0 = MMX
 * operand, 1 = 0x66-prefixed XMM operand).  Slots left NULL are invalid
 * encodings; the byte-granular psrldq/pslldq exist only in XMM form.
 */
static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
    [0 + 2] = MMX_OP2(psrlw),                   /* /2: logical right */
    [0 + 4] = MMX_OP2(psraw),                   /* /4: arithmetic right */
    [0 + 6] = MMX_OP2(psllw),                   /* /6: logical left */
    [8 + 2] = MMX_OP2(psrld),
    [8 + 4] = MMX_OP2(psrad),
    [8 + 6] = MMX_OP2(pslld),
    [16 + 2] = MMX_OP2(psrlq),
    [16 + 3] = { NULL, gen_helper_psrldq_xmm }, /* XMM only */
    [16 + 6] = MMX_OP2(psllq),
    [16 + 7] = { NULL, gen_helper_pslldq_xmm }, /* XMM only */
};
2873
/*
 * cvtsi2ss/cvtsi2sd with a 32-bit integer source, indexed by
 * (b >> 8) & 1: 0 = F3-prefixed scalar single, 1 = F2-prefixed
 * scalar double (see the 0x22a/0x32a case in gen_sse).
 */
static const SSEFunc_0_epi sse_op_table3ai[] = {
    gen_helper_cvtsi2ss,
    gen_helper_cvtsi2sd
};
2878
#ifdef TARGET_X86_64
/* As sse_op_table3ai, but for a 64-bit integer source (REX.W forms). */
static const SSEFunc_0_epl sse_op_table3aq[] = {
    gen_helper_cvtsq2ss,
    gen_helper_cvtsq2sd
};
#endif
2885
/*
 * Scalar float -> 32-bit integer conversions, indexed by
 * ((b >> 7) & 2) | (b & 1) (see the 0x22c..0x32d case in gen_sse):
 * bit 1 selects the F2-prefixed (double) form, bit 0 selects the
 * non-truncating (current-rounding-mode) form.
 */
static const SSEFunc_i_ep sse_op_table3bi[] = {
    gen_helper_cvttss2si,
    gen_helper_cvtss2si,
    gen_helper_cvttsd2si,
    gen_helper_cvtsd2si
};
2892
#ifdef TARGET_X86_64
/* As sse_op_table3bi, but producing a 64-bit integer (REX.W forms). */
static const SSEFunc_l_ep sse_op_table3bq[] = {
    gen_helper_cvttss2sq,
    gen_helper_cvtss2sq,
    gen_helper_cvttsd2sq,
    gen_helper_cvtsd2sq
};
#endif
2901
/*
 * SSE compare instructions (cmpps/cmppd/cmpss/cmpsd), outer index is
 * the imm8 comparison predicate (0..7).  SSE_FOP (defined earlier in
 * this file) presumably expands to the four prefix variants filling
 * the inner dimension -- confirm against its definition.
 */
static const SSEFunc_0_epp sse_op_table4[8][4] = {
    SSE_FOP(cmpeq),
    SSE_FOP(cmplt),
    SSE_FOP(cmple),
    SSE_FOP(cmpunord),
    SSE_FOP(cmpneq),
    SSE_FOP(cmpnlt),
    SSE_FOP(cmpnle),
    SSE_FOP(cmpord),
};
2912
/*
 * 3DNow! operations (opcode 0x0f 0x0f), indexed by the instruction's
 * trailing opcode-suffix byte.  Unlisted suffixes are NULL and decode
 * as invalid.
 */
static const SSEFunc_0_epp sse_op_table5[256] = {
    [0x0c] = gen_helper_pi2fw,
    [0x0d] = gen_helper_pi2fd,
    [0x1c] = gen_helper_pf2iw,
    [0x1d] = gen_helper_pf2id,
    [0x8a] = gen_helper_pfnacc,
    [0x8e] = gen_helper_pfpnacc,
    [0x90] = gen_helper_pfcmpge,
    [0x94] = gen_helper_pfmin,
    [0x96] = gen_helper_pfrcp,
    [0x97] = gen_helper_pfrsqrt,
    [0x9a] = gen_helper_pfsub,
    [0x9e] = gen_helper_pfadd,
    [0xa0] = gen_helper_pfcmpgt,
    [0xa4] = gen_helper_pfmax,
    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
    [0xa7] = gen_helper_movq, /* pfrsqit1 */
    [0xaa] = gen_helper_pfsubr,
    [0xae] = gen_helper_pfacc,
    [0xb0] = gen_helper_pfcmpeq,
    [0xb4] = gen_helper_pfmul,
    [0xb6] = gen_helper_movq, /* pfrcpit2 */
    [0xb7] = gen_helper_pmulhrw_mmx,
    [0xbb] = gen_helper_pswapd,
    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
};
2939
/*
 * Dispatch entry for a two-operand helper: op[0] is the MMX form,
 * op[1] the XMM form (NULL marks an invalid encoding), and ext_mask
 * is the CPUID_EXT_* feature bit required for the instruction.
 */
struct SSEOpHelper_epp {
    SSEFunc_0_epp op[2];
    uint32_t ext_mask;
};
2944
/*
 * As SSEOpHelper_epp, but for helpers that also take an imm8 operand
 * (the sse_op_table7 group).
 */
struct SSEOpHelper_eppi {
    SSEFunc_0_eppi op[2];
    uint32_t ext_mask;
};
2949
/*
 * Initializers for sse_op_table6/7 entries: pair the helper(s) with
 * the CPUID_EXT_* feature bit the guest must advertise.  Only the
 * SSSE3 ops have an MMX form; the rest leave the MMX slot NULL so the
 * unprefixed encoding is rejected.
 */
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
        CPUID_EXT_PCLMULQDQ }
#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2957
/*
 * Three-byte-opcode group 0x0f 0x38 (SSSE3/SSE4.1/SSE4.2/AES-NI),
 * indexed by the third opcode byte; these instructions take no
 * immediate.  Unlisted bytes decode as invalid.
 * NOTE(review): the lookup itself happens later in gen_sse, past this
 * chunk -- confirm the indexing there.
 */
static const struct SSEOpHelper_epp sse_op_table6[256] = {
    [0x00] = SSSE3_OP(pshufb),
    [0x01] = SSSE3_OP(phaddw),
    [0x02] = SSSE3_OP(phaddd),
    [0x03] = SSSE3_OP(phaddsw),
    [0x04] = SSSE3_OP(pmaddubsw),
    [0x05] = SSSE3_OP(phsubw),
    [0x06] = SSSE3_OP(phsubd),
    [0x07] = SSSE3_OP(phsubsw),
    [0x08] = SSSE3_OP(psignb),
    [0x09] = SSSE3_OP(psignw),
    [0x0a] = SSSE3_OP(psignd),
    [0x0b] = SSSE3_OP(pmulhrsw),
    [0x10] = SSE41_OP(pblendvb),
    [0x14] = SSE41_OP(blendvps),
    [0x15] = SSE41_OP(blendvpd),
    [0x17] = SSE41_OP(ptest),
    [0x1c] = SSSE3_OP(pabsb),
    [0x1d] = SSSE3_OP(pabsw),
    [0x1e] = SSSE3_OP(pabsd),
    [0x20] = SSE41_OP(pmovsxbw),
    [0x21] = SSE41_OP(pmovsxbd),
    [0x22] = SSE41_OP(pmovsxbq),
    [0x23] = SSE41_OP(pmovsxwd),
    [0x24] = SSE41_OP(pmovsxwq),
    [0x25] = SSE41_OP(pmovsxdq),
    [0x28] = SSE41_OP(pmuldq),
    [0x29] = SSE41_OP(pcmpeqq),
    [0x2a] = SSE41_SPECIAL, /* movntqda */
    [0x2b] = SSE41_OP(packusdw),
    [0x30] = SSE41_OP(pmovzxbw),
    [0x31] = SSE41_OP(pmovzxbd),
    [0x32] = SSE41_OP(pmovzxbq),
    [0x33] = SSE41_OP(pmovzxwd),
    [0x34] = SSE41_OP(pmovzxwq),
    [0x35] = SSE41_OP(pmovzxdq),
    [0x37] = SSE42_OP(pcmpgtq),
    [0x38] = SSE41_OP(pminsb),
    [0x39] = SSE41_OP(pminsd),
    [0x3a] = SSE41_OP(pminuw),
    [0x3b] = SSE41_OP(pminud),
    [0x3c] = SSE41_OP(pmaxsb),
    [0x3d] = SSE41_OP(pmaxsd),
    [0x3e] = SSE41_OP(pmaxuw),
    [0x3f] = SSE41_OP(pmaxud),
    [0x40] = SSE41_OP(pmulld),
    [0x41] = SSE41_OP(phminposuw),
    [0xdb] = AESNI_OP(aesimc),
    [0xdc] = AESNI_OP(aesenc),
    [0xdd] = AESNI_OP(aesenclast),
    [0xde] = AESNI_OP(aesdec),
    [0xdf] = AESNI_OP(aesdeclast),
};
3011
/*
 * Three-byte-opcode group 0x0f 0x3a (SSE4.1/SSE4.2/PCLMULQDQ/AES-NI
 * plus SSSE3 palignr), indexed by the third opcode byte; all of these
 * take a trailing imm8, hence the _eppi helper signature.  Unlisted
 * bytes decode as invalid.
 */
static const struct SSEOpHelper_eppi sse_op_table7[256] = {
    [0x08] = SSE41_OP(roundps),
    [0x09] = SSE41_OP(roundpd),
    [0x0a] = SSE41_OP(roundss),
    [0x0b] = SSE41_OP(roundsd),
    [0x0c] = SSE41_OP(blendps),
    [0x0d] = SSE41_OP(blendpd),
    [0x0e] = SSE41_OP(pblendw),
    [0x0f] = SSSE3_OP(palignr),
    [0x14] = SSE41_SPECIAL, /* pextrb */
    [0x15] = SSE41_SPECIAL, /* pextrw */
    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
    [0x17] = SSE41_SPECIAL, /* extractps */
    [0x20] = SSE41_SPECIAL, /* pinsrb */
    [0x21] = SSE41_SPECIAL, /* insertps */
    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
    [0x40] = SSE41_OP(dpps),
    [0x41] = SSE41_OP(dppd),
    [0x42] = SSE41_OP(mpsadbw),
    [0x44] = PCLMULQDQ_OP(pclmulqdq),
    [0x60] = SSE42_OP(pcmpestrm),
    [0x61] = SSE42_OP(pcmpestri),
    [0x62] = SSE42_OP(pcmpistrm),
    [0x63] = SSE42_OP(pcmpistri),
    [0xdf] = AESNI_OP(aeskeygenassist),
};
3038
3039static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3040                    target_ulong pc_start, int rex_r)
3041{
3042    int b1, op1_offset, op2_offset, is_xmm, val;
3043    int modrm, mod, rm, reg;
3044    SSEFunc_0_epp sse_fn_epp;
3045    SSEFunc_0_eppi sse_fn_eppi;
3046    SSEFunc_0_ppi sse_fn_ppi;
3047    SSEFunc_0_eppt sse_fn_eppt;
3048    TCGMemOp ot;
3049
3050    b &= 0xff;
3051    if (s->prefix & PREFIX_DATA)
3052        b1 = 1;
3053    else if (s->prefix & PREFIX_REPZ)
3054        b1 = 2;
3055    else if (s->prefix & PREFIX_REPNZ)
3056        b1 = 3;
3057    else
3058        b1 = 0;
3059    sse_fn_epp = sse_op_table1[b][b1];
3060    if (!sse_fn_epp) {
3061        goto unknown_op;
3062    }
3063    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3064        is_xmm = 1;
3065    } else {
3066        if (b1 == 0) {
3067            /* MMX case */
3068            is_xmm = 0;
3069        } else {
3070            is_xmm = 1;
3071        }
3072    }
3073    /* simple MMX/SSE operation */
3074    if (s->flags & HF_TS_MASK) {
3075        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3076        return;
3077    }
3078    if (s->flags & HF_EM_MASK) {
3079    illegal_op:
3080        gen_illegal_opcode(s);
3081        return;
3082    }
3083    if (is_xmm
3084        && !(s->flags & HF_OSFXSR_MASK)
3085        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3086        goto unknown_op;
3087    }
3088    if (b == 0x0e) {
3089        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3090            /* If we were fully decoding this we might use illegal_op.  */
3091            goto unknown_op;
3092        }
3093        /* femms */
3094        gen_helper_emms(cpu_env);
3095        return;
3096    }
3097    if (b == 0x77) {
3098        /* emms */
3099        gen_helper_emms(cpu_env);
3100        return;
3101    }
3102    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3103       the static cpu state) */
3104    if (!is_xmm) {
3105        gen_helper_enter_mmx(cpu_env);
3106    }
3107
3108    modrm = x86_ldub_code(env, s);
3109    reg = ((modrm >> 3) & 7);
3110    if (is_xmm)
3111        reg |= rex_r;
3112    mod = (modrm >> 6) & 3;
3113    if (sse_fn_epp == SSE_SPECIAL) {
3114        b |= (b1 << 8);
3115        switch(b) {
3116        case 0x0e7: /* movntq */
3117            if (mod == 3) {
3118                goto illegal_op;
3119            }
3120            gen_lea_modrm(env, s, modrm);
3121            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3122            break;
3123        case 0x1e7: /* movntdq */
3124        case 0x02b: /* movntps */
3125        case 0x12b: /* movntps */
3126            if (mod == 3)
3127                goto illegal_op;
3128            gen_lea_modrm(env, s, modrm);
3129            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3130            break;
3131        case 0x3f0: /* lddqu */
3132            if (mod == 3)
3133                goto illegal_op;
3134            gen_lea_modrm(env, s, modrm);
3135            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3136            break;
3137        case 0x22b: /* movntss */
3138        case 0x32b: /* movntsd */
3139            if (mod == 3)
3140                goto illegal_op;
3141            gen_lea_modrm(env, s, modrm);
3142            if (b1 & 1) {
3143                gen_stq_env_A0(s, offsetof(CPUX86State,
3144                                           xmm_regs[reg].ZMM_Q(0)));
3145            } else {
3146                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3147                    xmm_regs[reg].ZMM_L(0)));
3148                gen_op_st_v(s, MO_32, s->T0, s->A0);
3149            }
3150            break;
3151        case 0x6e: /* movd mm, ea */
3152#ifdef TARGET_X86_64
3153            if (s->dflag == MO_64) {
3154                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3155                tcg_gen_st_tl(s->T0, cpu_env,
3156                              offsetof(CPUX86State, fpregs[reg].mmx));
3157            } else
3158#endif
3159            {
3160                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3161                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3162                                 offsetof(CPUX86State,fpregs[reg].mmx));
3163                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3164                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3165            }
3166            break;
3167        case 0x16e: /* movd xmm, ea */
3168#ifdef TARGET_X86_64
3169            if (s->dflag == MO_64) {
3170                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3171                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3172                                 offsetof(CPUX86State,xmm_regs[reg]));
3173                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3174            } else
3175#endif
3176            {
3177                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3178                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3179                                 offsetof(CPUX86State,xmm_regs[reg]));
3180                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3181                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3182            }
3183            break;
3184        case 0x6f: /* movq mm, ea */
3185            if (mod != 3) {
3186                gen_lea_modrm(env, s, modrm);
3187                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3188            } else {
3189                rm = (modrm & 7);
3190                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3191                               offsetof(CPUX86State,fpregs[rm].mmx));
3192                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3193                               offsetof(CPUX86State,fpregs[reg].mmx));
3194            }
3195            break;
3196        case 0x010: /* movups */
3197        case 0x110: /* movupd */
3198        case 0x028: /* movaps */
3199        case 0x128: /* movapd */
3200        case 0x16f: /* movdqa xmm, ea */
3201        case 0x26f: /* movdqu xmm, ea */
3202            if (mod != 3) {
3203                gen_lea_modrm(env, s, modrm);
3204                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3205            } else {
3206                rm = (modrm & 7) | REX_B(s);
3207                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3208                            offsetof(CPUX86State,xmm_regs[rm]));
3209            }
3210            break;
3211        case 0x210: /* movss xmm, ea */
3212            if (mod != 3) {
3213                gen_lea_modrm(env, s, modrm);
3214                gen_op_ld_v(s, MO_32, s->T0, s->A0);
3215                tcg_gen_st32_tl(s->T0, cpu_env,
3216                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3217                tcg_gen_movi_tl(s->T0, 0);
3218                tcg_gen_st32_tl(s->T0, cpu_env,
3219                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3220                tcg_gen_st32_tl(s->T0, cpu_env,
3221                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3222                tcg_gen_st32_tl(s->T0, cpu_env,
3223                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3224            } else {
3225                rm = (modrm & 7) | REX_B(s);
3226                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3227                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3228            }
3229            break;
3230        case 0x310: /* movsd xmm, ea */
3231            if (mod != 3) {
3232                gen_lea_modrm(env, s, modrm);
3233                gen_ldq_env_A0(s, offsetof(CPUX86State,
3234                                           xmm_regs[reg].ZMM_Q(0)));
3235                tcg_gen_movi_tl(s->T0, 0);
3236                tcg_gen_st32_tl(s->T0, cpu_env,
3237                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3238                tcg_gen_st32_tl(s->T0, cpu_env,
3239                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3240            } else {
3241                rm = (modrm & 7) | REX_B(s);
3242                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3243                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3244            }
3245            break;
3246        case 0x012: /* movlps */
3247        case 0x112: /* movlpd */
3248            if (mod != 3) {
3249                gen_lea_modrm(env, s, modrm);
3250                gen_ldq_env_A0(s, offsetof(CPUX86State,
3251                                           xmm_regs[reg].ZMM_Q(0)));
3252            } else {
3253                /* movhlps */
3254                rm = (modrm & 7) | REX_B(s);
3255                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3256                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3257            }
3258            break;
3259        case 0x212: /* movsldup */
3260            if (mod != 3) {
3261                gen_lea_modrm(env, s, modrm);
3262                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3263            } else {
3264                rm = (modrm & 7) | REX_B(s);
3265                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3266                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3267                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3268                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3269            }
3270            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3271                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3272            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3273                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3274            break;
3275        case 0x312: /* movddup */
3276            if (mod != 3) {
3277                gen_lea_modrm(env, s, modrm);
3278                gen_ldq_env_A0(s, offsetof(CPUX86State,
3279                                           xmm_regs[reg].ZMM_Q(0)));
3280            } else {
3281                rm = (modrm & 7) | REX_B(s);
3282                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3283                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3284            }
3285            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3286                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3287            break;
3288        case 0x016: /* movhps */
3289        case 0x116: /* movhpd */
3290            if (mod != 3) {
3291                gen_lea_modrm(env, s, modrm);
3292                gen_ldq_env_A0(s, offsetof(CPUX86State,
3293                                           xmm_regs[reg].ZMM_Q(1)));
3294            } else {
3295                /* movlhps */
3296                rm = (modrm & 7) | REX_B(s);
3297                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3298                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3299            }
3300            break;
3301        case 0x216: /* movshdup */
3302            if (mod != 3) {
3303                gen_lea_modrm(env, s, modrm);
3304                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3305            } else {
3306                rm = (modrm & 7) | REX_B(s);
3307                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3308                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3309                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3310                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3311            }
3312            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3313                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3314            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3315                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3316            break;
3317        case 0x178:
3318        case 0x378:
3319            {
3320                int bit_index, field_length;
3321
3322                if (b1 == 1 && reg != 0)
3323                    goto illegal_op;
3324                field_length = x86_ldub_code(env, s) & 0x3F;
3325                bit_index = x86_ldub_code(env, s) & 0x3F;
3326                tcg_gen_addi_ptr(s->ptr0, cpu_env,
3327                    offsetof(CPUX86State,xmm_regs[reg]));
3328                if (b1 == 1)
3329                    gen_helper_extrq_i(cpu_env, s->ptr0,
3330                                       tcg_const_i32(bit_index),
3331                                       tcg_const_i32(field_length));
3332                else
3333                    gen_helper_insertq_i(cpu_env, s->ptr0,
3334                                         tcg_const_i32(bit_index),
3335                                         tcg_const_i32(field_length));
3336            }
3337            break;
3338        case 0x7e: /* movd ea, mm */
3339#ifdef TARGET_X86_64
3340            if (s->dflag == MO_64) {
3341                tcg_gen_ld_i64(s->T0, cpu_env,
3342                               offsetof(CPUX86State,fpregs[reg].mmx));
3343                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3344            } else
3345#endif
3346            {
3347                tcg_gen_ld32u_tl(s->T0, cpu_env,
3348                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3349                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3350            }
3351            break;
3352        case 0x17e: /* movd ea, xmm */
3353#ifdef TARGET_X86_64
3354            if (s->dflag == MO_64) {
3355                tcg_gen_ld_i64(s->T0, cpu_env,
3356                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3357                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3358            } else
3359#endif
3360            {
3361                tcg_gen_ld32u_tl(s->T0, cpu_env,
3362                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3363                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3364            }
3365            break;
3366        case 0x27e: /* movq xmm, ea */
3367            if (mod != 3) {
3368                gen_lea_modrm(env, s, modrm);
3369                gen_ldq_env_A0(s, offsetof(CPUX86State,
3370                                           xmm_regs[reg].ZMM_Q(0)));
3371            } else {
3372                rm = (modrm & 7) | REX_B(s);
3373                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3374                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3375            }
3376            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3377            break;
3378        case 0x7f: /* movq ea, mm */
3379            if (mod != 3) {
3380                gen_lea_modrm(env, s, modrm);
3381                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3382            } else {
3383                rm = (modrm & 7);
3384                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3385                            offsetof(CPUX86State,fpregs[reg].mmx));
3386            }
3387            break;
3388        case 0x011: /* movups */
3389        case 0x111: /* movupd */
3390        case 0x029: /* movaps */
3391        case 0x129: /* movapd */
3392        case 0x17f: /* movdqa ea, xmm */
3393        case 0x27f: /* movdqu ea, xmm */
3394            if (mod != 3) {
3395                gen_lea_modrm(env, s, modrm);
3396                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3397            } else {
3398                rm = (modrm & 7) | REX_B(s);
3399                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3400                            offsetof(CPUX86State,xmm_regs[reg]));
3401            }
3402            break;
3403        case 0x211: /* movss ea, xmm */
3404            if (mod != 3) {
3405                gen_lea_modrm(env, s, modrm);
3406                tcg_gen_ld32u_tl(s->T0, cpu_env,
3407                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3408                gen_op_st_v(s, MO_32, s->T0, s->A0);
3409            } else {
3410                rm = (modrm & 7) | REX_B(s);
3411                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3412                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3413            }
3414            break;
3415        case 0x311: /* movsd ea, xmm */
3416            if (mod != 3) {
3417                gen_lea_modrm(env, s, modrm);
3418                gen_stq_env_A0(s, offsetof(CPUX86State,
3419                                           xmm_regs[reg].ZMM_Q(0)));
3420            } else {
3421                rm = (modrm & 7) | REX_B(s);
3422                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3423                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3424            }
3425            break;
3426        case 0x013: /* movlps */
3427        case 0x113: /* movlpd */
3428            if (mod != 3) {
3429                gen_lea_modrm(env, s, modrm);
3430                gen_stq_env_A0(s, offsetof(CPUX86State,
3431                                           xmm_regs[reg].ZMM_Q(0)));
3432            } else {
3433                goto illegal_op;
3434            }
3435            break;
3436        case 0x017: /* movhps */
3437        case 0x117: /* movhpd */
3438            if (mod != 3) {
3439                gen_lea_modrm(env, s, modrm);
3440                gen_stq_env_A0(s, offsetof(CPUX86State,
3441                                           xmm_regs[reg].ZMM_Q(1)));
3442            } else {
3443                goto illegal_op;
3444            }
3445            break;
3446        case 0x71: /* shift mm, im */
3447        case 0x72:
3448        case 0x73:
3449        case 0x171: /* shift xmm, im */
3450        case 0x172:
3451        case 0x173:
3452            if (b1 >= 2) {
3453                goto unknown_op;
3454            }
3455            val = x86_ldub_code(env, s);
3456            if (is_xmm) {
3457                tcg_gen_movi_tl(s->T0, val);
3458                tcg_gen_st32_tl(s->T0, cpu_env,
3459                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3460                tcg_gen_movi_tl(s->T0, 0);
3461                tcg_gen_st32_tl(s->T0, cpu_env,
3462                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3463                op1_offset = offsetof(CPUX86State,xmm_t0);
3464            } else {
3465                tcg_gen_movi_tl(s->T0, val);
3466                tcg_gen_st32_tl(s->T0, cpu_env,
3467                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3468                tcg_gen_movi_tl(s->T0, 0);
3469                tcg_gen_st32_tl(s->T0, cpu_env,
3470                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3471                op1_offset = offsetof(CPUX86State,mmx_t0);
3472            }
3473            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3474                                       (((modrm >> 3)) & 7)][b1];
3475            if (!sse_fn_epp) {
3476                goto unknown_op;
3477            }
3478            if (is_xmm) {
3479                rm = (modrm & 7) | REX_B(s);
3480                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3481            } else {
3482                rm = (modrm & 7);
3483                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3484            }
3485            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3486            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3487            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3488            break;
3489        case 0x050: /* movmskps */
3490            rm = (modrm & 7) | REX_B(s);
3491            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3492                             offsetof(CPUX86State,xmm_regs[rm]));
3493            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3494            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3495            break;
3496        case 0x150: /* movmskpd */
3497            rm = (modrm & 7) | REX_B(s);
3498            tcg_gen_addi_ptr(s->ptr0, cpu_env,
3499                             offsetof(CPUX86State,xmm_regs[rm]));
3500            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3501            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3502            break;
3503        case 0x02a: /* cvtpi2ps */
3504        case 0x12a: /* cvtpi2pd */
3505            gen_helper_enter_mmx(cpu_env);
3506            if (mod != 3) {
3507                gen_lea_modrm(env, s, modrm);
3508                op2_offset = offsetof(CPUX86State,mmx_t0);
3509                gen_ldq_env_A0(s, op2_offset);
3510            } else {
3511                rm = (modrm & 7);
3512                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3513            }
3514            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3515            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3516            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3517            switch(b >> 8) {
3518            case 0x0:
3519                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3520                break;
3521            default:
3522            case 0x1:
3523                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3524                break;
3525            }
3526            break;
3527        case 0x22a: /* cvtsi2ss */
3528        case 0x32a: /* cvtsi2sd */
3529            ot = mo_64_32(s->dflag);
3530            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3531            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3532            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3533            if (ot == MO_32) {
3534                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3535                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3536                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3537            } else {
3538#ifdef TARGET_X86_64
3539                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3540                sse_fn_epl(cpu_env, s->ptr0, s->T0);
3541#else
3542                goto illegal_op;
3543#endif
3544            }
3545            break;
3546        case 0x02c: /* cvttps2pi */
3547        case 0x12c: /* cvttpd2pi */
3548        case 0x02d: /* cvtps2pi */
3549        case 0x12d: /* cvtpd2pi */
3550            gen_helper_enter_mmx(cpu_env);
3551            if (mod != 3) {
3552                gen_lea_modrm(env, s, modrm);
3553                op2_offset = offsetof(CPUX86State,xmm_t0);
3554                gen_ldo_env_A0(s, op2_offset);
3555            } else {
3556                rm = (modrm & 7) | REX_B(s);
3557                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3558            }
3559            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3560            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3561            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3562            switch(b) {
3563            case 0x02c:
3564                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3565                break;
3566            case 0x12c:
3567                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3568                break;
3569            case 0x02d:
3570                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3571                break;
3572            case 0x12d:
3573                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3574                break;
3575            }
3576            break;
        case 0x22c: /* cvttss2si */
        case 0x32c: /* cvttsd2si */
        case 0x22d: /* cvtss2si */
        case 0x32d: /* cvtsd2si */
            /* Scalar SSE float -> integer GPR conversions; the "tt"
               forms truncate, the others use the current rounding mode.  */
            ot = mo_64_32(s->dflag);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                if ((b >> 8) & 1) {
                    /* 0x3xx (F2 prefix): double source, load 64 bits.  */
                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                } else {
                    /* 0x2xx (F3 prefix): single source, load 32 bits.  */
                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
                    tcg_gen_st32_tl(s->T0, cpu_env,
                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                }
                op2_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
            if (ot == MO_32) {
                /* Table index: bit 0 = round (2d) vs truncate (2c),
                   bit 1 = sd (F2) vs ss (F3) source.  */
                SSEFunc_i_ep sse_fn_i_ep =
                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            } else {
#ifdef TARGET_X86_64
                SSEFunc_l_ep sse_fn_l_ep =
                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
#else
                /* A 64-bit destination is not encodable without REX.W.  */
                goto illegal_op;
#endif
            }
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        case 0xc4: /* pinsrw */
        case 0x1c4:
            /* Insert a 16-bit value from GPR/memory into the word lane
               selected by the trailing immediate byte.  */
            s->rip_offset = 1;  /* immediate follows; adjust RIP-relative base */
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            val = x86_ldub_code(env, s);
            if (b1) {
                val &= 7;       /* xmm destination: 8 word lanes */
                tcg_gen_st16_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
            } else {
                val &= 3;       /* mmx destination: 4 word lanes */
                tcg_gen_st16_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
            }
            break;
        case 0xc5: /* pextrw */
        case 0x1c5:
            /* Extract a zero-extended word lane from an MMX/XMM register
               into a GPR.  Register source only; memory forms are invalid.  */
            if (mod != 3)
                goto illegal_op;
            ot = mo_64_32(s->dflag);
            val = x86_ldub_code(env, s);
            if (b1) {
                val &= 7;
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
            } else {
                val &= 3;
                rm = (modrm & 7);
                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
            }
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        case 0x1d6: /* movq ea, xmm */
            /* Store the low qword of an XMM register to memory, or move
               it between XMM registers.  */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_stq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                /* Register form also zeroes the destination's high qword.  */
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                gen_op_movq_env_0(s,
                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
            }
            break;
        case 0x2d6: /* movq2dq */
            /* Copy an MMX register into the low qword of an XMM register
               and zero the high qword.  enter_mmx puts the FPU into MMX
               state first, since the MMX registers alias the FP stack.  */
            gen_helper_enter_mmx(cpu_env);
            rm = (modrm & 7);
            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                        offsetof(CPUX86State,fpregs[rm].mmx));
            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
            break;
        case 0x3d6: /* movdq2q */
            /* Copy the low qword of an XMM register into an MMX register,
               switching the FPU into MMX state first.  */
            gen_helper_enter_mmx(cpu_env);
            rm = (modrm & 7) | REX_B(s);
            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
            break;
        case 0xd7: /* pmovmskb */
        case 0x1d7:
            /* Gather the sign bit of every byte lane into a GPR bitmask.
               Register source only.  */
            if (mod != 3)
                goto illegal_op;
            if (b1) {
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, xmm_regs[rm]));
                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
            } else {
                rm = (modrm & 7);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, fpregs[rm].mmx));
                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
            }
            reg = ((modrm >> 3) & 7) | rex_r;
            /* Zero-extends into the full destination register.  */
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            break;
3692
        case 0x138:
        case 0x038:
            /* Three-byte 0f 38 opcodes, dispatched through sse_op_table6.
               The f0-ff sub-range holds non-SSE integer extensions
               (movbe/crc32/BMI) and is handled at do_0f_38_fx below.  */
            b = modrm;
            if ((b & 0xf0) == 0xf0) {
                goto do_0f_38_fx;
            }
            modrm = x86_ldub_code(env, s);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            if (b1 >= 2) {
                /* Only no-prefix (mmx) and 66-prefix (xmm) forms exist.  */
                goto unknown_op;
            }

            sse_fn_epp = sse_op_table6[b].op[b1];
            if (!sse_fn_epp) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                goto illegal_op;

            if (b1) {
                /* XMM operands.  For memory sources, load only as many
                   bytes as the operation actually consumes.  */
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(env, s, modrm);
                    switch (b) {
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
                        /* 64-bit source half.  */
                        gen_ldq_env_A0(s, op2_offset +
                                        offsetof(ZMMReg, ZMM_Q(0)));
                        break;
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
                        /* 32-bit source quarter.  */
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_L(0)));
                        break;
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
                        /* 16-bit source eighth.  */
                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
                                           s->mem_index, MO_LEUW);
                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_W(0)));
                        break;
                    case 0x2a:            /* movntqda */
                        /* Non-temporal load straight into the destination;
                           there is no operation to apply, so we are done.  */
                        gen_ldo_env_A0(s, op1_offset);
                        return;
                    default:
                        gen_ldo_env_A0(s, op2_offset);
                    }
                }
            } else {
                /* MMX operands.  */
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldq_env_A0(s, op2_offset);
                }
            }
            if (sse_fn_epp == SSE_SPECIAL) {
                goto unknown_op;
            }

            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);

            if (b == 0x17) {
                /* ptest (0f 38 17) only produces flags; they are now
                   live in EFLAGS format.  */
                set_cc_op(s, CC_OP_EFLAGS);
            }
            break;
3770
        case 0x238:
        case 0x338:
        do_0f_38_fx:
            /* Various integer extensions at 0f 38 f[0-f].  */
            b = modrm | (b1 << 8);      /* fold the prefix group into b */
            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | rex_r;

            switch (b) {
            case 0x3f0: /* crc32 Gd,Eb */
            case 0x3f1: /* crc32 Gd,Ey */
            do_crc32:
                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
                    goto illegal_op;
                }
                /* Source width: the f0 (byte) form is always 8 bits,
                   otherwise it follows the operand-size prefixes.  */
                if ((b & 0xff) == 0xf0) {
                    ot = MO_8;
                } else if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Last argument is the source width in bits.  */
                gen_helper_crc32(s->T0, s->tmp2_i32,
                                 s->T0, tcg_const_i32(8 << ot));

                /* The result is written back at full operand size.  */
                ot = mo_64_32(s->dflag);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;

            case 0x1f0: /* crc32 or movbe */
            case 0x1f1:
                /* For these insns, the f3 prefix is supposed to have priority
                   over the 66 prefix, but that's not what we implement above
                   setting b1.  */
                if (s->prefix & PREFIX_REPNZ) {
                    goto do_crc32;
                }
                /* FALLTHRU */
            case 0x0f0: /* movbe Gy,My */
            case 0x0f1: /* movbe My,Gy */
                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
                    goto illegal_op;
                }
                if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                /* movbe is memory-only; the byte swap falls out of the
                   big-endian memory-op flag.  */
                gen_lea_modrm(env, s, modrm);
                if ((b & 1) == 0) {
                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                       s->mem_index, ot | MO_BE);
                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                } else {
                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                       s->mem_index, ot | MO_BE);
                }
                break;
3833
            case 0x0f2: /* andn Gy, By, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* dest = Ey & ~By (By is the VEX.vvvv operand).  */
                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                gen_op_update1_cc(s);
                set_cc_op(s, CC_OP_LOGICB + ot);
                break;

            case 0x0f7: /* bextr Gy, Ey, By */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                {
                    /* Control operand layout: By[7:0] = START bit index,
                       By[15:8] = LEN field width.  */
                    TCGv bound, zero;

                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                    /* Extract START, and shift the operand.
                       Shifts larger than operand size get zeros.  */
                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);

                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    zero = tcg_const_tl(0);
                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
                                       s->T0, zero);
                    tcg_temp_free(zero);

                    /* Extract the LEN into a mask.  Lengths larger than
                       operand size get all ones.  */
                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
                                       s->A0, bound);
                    tcg_temp_free(bound);
                    tcg_gen_movi_tl(s->T1, 1);
                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
                    tcg_gen_subi_tl(s->T1, s->T1, 1);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);

                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                    gen_op_update1_cc(s);
                    set_cc_op(s, CC_OP_LOGICB + ot);
                }
                break;

            case 0x0f5: /* bzhi Gy, Ey, By */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Zero the bits of Ey from position By[7:0] upward.  */
                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
                {
                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    /* Note that since we're using BMILG (in order to get O
                       cleared) we need to store the inverse into C.  */
                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
                                       s->T1, bound);
                    /* Clamp an out-of-range index to the operand width.  */
                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
                                       bound, bound, s->T1);
                    tcg_temp_free(bound);
                }
                tcg_gen_movi_tl(s->A0, -1);
                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                gen_op_update1_cc(s);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;

            case 0x3f6: /* mulx By, Gy, rdx, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Flagless widening multiply: rdx * Ey, low half to
                   VEX.vvvv, high half to the modrm reg.  */
                switch (ot) {
                default:
                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
                                      s->tmp2_i32, s->tmp3_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
                    break;
#ifdef TARGET_X86_64
                case MO_64:
                    tcg_gen_mulu2_i64(s->T0, s->T1,
                                      s->T0, cpu_regs[R_EDX]);
                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
                    break;
#endif
                }
                break;

            case 0x3f5: /* pdep Gy, By, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the mask operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
                break;

            case 0x2f5: /* pext Gy, By, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the mask operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
                break;
3977
            case 0x1f6: /* adcx Gy, Ey */
            case 0x2f6: /* adox Gy, Ey */
                /* adcx chains through CF only, adox through OF only.
                   Alternating chains are tracked with the ADCX/ADOX/ADCOX
                   cc_op states so the carries can be reused without
                   materializing EFLAGS between instructions.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
                    goto illegal_op;
                } else {
                    TCGv carry_in, carry_out, zero;
                    int end_op;

                    ot = mo_64_32(s->dflag);
                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                    /* Re-use the carry-out from a previous round.  */
                    carry_in = NULL;
                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
                    switch (s->cc_op) {
                    case CC_OP_ADCX:
                        if (b == 0x1f6) {
                            carry_in = cpu_cc_dst;
                            end_op = CC_OP_ADCX;
                        } else {
                            end_op = CC_OP_ADCOX;
                        }
                        break;
                    case CC_OP_ADOX:
                        if (b == 0x1f6) {
                            end_op = CC_OP_ADCOX;
                        } else {
                            carry_in = cpu_cc_src2;
                            end_op = CC_OP_ADOX;
                        }
                        break;
                    case CC_OP_ADCOX:
                        end_op = CC_OP_ADCOX;
                        carry_in = carry_out;
                        break;
                    default:
                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
                        break;
                    }
                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
                    if (!carry_in) {
                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
                            gen_compute_eflags(s);
                        }
                        carry_in = s->tmp0;
                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
                    }

                    switch (ot) {
#ifdef TARGET_X86_64
                    case MO_32:
                        /* If we know TL is 64-bit, and we want a 32-bit
                           result, just do everything in 64-bit arithmetic.  */
                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
                        tcg_gen_ext32u_i64(s->T0, s->T0);
                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
                        /* The new carry is bit 32 of the 64-bit sum.  */
                        tcg_gen_shri_i64(carry_out, s->T0, 32);
                        break;
#endif
                    default:
                        /* Otherwise compute the carry-out in two steps.  */
                        zero = tcg_const_tl(0);
                        tcg_gen_add2_tl(s->T0, carry_out,
                                        s->T0, zero,
                                        carry_in, zero);
                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                        cpu_regs[reg], carry_out,
                                        s->T0, zero);
                        tcg_temp_free(zero);
                        break;
                    }
                    set_cc_op(s, end_op);
                }
                break;
4055
            case 0x1f7: /* shlx Gy, Ey, By */
            case 0x2f7: /* sarx Gy, Ey, By */
            case 0x3f7: /* shrx Gy, Ey, By */
                /* Flagless shifts; the count comes from VEX.vvvv and is
                   masked to the operand width like the legacy shifts.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                if (ot == MO_64) {
                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
                } else {
                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
                }
                if (b == 0x1f7) {
                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
                } else if (b == 0x2f7) {
                    /* Arithmetic shift needs the 32-bit value sign-extended
                       so the high bits shift in correctly.  */
                    if (ot != MO_64) {
                        tcg_gen_ext32s_tl(s->T0, s->T0);
                    }
                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
                } else {
                    if (ot != MO_64) {
                        tcg_gen_ext32u_tl(s->T0, s->T0);
                    }
                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                }
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;
4086
            case 0x0f3:
            case 0x1f3:
            case 0x2f3:
            case 0x3f3: /* Group 17 */
                /* BMI1 unary ops selected by the modrm reg field.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                switch (reg & 7) {
                case 1: /* blsr By,Ey */
                    /* Clear the lowest set bit: x & (x - 1).  */
                    tcg_gen_subi_tl(s->T1, s->T0, 1);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                    break;
                case 2: /* blsmsk By,Ey */
                    /* Mask up to the lowest set bit: x ^ (x - 1).  */
                    tcg_gen_subi_tl(s->T1, s->T0, 1);
                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
                    break;
                case 3: /* blsi By, Ey */
                    /* Isolate the lowest set bit: x & -x.  */
                    tcg_gen_neg_tl(s->T1, s->T0);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                    break;
                default:
                    goto unknown_op;
                }
                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                /* Destination is the VEX.vvvv register.  */
                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;
4120
4121            default:
4122                goto unknown_op;
4123            }
4124            break;
4125
        case 0x03a:
        case 0x13a:
            /* Three-byte 0f 3a opcodes, dispatched through sse_op_table7.
               All take a trailing immediate byte.  */
            b = modrm;
            modrm = x86_ldub_code(env, s);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            if (b1 >= 2) {
                /* Only no-prefix (mmx) and 66-prefix (xmm) forms exist.  */
                goto unknown_op;
            }

            sse_fn_eppi = sse_op_table7[b].op[b1];
            if (!sse_fn_eppi) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                goto illegal_op;

            /* Immediate byte follows; adjust RIP-relative addressing.  */
            s->rip_offset = 1;
4145
            if (sse_fn_eppi == SSE_SPECIAL) {
                /* Hand-coded SSE4.1 lane insert/extract ops; the immediate
                   byte selects the lane (and, for insertps, the zero mask).  */
                ot = mo_64_32(s->dflag);
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3)
                    gen_lea_modrm(env, s, modrm);
                reg = ((modrm >> 3) & 7) | rex_r;
                val = x86_ldub_code(env, s);
                switch (b) {
                case 0x14: /* pextrb */
                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_B(val & 15)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_UB);
                    }
                    break;
                case 0x15: /* pextrw */
                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_W(val & 7)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_LEUW);
                    }
                    break;
                case 0x16:
                    /* REX.W selects pextrq over pextrd.  */
                    if (ot == MO_32) { /* pextrd */
                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(val & 3)));
                        if (mod == 3) {
                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
                        } else {
                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                                s->mem_index, MO_LEUL);
                        }
                    } else { /* pextrq */
#ifdef TARGET_X86_64
                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_Q(val & 1)));
                        if (mod == 3) {
                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
                        } else {
                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                                s->mem_index, MO_LEQ);
                        }
#else
                        goto illegal_op;
#endif
                    }
                    break;
                case 0x17: /* extractps */
                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_L(val & 3)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_LEUL);
                    }
                    break;
                case 0x20: /* pinsrb */
                    if (mod == 3) {
                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                    } else {
                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                           s->mem_index, MO_UB);
                    }
                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_B(val & 15)));
                    break;
                case 0x21: /* insertps */
                    /* imm8: [7:6] source lane, [5:4] destination lane,
                       [3:0] mask of destination lanes to zero.  */
                    if (mod == 3) {
                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,xmm_regs[rm]
                                                .ZMM_L((val >> 6) & 3)));
                    } else {
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                    }
                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                    offsetof(CPUX86State,xmm_regs[reg]
                                            .ZMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(0)));
                    if ((val >> 1) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(1)));
                    if ((val >> 2) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(2)));
                    if ((val >> 3) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(3)));
                    break;
                case 0x22:
                    /* REX.W selects pinsrq over pinsrd.  */
                    if (ot == MO_32) { /* pinsrd */
                        if (mod == 3) {
                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
                        } else {
                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                                s->mem_index, MO_LEUL);
                        }
                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(val & 3)));
                    } else { /* pinsrq */
#ifdef TARGET_X86_64
                        if (mod == 3) {
                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
                        } else {
                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                                s->mem_index, MO_LEQ);
                        }
                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_Q(val & 1)));
#else
                        goto illegal_op;
#endif
                    }
                    break;
                }
                return;
            }
4280
4281            if (b1) {
4282                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4283                if (mod == 3) {
4284                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4285                } else {
4286                    op2_offset = offsetof(CPUX86State,xmm_t0);
4287                    gen_lea_modrm(env, s, modrm);
4288                    gen_ldo_env_A0(s, op2_offset);
4289                }
4290            } else {
4291                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4292                if (mod == 3) {
4293                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4294                } else {
4295                    op2_offset = offsetof(CPUX86State,mmx_t0);
4296                    gen_lea_modrm(env, s, modrm);
4297                    gen_ldq_env_A0(s, op2_offset);
4298                }
4299            }
4300            val = x86_ldub_code(env, s);
4301
4302            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4303                set_cc_op(s, CC_OP_EFLAGS);
4304
4305                if (s->dflag == MO_64) {
4306                    /* The helper must use entire 64-bit gp registers */
4307                    val |= 1 << 8;
4308                }
4309            }
4310
4311            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4312            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4313            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4314            break;
4315
4316        case 0x33a:
4317            /* Various integer extensions at 0f 3a f[0-f].  */
4318            b = modrm | (b1 << 8);
4319            modrm = x86_ldub_code(env, s);
4320            reg = ((modrm >> 3) & 7) | rex_r;
4321
4322            switch (b) {
4323            case 0x3f0: /* rorx Gy,Ey, Ib */
4324                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4325                    || !(s->prefix & PREFIX_VEX)
4326                    || s->vex_l != 0) {
4327                    goto illegal_op;
4328                }
4329                ot = mo_64_32(s->dflag);
4330                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4331                b = x86_ldub_code(env, s);
4332                if (ot == MO_64) {
4333                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4334                } else {
4335                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4336                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4337                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4338                }
4339                gen_op_mov_reg_v(s, ot, reg, s->T0);
4340                break;
4341
4342            default:
4343                goto unknown_op;
4344            }
4345            break;
4346
4347        default:
4348        unknown_op:
4349            gen_unknown_opcode(env, s);
4350            return;
4351        }
4352    } else {
4353        /* generic MMX or SSE operation */
4354        switch(b) {
4355        case 0x70: /* pshufx insn */
4356        case 0xc6: /* pshufx insn */
4357        case 0xc2: /* compare insns */
4358            s->rip_offset = 1;
4359            break;
4360        default:
4361            break;
4362        }
4363        if (is_xmm) {
4364            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4365            if (mod != 3) {
4366                int sz = 4;
4367
4368                gen_lea_modrm(env, s, modrm);
4369                op2_offset = offsetof(CPUX86State,xmm_t0);
4370
4371                switch (b) {
4372                case 0x50 ... 0x5a:
4373                case 0x5c ... 0x5f:
4374                case 0xc2:
4375                    /* Most sse scalar operations.  */
4376                    if (b1 == 2) {
4377                        sz = 2;
4378                    } else if (b1 == 3) {
4379                        sz = 3;
4380                    }
4381                    break;
4382
4383                case 0x2e:  /* ucomis[sd] */
4384                case 0x2f:  /* comis[sd] */
4385                    if (b1 == 0) {
4386                        sz = 2;
4387                    } else {
4388                        sz = 3;
4389                    }
4390                    break;
4391                }
4392
4393                switch (sz) {
4394                case 2:
4395                    /* 32 bit access */
4396                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
4397                    tcg_gen_st32_tl(s->T0, cpu_env,
4398                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4399                    break;
4400                case 3:
4401                    /* 64 bit access */
4402                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4403                    break;
4404                default:
4405                    /* 128 bit access */
4406                    gen_ldo_env_A0(s, op2_offset);
4407                    break;
4408                }
4409            } else {
4410                rm = (modrm & 7) | REX_B(s);
4411                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4412            }
4413        } else {
4414            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4415            if (mod != 3) {
4416                gen_lea_modrm(env, s, modrm);
4417                op2_offset = offsetof(CPUX86State,mmx_t0);
4418                gen_ldq_env_A0(s, op2_offset);
4419            } else {
4420                rm = (modrm & 7);
4421                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4422            }
4423        }
4424        switch(b) {
4425        case 0x0f: /* 3DNow! data insns */
4426            val = x86_ldub_code(env, s);
4427            sse_fn_epp = sse_op_table5[val];
4428            if (!sse_fn_epp) {
4429                goto unknown_op;
4430            }
4431            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4432                goto illegal_op;
4433            }
4434            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4435            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4436            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4437            break;
4438        case 0x70: /* pshufx insn */
4439        case 0xc6: /* pshufx insn */
4440            val = x86_ldub_code(env, s);
4441            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4442            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4443            /* XXX: introduce a new table? */
4444            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4445            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4446            break;
4447        case 0xc2:
4448            /* compare insns */
4449            val = x86_ldub_code(env, s);
4450            if (val >= 8)
4451                goto unknown_op;
4452            sse_fn_epp = sse_op_table4[val][b1];
4453
4454            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4455            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4456            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4457            break;
4458        case 0xf7:
4459            /* maskmov : we must prepare A0 */
4460            if (mod != 3)
4461                goto illegal_op;
4462            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4463            gen_extu(s->aflag, s->A0);
4464            gen_add_A0_ds_seg(s);
4465
4466            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4467            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4468            /* XXX: introduce a new table? */
4469            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4470            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4471            break;
4472        default:
4473            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4474            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4475            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4476            break;
4477        }
4478        if (b == 0x2e || b == 0x2f) {
4479            set_cc_op(s, CC_OP_EFLAGS);
4480        }
4481    }
4482}
4483
/* Convert one instruction.  Sets s->base.is_jmp if translation must
   stop after this instruction.  Returns the next pc value.  */
4486static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4487{
4488    CPUX86State *env = cpu->env_ptr;
4489    int b, prefixes;
4490    int shift;
4491    TCGMemOp ot, aflag, dflag;
4492    int modrm, reg, rm, mod, op, opreg, val;
4493    target_ulong next_eip, tval;
4494    int rex_w, rex_r;
4495    target_ulong pc_start = s->base.pc_next;
4496
4497    s->pc_start = s->pc = pc_start;
4498    s->override = -1;
4499#ifdef TARGET_X86_64
4500    s->rex_x = 0;
4501    s->rex_b = 0;
4502    s->x86_64_hregs = false;
4503#endif
4504    s->rip_offset = 0; /* for relative ip address */
4505    s->vex_l = 0;
4506    s->vex_v = 0;
4507    if (sigsetjmp(s->jmpbuf, 0) != 0) {
4508        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4509        return s->pc;
4510    }
4511
4512    prefixes = 0;
4513    rex_w = -1;
4514    rex_r = 0;
4515
4516 next_byte:
4517    b = x86_ldub_code(env, s);
4518    /* Collect prefixes.  */
4519    switch (b) {
4520    case 0xf3:
4521        prefixes |= PREFIX_REPZ;
4522        goto next_byte;
4523    case 0xf2:
4524        prefixes |= PREFIX_REPNZ;
4525        goto next_byte;
4526    case 0xf0:
4527        prefixes |= PREFIX_LOCK;
4528        goto next_byte;
4529    case 0x2e:
4530        s->override = R_CS;
4531        goto next_byte;
4532    case 0x36:
4533        s->override = R_SS;
4534        goto next_byte;
4535    case 0x3e:
4536        s->override = R_DS;
4537        goto next_byte;
4538    case 0x26:
4539        s->override = R_ES;
4540        goto next_byte;
4541    case 0x64:
4542        s->override = R_FS;
4543        goto next_byte;
4544    case 0x65:
4545        s->override = R_GS;
4546        goto next_byte;
4547    case 0x66:
4548        prefixes |= PREFIX_DATA;
4549        goto next_byte;
4550    case 0x67:
4551        prefixes |= PREFIX_ADR;
4552        goto next_byte;
4553#ifdef TARGET_X86_64
4554    case 0x40 ... 0x4f:
4555        if (CODE64(s)) {
4556            /* REX prefix */
4557            rex_w = (b >> 3) & 1;
4558            rex_r = (b & 0x4) << 1;
4559            s->rex_x = (b & 0x2) << 2;
4560            REX_B(s) = (b & 0x1) << 3;
4561            /* select uniform byte register addressing */
4562            s->x86_64_hregs = true;
4563            goto next_byte;
4564        }
4565        break;
4566#endif
4567    case 0xc5: /* 2-byte VEX */
4568    case 0xc4: /* 3-byte VEX */
4569        /* VEX prefixes cannot be used except in 32-bit mode.
4570           Otherwise the instruction is LES or LDS.  */
4571        if (s->code32 && !s->vm86) {
4572            static const int pp_prefix[4] = {
4573                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4574            };
4575            int vex3, vex2 = x86_ldub_code(env, s);
4576
4577            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4578                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4579                   otherwise the instruction is LES or LDS.  */
4580                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4581                break;
4582            }
4583
4584            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4585            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4586                            | PREFIX_LOCK | PREFIX_DATA)) {
4587                goto illegal_op;
4588            }
4589#ifdef TARGET_X86_64
4590            if (s->x86_64_hregs) {
4591                goto illegal_op;
4592            }
4593#endif
4594            rex_r = (~vex2 >> 4) & 8;
4595            if (b == 0xc5) {
4596                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4597                vex3 = vex2;
4598                b = x86_ldub_code(env, s) | 0x100;
4599            } else {
4600                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4601#ifdef TARGET_X86_64
4602                s->rex_x = (~vex2 >> 3) & 8;
4603                s->rex_b = (~vex2 >> 2) & 8;
4604#endif
4605                vex3 = x86_ldub_code(env, s);
4606                rex_w = (vex3 >> 7) & 1;
4607                switch (vex2 & 0x1f) {
4608                case 0x01: /* Implied 0f leading opcode bytes.  */
4609                    b = x86_ldub_code(env, s) | 0x100;
4610                    break;
4611                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4612                    b = 0x138;
4613                    break;
4614                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4615                    b = 0x13a;
4616                    break;
4617                default:   /* Reserved for future use.  */
4618                    goto unknown_op;
4619                }
4620            }
4621            s->vex_v = (~vex3 >> 3) & 0xf;
4622            s->vex_l = (vex3 >> 2) & 1;
4623            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4624        }
4625        break;
4626    }
4627
4628    /* Post-process prefixes.  */
4629    if (CODE64(s)) {
4630        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4631           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4632           over 0x66 if both are present.  */
4633        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4634        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4635        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4636    } else {
4637        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4638        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4639            dflag = MO_32;
4640        } else {
4641            dflag = MO_16;
4642        }
4643        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4644        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4645            aflag = MO_32;
4646        }  else {
4647            aflag = MO_16;
4648        }
4649    }
4650
4651    s->prefix = prefixes;
4652    s->aflag = aflag;
4653    s->dflag = dflag;
4654
4655    /* now check op code */
4656 reswitch:
4657    switch(b) {
4658    case 0x0f:
4659        /**************************/
4660        /* extended op code */
4661        b = x86_ldub_code(env, s) | 0x100;
4662        goto reswitch;
4663
4664        /**************************/
4665        /* arith & logic */
4666    case 0x00 ... 0x05:
4667    case 0x08 ... 0x0d:
4668    case 0x10 ... 0x15:
4669    case 0x18 ... 0x1d:
4670    case 0x20 ... 0x25:
4671    case 0x28 ... 0x2d:
4672    case 0x30 ... 0x35:
4673    case 0x38 ... 0x3d:
4674        {
4675            int op, f, val;
4676            op = (b >> 3) & 7;
4677            f = (b >> 1) & 3;
4678
4679            ot = mo_b_d(b, dflag);
4680
4681            switch(f) {
4682            case 0: /* OP Ev, Gv */
4683                modrm = x86_ldub_code(env, s);
4684                reg = ((modrm >> 3) & 7) | rex_r;
4685                mod = (modrm >> 6) & 3;
4686                rm = (modrm & 7) | REX_B(s);
4687                if (mod != 3) {
4688                    gen_lea_modrm(env, s, modrm);
4689                    opreg = OR_TMP0;
4690                } else if (op == OP_XORL && rm == reg) {
4691                xor_zero:
4692                    /* xor reg, reg optimisation */
4693                    set_cc_op(s, CC_OP_CLR);
4694                    tcg_gen_movi_tl(s->T0, 0);
4695                    gen_op_mov_reg_v(s, ot, reg, s->T0);
4696                    break;
4697                } else {
4698                    opreg = rm;
4699                }
4700                gen_op_mov_v_reg(s, ot, s->T1, reg);
4701                gen_op(s, op, ot, opreg);
4702                break;
4703            case 1: /* OP Gv, Ev */
4704                modrm = x86_ldub_code(env, s);
4705                mod = (modrm >> 6) & 3;
4706                reg = ((modrm >> 3) & 7) | rex_r;
4707                rm = (modrm & 7) | REX_B(s);
4708                if (mod != 3) {
4709                    gen_lea_modrm(env, s, modrm);
4710                    gen_op_ld_v(s, ot, s->T1, s->A0);
4711                } else if (op == OP_XORL && rm == reg) {
4712                    goto xor_zero;
4713                } else {
4714                    gen_op_mov_v_reg(s, ot, s->T1, rm);
4715                }
4716                gen_op(s, op, ot, reg);
4717                break;
4718            case 2: /* OP A, Iv */
4719                val = insn_get(env, s, ot);
4720                tcg_gen_movi_tl(s->T1, val);
4721                gen_op(s, op, ot, OR_EAX);
4722                break;
4723            }
4724        }
4725        break;
4726
4727    case 0x82:
4728        if (CODE64(s))
4729            goto illegal_op;
4730        /* fall through */
4731    case 0x80: /* GRP1 */
4732    case 0x81:
4733    case 0x83:
4734        {
4735            int val;
4736
4737            ot = mo_b_d(b, dflag);
4738
4739            modrm = x86_ldub_code(env, s);
4740            mod = (modrm >> 6) & 3;
4741            rm = (modrm & 7) | REX_B(s);
4742            op = (modrm >> 3) & 7;
4743
4744            if (mod != 3) {
4745                if (b == 0x83)
4746                    s->rip_offset = 1;
4747                else
4748                    s->rip_offset = insn_const_size(ot);
4749                gen_lea_modrm(env, s, modrm);
4750                opreg = OR_TMP0;
4751            } else {
4752                opreg = rm;
4753            }
4754
4755            switch(b) {
4756            default:
4757            case 0x80:
4758            case 0x81:
4759            case 0x82:
4760                val = insn_get(env, s, ot);
4761                break;
4762            case 0x83:
4763                val = (int8_t)insn_get(env, s, MO_8);
4764                break;
4765            }
4766            tcg_gen_movi_tl(s->T1, val);
4767            gen_op(s, op, ot, opreg);
4768        }
4769        break;
4770
4771        /**************************/
4772        /* inc, dec, and other misc arith */
4773    case 0x40 ... 0x47: /* inc Gv */
4774        ot = dflag;
4775        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4776        break;
4777    case 0x48 ... 0x4f: /* dec Gv */
4778        ot = dflag;
4779        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4780        break;
4781    case 0xf6: /* GRP3 */
4782    case 0xf7:
4783        ot = mo_b_d(b, dflag);
4784
4785        modrm = x86_ldub_code(env, s);
4786        mod = (modrm >> 6) & 3;
4787        rm = (modrm & 7) | REX_B(s);
4788        op = (modrm >> 3) & 7;
4789        if (mod != 3) {
4790            if (op == 0) {
4791                s->rip_offset = insn_const_size(ot);
4792            }
4793            gen_lea_modrm(env, s, modrm);
4794            /* For those below that handle locked memory, don't load here.  */
4795            if (!(s->prefix & PREFIX_LOCK)
4796                || op != 2) {
4797                gen_op_ld_v(s, ot, s->T0, s->A0);
4798            }
4799        } else {
4800            gen_op_mov_v_reg(s, ot, s->T0, rm);
4801        }
4802
4803        switch(op) {
4804        case 0: /* test */
4805            val = insn_get(env, s, ot);
4806            tcg_gen_movi_tl(s->T1, val);
4807            gen_op_testl_T0_T1_cc(s);
4808            set_cc_op(s, CC_OP_LOGICB + ot);
4809            break;
4810        case 2: /* not */
4811            if (s->prefix & PREFIX_LOCK) {
4812                if (mod == 3) {
4813                    goto illegal_op;
4814                }
4815                tcg_gen_movi_tl(s->T0, ~0);
4816                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4817                                            s->mem_index, ot | MO_LE);
4818            } else {
4819                tcg_gen_not_tl(s->T0, s->T0);
4820                if (mod != 3) {
4821                    gen_op_st_v(s, ot, s->T0, s->A0);
4822                } else {
4823                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4824                }
4825            }
4826            break;
4827        case 3: /* neg */
4828            if (s->prefix & PREFIX_LOCK) {
4829                TCGLabel *label1;
4830                TCGv a0, t0, t1, t2;
4831
4832                if (mod == 3) {
4833                    goto illegal_op;
4834                }
4835                a0 = tcg_temp_local_new();
4836                t0 = tcg_temp_local_new();
4837                label1 = gen_new_label();
4838
4839                tcg_gen_mov_tl(a0, s->A0);
4840                tcg_gen_mov_tl(t0, s->T0);
4841
4842                gen_set_label(label1);
4843                t1 = tcg_temp_new();
4844                t2 = tcg_temp_new();
4845                tcg_gen_mov_tl(t2, t0);
4846                tcg_gen_neg_tl(t1, t0);
4847                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4848                                          s->mem_index, ot | MO_LE);
4849                tcg_temp_free(t1);
4850                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4851
4852                tcg_temp_free(t2);
4853                tcg_temp_free(a0);
4854                tcg_gen_mov_tl(s->T0, t0);
4855                tcg_temp_free(t0);
4856            } else {
4857                tcg_gen_neg_tl(s->T0, s->T0);
4858                if (mod != 3) {
4859                    gen_op_st_v(s, ot, s->T0, s->A0);
4860                } else {
4861                    gen_op_mov_reg_v(s, ot, rm, s->T0);
4862                }
4863            }
4864            gen_op_update_neg_cc(s);
4865            set_cc_op(s, CC_OP_SUBB + ot);
4866            break;
4867        case 4: /* mul */
4868            switch(ot) {
4869            case MO_8:
4870                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4871                tcg_gen_ext8u_tl(s->T0, s->T0);
4872                tcg_gen_ext8u_tl(s->T1, s->T1);
4873                /* XXX: use 32 bit mul which could be faster */
4874                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4875                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4876                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4877                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4878                set_cc_op(s, CC_OP_MULB);
4879                break;
4880            case MO_16:
4881                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4882                tcg_gen_ext16u_tl(s->T0, s->T0);
4883                tcg_gen_ext16u_tl(s->T1, s->T1);
4884                /* XXX: use 32 bit mul which could be faster */
4885                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4886                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4887                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4888                tcg_gen_shri_tl(s->T0, s->T0, 16);
4889                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4890                tcg_gen_mov_tl(cpu_cc_src, s->T0);
4891                set_cc_op(s, CC_OP_MULW);
4892                break;
4893            default:
4894            case MO_32:
4895                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4896                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4897                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4898                                  s->tmp2_i32, s->tmp3_i32);
4899                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4900                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4901                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4902                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4903                set_cc_op(s, CC_OP_MULL);
4904                break;
4905#ifdef TARGET_X86_64
4906            case MO_64:
4907                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4908                                  s->T0, cpu_regs[R_EAX]);
4909                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4910                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4911                set_cc_op(s, CC_OP_MULQ);
4912                break;
4913#endif
4914            }
4915            break;
4916        case 5: /* imul */
4917            switch(ot) {
4918            case MO_8:
4919                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4920                tcg_gen_ext8s_tl(s->T0, s->T0);
4921                tcg_gen_ext8s_tl(s->T1, s->T1);
4922                /* XXX: use 32 bit mul which could be faster */
4923                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4924                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4925                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4926                tcg_gen_ext8s_tl(s->tmp0, s->T0);
4927                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4928                set_cc_op(s, CC_OP_MULB);
4929                break;
4930            case MO_16:
4931                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4932                tcg_gen_ext16s_tl(s->T0, s->T0);
4933                tcg_gen_ext16s_tl(s->T1, s->T1);
4934                /* XXX: use 32 bit mul which could be faster */
4935                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4936                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4937                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4938                tcg_gen_ext16s_tl(s->tmp0, s->T0);
4939                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4940                tcg_gen_shri_tl(s->T0, s->T0, 16);
4941                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4942                set_cc_op(s, CC_OP_MULW);
4943                break;
4944            default:
4945            case MO_32:
4946                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4947                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4948                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4949                                  s->tmp2_i32, s->tmp3_i32);
4950                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4951                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4952                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4953                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4954                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4955                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4956                set_cc_op(s, CC_OP_MULL);
4957                break;
4958#ifdef TARGET_X86_64
4959            case MO_64:
4960                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4961                                  s->T0, cpu_regs[R_EAX]);
4962                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4963                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4964                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4965                set_cc_op(s, CC_OP_MULQ);
4966                break;
4967#endif
4968            }
4969            break;
4970        case 6: /* div */
4971            switch(ot) {
4972            case MO_8:
4973                gen_helper_divb_AL(cpu_env, s->T0);
4974                break;
4975            case MO_16:
4976                gen_helper_divw_AX(cpu_env, s->T0);
4977                break;
4978            default:
4979            case MO_32:
4980                gen_helper_divl_EAX(cpu_env, s->T0);
4981                break;
4982#ifdef TARGET_X86_64
4983            case MO_64:
4984                gen_helper_divq_EAX(cpu_env, s->T0);
4985                break;
4986#endif
4987            }
4988            break;
4989        case 7: /* idiv */
4990            switch(ot) {
4991            case MO_8:
4992                gen_helper_idivb_AL(cpu_env, s->T0);
4993                break;
4994            case MO_16:
4995                gen_helper_idivw_AX(cpu_env, s->T0);
4996                break;
4997            default:
4998            case MO_32:
4999                gen_helper_idivl_EAX(cpu_env, s->T0);
5000                break;
5001#ifdef TARGET_X86_64
5002            case MO_64:
5003                gen_helper_idivq_EAX(cpu_env, s->T0);
5004                break;
5005#endif
5006            }
5007            break;
5008        default:
5009            goto unknown_op;
5010        }
5011        break;
5012
5013    case 0xfe: /* GRP4 */
5014    case 0xff: /* GRP5 */
5015        ot = mo_b_d(b, dflag);
5016
5017        modrm = x86_ldub_code(env, s);
5018        mod = (modrm >> 6) & 3;
5019        rm = (modrm & 7) | REX_B(s);
5020        op = (modrm >> 3) & 7;
5021        if (op >= 2 && b == 0xfe) {
5022            goto unknown_op;
5023        }
5024        if (CODE64(s)) {
5025            if (op == 2 || op == 4) {
5026                /* operand size for jumps is 64 bit */
5027                ot = MO_64;
5028            } else if (op == 3 || op == 5) {
5029                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
5030            } else if (op == 6) {
5031                /* default push size is 64 bit */
5032                ot = mo_pushpop(s, dflag);
5033            }
5034        }
5035        if (mod != 3) {
5036            gen_lea_modrm(env, s, modrm);
5037            if (op >= 2 && op != 3 && op != 5)
5038                gen_op_ld_v(s, ot, s->T0, s->A0);
5039        } else {
5040            gen_op_mov_v_reg(s, ot, s->T0, rm);
5041        }
5042
5043        switch(op) {
5044        case 0: /* inc Ev */
5045            if (mod != 3)
5046                opreg = OR_TMP0;
5047            else
5048                opreg = rm;
5049            gen_inc(s, ot, opreg, 1);
5050            break;
5051        case 1: /* dec Ev */
5052            if (mod != 3)
5053                opreg = OR_TMP0;
5054            else
5055                opreg = rm;
5056            gen_inc(s, ot, opreg, -1);
5057            break;
5058        case 2: /* call Ev */
5059            /* XXX: optimize if memory (no 'and' is necessary) */
5060            if (dflag == MO_16) {
5061                tcg_gen_ext16u_tl(s->T0, s->T0);
5062            }
5063            next_eip = s->pc - s->cs_base;
5064            tcg_gen_movi_tl(s->T1, next_eip);
5065            gen_push_v(s, s->T1);
5066            gen_op_jmp_v(s->T0);
5067            gen_bnd_jmp(s);
5068            gen_jr(s, s->T0);
5069            break;
        case 3: /* lcall Ev */
            /* Far call through memory: load the new EIP (operand-sized),
               then the 16-bit CS selector that follows it.  */
            gen_op_ld_v(s, ot, s->T1, s->A0);
            gen_add_A0_im(s, 1 << ot);
            gen_op_ld_v(s, MO_16, s->T0, s->A0);
        do_lcall:
            /* T0 = new CS selector, T1 = new EIP; also entered via
               "goto do_lcall" from the direct far-call decode.  */
            if (s->pe && !s->vm86) {
                /* Protected mode: helper performs selector/gate checks
                   and may raise an exception.  */
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
                                           tcg_const_i32(dflag - 1),
                                           tcg_const_tl(s->pc - s->cs_base));
            } else {
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
                                      tcg_const_i32(dflag - 1),
                                      tcg_const_i32(s->pc - s->cs_base));
            }
            /* The helpers updated env->eip; reload it and end the TB
               with an indirect jump.  */
            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
            gen_jr(s, s->tmp4);
            break;
        case 4: /* jmp Ev */
            /* Indirect near jump; a 16-bit operand size truncates the
               target to 16 bits.  */
            if (dflag == MO_16) {
                tcg_gen_ext16u_tl(s->T0, s->T0);
            }
            gen_op_jmp_v(s->T0);
            gen_bnd_jmp(s);
            gen_jr(s, s->T0);
            break;
        case 5: /* ljmp Ev */
            /* Far jump through memory: new EIP first, then selector.  */
            gen_op_ld_v(s, ot, s->T1, s->A0);
            gen_add_A0_im(s, 1 << ot);
            gen_op_ld_v(s, MO_16, s->T0, s->A0);
        do_ljmp:
            /* Also entered via "goto do_ljmp" from the direct far-jump
               decode; T0 = selector, T1 = new EIP.  */
            if (s->pe && !s->vm86) {
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
                                          tcg_const_tl(s->pc - s->cs_base));
            } else {
                /* Real or vm86 mode: load CS directly and jump.  */
                gen_op_movl_seg_T0_vm(s, R_CS);
                gen_op_jmp_v(s->T1);
            }
            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
            gen_jr(s, s->tmp4);
            break;
        case 6: /* push Ev */
            gen_push_v(s, s->T0);
            break;
        default:
            goto unknown_op;
        }
        break;
5120
    case 0x84: /* test Ev, Gv */
    case 0x85:
        /* AND the two operands for flags only; no result is written.  */
        ot = mo_b_d(b, dflag);

        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;

        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        gen_op_mov_v_reg(s, ot, s->T1, reg);
        gen_op_testl_T0_T1_cc(s);
        set_cc_op(s, CC_OP_LOGICB + ot);
        break;

    case 0xa8: /* test eAX, Iv */
    case 0xa9:
        /* Same as above with an immediate second operand and the
           accumulator as first operand.  */
        ot = mo_b_d(b, dflag);
        val = insn_get(env, s, ot);

        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
        tcg_gen_movi_tl(s->T1, val);
        gen_op_testl_T0_T1_cc(s);
        set_cc_op(s, CC_OP_LOGICB + ot);
        break;
5144
    case 0x98: /* CWDE/CBW */
        /* Sign-extend the low half of rAX into rAX; which widths are
           involved depends on the operand size.  */
        switch (dflag) {
#ifdef TARGET_X86_64
        case MO_64: /* CDQE: EAX -> RAX */
            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
            tcg_gen_ext32s_tl(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
            break;
#endif
        case MO_32: /* CWDE: AX -> EAX */
            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
            tcg_gen_ext16s_tl(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
            break;
        case MO_16: /* CBW: AL -> AX */
            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
            tcg_gen_ext8s_tl(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
            break;
        default:
            tcg_abort();
        }
        break;
    case 0x99: /* CDQ/CWD */
        /* Fill rDX with the sign bit of rAX (arithmetic shift by
           width-1 after sign extension).  */
        switch (dflag) {
#ifdef TARGET_X86_64
        case MO_64: /* CQO: RAX sign -> RDX */
            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
            tcg_gen_sari_tl(s->T0, s->T0, 63);
            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
            break;
#endif
        case MO_32: /* CDQ: EAX sign -> EDX */
            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
            tcg_gen_ext32s_tl(s->T0, s->T0);
            tcg_gen_sari_tl(s->T0, s->T0, 31);
            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
            break;
        case MO_16: /* CWD: AX sign -> DX */
            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
            tcg_gen_ext16s_tl(s->T0, s->T0);
            tcg_gen_sari_tl(s->T0, s->T0, 15);
            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
            break;
        default:
            tcg_abort();
        }
        break;
    case 0x1af: /* imul Gv, Ev */
    case 0x69: /* imul Gv, Ev, I */
    case 0x6b:
        /* Two/three-operand signed multiply.  The second source is an
           operand-sized immediate (0x69), a sign-extended byte (0x6b),
           or the destination register itself (0x1af).  */
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        /* rip_offset: immediate bytes still to come after the modrm
           operand, needed for RIP-relative addressing.  */
        if (b == 0x69)
            s->rip_offset = insn_const_size(ot);
        else if (b == 0x6b)
            s->rip_offset = 1;
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        if (b == 0x69) {
            val = insn_get(env, s, ot);
            tcg_gen_movi_tl(s->T1, val);
        } else if (b == 0x6b) {
            val = (int8_t)insn_get(env, s, MO_8);
            tcg_gen_movi_tl(s->T1, val);
        } else {
            gen_op_mov_v_reg(s, ot, s->T1, reg);
        }
        /* cc_src is set to (sign of low part) - (high part): non-zero
           exactly when the product overflowed the destination, which
           the CC_OP_MULB+ot flag computation uses for CF/OF.  */
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_64:
            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
            break;
#endif
        case MO_32:
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
                              s->tmp2_i32, s->tmp3_i32);
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
            break;
        default:
            /* MO_16: widen, multiply in target width, then compare the
               result against its 16-bit sign extension for overflow.  */
            tcg_gen_ext16s_tl(s->T0, s->T0);
            tcg_gen_ext16s_tl(s->T1, s->T1);
            /* XXX: use 32 bit mul which could be faster */
            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
            tcg_gen_ext16s_tl(s->tmp0, s->T0);
            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        }
        set_cc_op(s, CC_OP_MULB + ot);
        break;
    case 0x1c0:
    case 0x1c1: /* xadd Ev, Gv */
        /* Exchange-and-add: dest += src, src = old dest.  */
        ot = mo_b_d(b, dflag);
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        gen_op_mov_v_reg(s, ot, s->T0, reg);
        if (mod == 3) {
            /* Register destination: plain swap-and-add.  */
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_v_reg(s, ot, s->T1, rm);
            tcg_gen_add_tl(s->T0, s->T0, s->T1);
            gen_op_mov_reg_v(s, ot, reg, s->T1);
            gen_op_mov_reg_v(s, ot, rm, s->T0);
        } else {
            gen_lea_modrm(env, s, modrm);
            if (s->prefix & PREFIX_LOCK) {
                /* LOCK prefix: use an atomic fetch-add; T1 receives the
                   old memory value.  */
                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
                                            s->mem_index, ot | MO_LE);
                tcg_gen_add_tl(s->T0, s->T0, s->T1);
            } else {
                gen_op_ld_v(s, ot, s->T1, s->A0);
                tcg_gen_add_tl(s->T0, s->T0, s->T1);
                gen_op_st_v(s, ot, s->T0, s->A0);
            }
            gen_op_mov_reg_v(s, ot, reg, s->T1);
        }
        /* Flags are those of the addition (T0 = sum, T1 = old dest).  */
        gen_op_update2_cc(s);
        set_cc_op(s, CC_OP_ADDB + ot);
        break;
    case 0x1b0:
    case 0x1b1: /* cmpxchg Ev, Gv */
        /* Compare rAX with dest; if equal store src, else load dest
           into rAX.  Flags are those of cmp rAX, dest.  */
        {
            TCGv oldv, newv, cmpv;

            ot = mo_b_d(b, dflag);
            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            oldv = tcg_temp_new();
            newv = tcg_temp_new();
            cmpv = tcg_temp_new();
            gen_op_mov_v_reg(s, ot, newv, reg);
            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);

            if (s->prefix & PREFIX_LOCK) {
                /* LOCK requires a memory destination.  */
                if (mod == 3) {
                    goto illegal_op;
                }
                gen_lea_modrm(env, s, modrm);
                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                                          s->mem_index, ot | MO_LE);
                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
            } else {
                /* Non-atomic path: emulate with a conditional move.  */
                if (mod == 3) {
                    rm = (modrm & 7) | REX_B(s);
                    gen_op_mov_v_reg(s, ot, oldv, rm);
                } else {
                    gen_lea_modrm(env, s, modrm);
                    gen_op_ld_v(s, ot, oldv, s->A0);
                    rm = 0; /* avoid warning */
                }
                gen_extu(ot, oldv);
                gen_extu(ot, cmpv);
                /* store value = (old == cmp ? new : old);  */
                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
                if (mod == 3) {
                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
                    gen_op_mov_reg_v(s, ot, rm, newv);
                } else {
                    /* Perform an unconditional store cycle like physical cpu;
                       must be before changing accumulator to ensure
                       idempotency if the store faults and the instruction
                       is restarted */
                    gen_op_st_v(s, ot, newv, s->A0);
                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
                }
            }
            /* Set up flags as for "cmp cmpv, oldv".  */
            tcg_gen_mov_tl(cpu_cc_src, oldv);
            tcg_gen_mov_tl(s->cc_srcT, cmpv);
            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
            set_cc_op(s, CC_OP_SUBB + ot);
            tcg_temp_free(oldv);
            tcg_temp_free(newv);
            tcg_temp_free(cmpv);
        }
        break;
    case 0x1c7: /* cmpxchg8b */
        /* Group 9: only /1 with a memory operand is CMPXCHG8B (or
           CMPXCHG16B with REX.W); anything else here is illegal.  */
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        if ((mod == 3) || ((modrm & 0x38) != 0x8))
            goto illegal_op;
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            /* CMPXCHG16B requires the CX16 CPUID feature.  */
            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                goto illegal_op;
            gen_lea_modrm(env, s, modrm);
            /* Use the atomic helper only when LOCKed and actually
               running in parallel; otherwise the cheaper non-atomic
               helper suffices.  */
            if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
                gen_helper_cmpxchg16b(cpu_env, s->A0);
            } else {
                gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
            }
        } else
#endif        
        {
            /* CMPXCHG8B requires the CX8 CPUID feature.  */
            if (!(s->cpuid_features & CPUID_CX8))
                goto illegal_op;
            gen_lea_modrm(env, s, modrm);
            if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
                gen_helper_cmpxchg8b(cpu_env, s->A0);
            } else {
                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
            }
        }
        /* The helpers compute ZF directly in EFLAGS.  */
        set_cc_op(s, CC_OP_EFLAGS);
        break;
5361
5362        /**************************/
5363        /* push/pop */
5364    case 0x50 ... 0x57: /* push */
5365        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5366        gen_push_v(s, s->T0);
5367        break;
5368    case 0x58 ... 0x5f: /* pop */
5369        ot = gen_pop_T0(s);
5370        /* NOTE: order is important for pop %sp */
5371        gen_pop_update(s, ot);
5372        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5373        break;
5374    case 0x60: /* pusha */
5375        if (CODE64(s))
5376            goto illegal_op;
5377        gen_pusha(s);
5378        break;
5379    case 0x61: /* popa */
5380        if (CODE64(s))
5381            goto illegal_op;
5382        gen_popa(s);
5383        break;
5384    case 0x68: /* push Iv */
5385    case 0x6a:
5386        ot = mo_pushpop(s, dflag);
5387        if (b == 0x68)
5388            val = insn_get(env, s, ot);
5389        else
5390            val = (int8_t)insn_get(env, s, MO_8);
5391        tcg_gen_movi_tl(s->T0, val);
5392        gen_push_v(s, s->T0);
5393        break;
5394    case 0x8f: /* pop Ev */
5395        modrm = x86_ldub_code(env, s);
5396        mod = (modrm >> 6) & 3;
5397        ot = gen_pop_T0(s);
5398        if (mod == 3) {
5399            /* NOTE: order is important for pop %sp */
5400            gen_pop_update(s, ot);
5401            rm = (modrm & 7) | REX_B(s);
5402            gen_op_mov_reg_v(s, ot, rm, s->T0);
5403        } else {
5404            /* NOTE: order is important too for MMU exceptions */
5405            s->popl_esp_hack = 1 << ot;
5406            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5407            s->popl_esp_hack = 0;
5408            gen_pop_update(s, ot);
5409        }
5410        break;
5411    case 0xc8: /* enter */
5412        {
5413            int level;
5414            val = x86_lduw_code(env, s);
5415            level = x86_ldub_code(env, s);
5416            gen_enter(s, val, level);
5417        }
5418        break;
5419    case 0xc9: /* leave */
5420        gen_leave(s);
5421        break;
5422    case 0x06: /* push es */
5423    case 0x0e: /* push cs */
5424    case 0x16: /* push ss */
5425    case 0x1e: /* push ds */
5426        if (CODE64(s))
5427            goto illegal_op;
5428        gen_op_movl_T0_seg(s, b >> 3);
5429        gen_push_v(s, s->T0);
5430        break;
5431    case 0x1a0: /* push fs */
5432    case 0x1a8: /* push gs */
5433        gen_op_movl_T0_seg(s, (b >> 3) & 7);
5434        gen_push_v(s, s->T0);
5435        break;
5436    case 0x07: /* pop es */
5437    case 0x17: /* pop ss */
5438    case 0x1f: /* pop ds */
5439        if (CODE64(s))
5440            goto illegal_op;
5441        reg = b >> 3;
5442        ot = gen_pop_T0(s);
5443        gen_movl_seg_T0(s, reg);
5444        gen_pop_update(s, ot);
5445        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5446        if (s->base.is_jmp) {
5447            gen_jmp_im(s, s->pc - s->cs_base);
5448            if (reg == R_SS) {
5449                s->tf = 0;
5450                gen_eob_inhibit_irq(s, true);
5451            } else {
5452                gen_eob(s);
5453            }
5454        }
5455        break;
5456    case 0x1a1: /* pop fs */
5457    case 0x1a9: /* pop gs */
5458        ot = gen_pop_T0(s);
5459        gen_movl_seg_T0(s, (b >> 3) & 7);
5460        gen_pop_update(s, ot);
5461        if (s->base.is_jmp) {
5462            gen_jmp_im(s, s->pc - s->cs_base);
5463            gen_eob(s);
5464        }
5465        break;
5466
5467        /**************************/
5468        /* mov */
5469    case 0x88:
5470    case 0x89: /* mov Gv, Ev */
5471        ot = mo_b_d(b, dflag);
5472        modrm = x86_ldub_code(env, s);
5473        reg = ((modrm >> 3) & 7) | rex_r;
5474
5475        /* generate a generic store */
5476        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5477        break;
5478    case 0xc6:
5479    case 0xc7: /* mov Ev, Iv */
5480        ot = mo_b_d(b, dflag);
5481        modrm = x86_ldub_code(env, s);
5482        mod = (modrm >> 6) & 3;
5483        if (mod != 3) {
5484            s->rip_offset = insn_const_size(ot);
5485            gen_lea_modrm(env, s, modrm);
5486        }
5487        val = insn_get(env, s, ot);
5488        tcg_gen_movi_tl(s->T0, val);
5489        if (mod != 3) {
5490            gen_op_st_v(s, ot, s->T0, s->A0);
5491        } else {
5492            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5493        }
5494        break;
5495    case 0x8a:
5496    case 0x8b: /* mov Ev, Gv */
5497        ot = mo_b_d(b, dflag);
5498        modrm = x86_ldub_code(env, s);
5499        reg = ((modrm >> 3) & 7) | rex_r;
5500
5501        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5502        gen_op_mov_reg_v(s, ot, reg, s->T0);
5503        break;
5504    case 0x8e: /* mov seg, Gv */
5505        modrm = x86_ldub_code(env, s);
5506        reg = (modrm >> 3) & 7;
5507        if (reg >= 6 || reg == R_CS)
5508            goto illegal_op;
5509        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5510        gen_movl_seg_T0(s, reg);
5511        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5512        if (s->base.is_jmp) {
5513            gen_jmp_im(s, s->pc - s->cs_base);
5514            if (reg == R_SS) {
5515                s->tf = 0;
5516                gen_eob_inhibit_irq(s, true);
5517            } else {
5518                gen_eob(s);
5519            }
5520        }
5521        break;
5522    case 0x8c: /* mov Gv, seg */
5523        modrm = x86_ldub_code(env, s);
5524        reg = (modrm >> 3) & 7;
5525        mod = (modrm >> 6) & 3;
5526        if (reg >= 6)
5527            goto illegal_op;
5528        gen_op_movl_T0_seg(s, reg);
5529        ot = mod == 3 ? dflag : MO_16;
5530        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5531        break;
5532
    case 0x1b6: /* movzbS Gv, Eb */
    case 0x1b7: /* movzwS Gv, Eb */
    case 0x1be: /* movsbS Gv, Eb */
    case 0x1bf: /* movswS Gv, Eb */
        /* MOVZX/MOVSX: load a byte or word and zero- or sign-extend it
           into an operand-sized register.  */
        {
            TCGMemOp d_ot;
            TCGMemOp s_ot;

            /* d_ot is the size of destination */
            d_ot = dflag;
            /* ot is the size of source */
            ot = (b & 1) + MO_8;
            /* s_ot is the sign+size of source */
            s_ot = b & 8 ? MO_SIGN | ot : ot;

            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);

            if (mod == 3) {
                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
                    /* Legacy high-byte register (AH..BH): extract bits
                       8..15 of the corresponding low register.  */
                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
                } else {
                    gen_op_mov_v_reg(s, ot, s->T0, rm);
                    switch (s_ot) {
                    case MO_UB:
                        tcg_gen_ext8u_tl(s->T0, s->T0);
                        break;
                    case MO_SB:
                        tcg_gen_ext8s_tl(s->T0, s->T0);
                        break;
                    case MO_UW:
                        tcg_gen_ext16u_tl(s->T0, s->T0);
                        break;
                    default:
                    case MO_SW:
                        tcg_gen_ext16s_tl(s->T0, s->T0);
                        break;
                    }
                }
                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
            } else {
                /* Memory source: the load itself performs the
                   extension via s_ot.  */
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, s_ot, s->T0, s->A0);
                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
            }
        }
        break;
5582
    case 0x8d: /* lea */
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        /* LEA requires a memory-form operand.  */
        if (mod == 3)
            goto illegal_op;
        reg = ((modrm >> 3) & 7) | rex_r;
        {
            /* Compute the effective address without a segment base
               (seg_reg/seg_base of -1) and without any memory access.  */
            AddressParts a = gen_lea_modrm_0(env, s, modrm);
            TCGv ea = gen_lea_modrm_1(s, a);
            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
            gen_op_mov_reg_v(s, dflag, reg, s->A0);
        }
        break;
5596
    case 0xa0: /* mov EAX, Ov */
    case 0xa1:
    case 0xa2: /* mov Ov, EAX */
    case 0xa3:
        /* Accumulator moves with a direct (moffs) address encoded in
           the instruction; bit 1 of the opcode selects the direction.  */
        {
            target_ulong offset_addr;

            ot = mo_b_d(b, dflag);
            switch (s->aflag) {
#ifdef TARGET_X86_64
            case MO_64:
                /* 64-bit address size: full 8-byte offset.  */
                offset_addr = x86_ldq_code(env, s);
                break;
#endif
            default:
                offset_addr = insn_get(env, s, s->aflag);
                break;
            }
            tcg_gen_movi_tl(s->A0, offset_addr);
            gen_add_A0_ds_seg(s);
            if ((b & 2) == 0) {
                gen_op_ld_v(s, ot, s->T0, s->A0);
                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
            } else {
                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
                gen_op_st_v(s, ot, s->T0, s->A0);
            }
        }
        break;
    case 0xd7: /* xlat */
        /* AL = [DS:rBX + zero-extended AL].  */
        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
        tcg_gen_add_tl(s->A0, s->A0, s->T0);
        gen_extu(s->aflag, s->A0);
        gen_add_A0_ds_seg(s);
        gen_op_ld_v(s, MO_8, s->T0, s->A0);
        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
        break;
    case 0xb0 ... 0xb7: /* mov R, Ib */
        val = insn_get(env, s, MO_8);
        tcg_gen_movi_tl(s->T0, val);
        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
        break;
    case 0xb8 ... 0xbf: /* mov R, Iv */
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            uint64_t tmp;
            /* 64 bit case */
            tmp = x86_ldq_code(env, s);
            reg = (b & 7) | REX_B(s);
            tcg_gen_movi_tl(s->T0, tmp);
            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
        } else
#endif
        {
            ot = dflag;
            val = insn_get(env, s, ot);
            reg = (b & 7) | REX_B(s);
            tcg_gen_movi_tl(s->T0, val);
            gen_op_mov_reg_v(s, ot, reg, s->T0);
        }
        break;
5659
    case 0x91 ... 0x97: /* xchg R, EAX */
    do_xchg_reg_eax:
        /* Entered via goto from other decode paths as well; swaps the
           accumulator with the opcode-encoded register.  */
        ot = dflag;
        reg = (b & 7) | REX_B(s);
        rm = R_EAX;
        goto do_xchg_reg;
    case 0x86:
    case 0x87: /* xchg Ev, Gv */
        ot = mo_b_d(b, dflag);
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
        do_xchg_reg:
            /* Register-register swap through T0/T1.  */
            gen_op_mov_v_reg(s, ot, s->T0, reg);
            gen_op_mov_v_reg(s, ot, s->T1, rm);
            gen_op_mov_reg_v(s, ot, rm, s->T0);
            gen_op_mov_reg_v(s, ot, reg, s->T1);
        } else {
            gen_lea_modrm(env, s, modrm);
            gen_op_mov_v_reg(s, ot, s->T0, reg);
            /* for xchg, lock is implicit */
            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
                                   s->mem_index, ot | MO_LE);
            gen_op_mov_reg_v(s, ot, reg, s->T1);
        }
        break;
    case 0xc4: /* les Gv */
        /* In CODE64 this is VEX3; see above.  */
        op = R_ES;
        goto do_lxx;
    case 0xc5: /* lds Gv */
        /* In CODE64 this is VEX2; see above.  */
        op = R_DS;
        goto do_lxx;
    case 0x1b2: /* lss Gv */
        op = R_SS;
        goto do_lxx;
    case 0x1b4: /* lfs Gv */
        op = R_FS;
        goto do_lxx;
    case 0x1b5: /* lgs Gv */
        op = R_GS;
    do_lxx:
        /* Load a far pointer from memory: offset first, then the
           16-bit selector into segment register `op`.  */
        ot = dflag != MO_16 ? MO_32 : MO_16;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        /* A register operand is not a far pointer.  */
        if (mod == 3)
            goto illegal_op;
        gen_lea_modrm(env, s, modrm);
        gen_op_ld_v(s, ot, s->T1, s->A0);
        gen_add_A0_im(s, 1 << ot);
        /* load the segment first to handle exceptions properly */
        gen_op_ld_v(s, MO_16, s->T0, s->A0);
        gen_movl_seg_T0(s, op);
        /* then put the data */
        gen_op_mov_reg_v(s, ot, reg, s->T1);
        if (s->base.is_jmp) {
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
5724
5725        /************************/
5726        /* shifts */
5727    case 0xc0:
5728    case 0xc1:
5729        /* shift Ev,Ib */
5730        shift = 2;
5731    grp2:
5732        {
5733            ot = mo_b_d(b, dflag);
5734            modrm = x86_ldub_code(env, s);
5735            mod = (modrm >> 6) & 3;
5736            op = (modrm >> 3) & 7;
5737
5738            if (mod != 3) {
5739                if (shift == 2) {
5740                    s->rip_offset = 1;
5741                }
5742                gen_lea_modrm(env, s, modrm);
5743                opreg = OR_TMP0;
5744            } else {
5745                opreg = (modrm & 7) | REX_B(s);
5746            }
5747
5748            /* simpler op */
5749            if (shift == 0) {
5750                gen_shift(s, op, ot, opreg, OR_ECX);
5751            } else {
5752                if (shift == 2) {
5753                    shift = x86_ldub_code(env, s);
5754                }
5755                gen_shifti(s, op, ot, opreg, shift);
5756            }
5757        }
5758        break;
5759    case 0xd0:
5760    case 0xd1:
5761        /* shift Ev,1 */
5762        shift = 1;
5763        goto grp2;
5764    case 0xd2:
5765    case 0xd3:
5766        /* shift Ev,cl */
5767        shift = 0;
5768        goto grp2;
5769
5770    case 0x1a4: /* shld imm */
5771        op = 0;
5772        shift = 1;
5773        goto do_shiftd;
5774    case 0x1a5: /* shld cl */
5775        op = 0;
5776        shift = 0;
5777        goto do_shiftd;
5778    case 0x1ac: /* shrd imm */
5779        op = 1;
5780        shift = 1;
5781        goto do_shiftd;
5782    case 0x1ad: /* shrd cl */
5783        op = 1;
5784        shift = 0;
5785    do_shiftd:
5786        ot = dflag;
5787        modrm = x86_ldub_code(env, s);
5788        mod = (modrm >> 6) & 3;
5789        rm = (modrm & 7) | REX_B(s);
5790        reg = ((modrm >> 3) & 7) | rex_r;
5791        if (mod != 3) {
5792            gen_lea_modrm(env, s, modrm);
5793            opreg = OR_TMP0;
5794        } else {
5795            opreg = rm;
5796        }
5797        gen_op_mov_v_reg(s, ot, s->T1, reg);
5798
5799        if (shift) {
5800            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5801            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5802            tcg_temp_free(imm);
5803        } else {
5804            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5805        }
5806        break;
5807
5808        /************************/
5809        /* floats */
5810    case 0xd8 ... 0xdf:
5811        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5812            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5813            /* XXX: what to do if illegal op ? */
5814            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5815            break;
5816        }
5817        modrm = x86_ldub_code(env, s);
5818        mod = (modrm >> 6) & 3;
5819        rm = modrm & 7;
5820        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5821        if (mod != 3) {
5822            /* memory op */
5823            gen_lea_modrm(env, s, modrm);
5824            switch(op) {
5825            case 0x00 ... 0x07: /* fxxxs */
5826            case 0x10 ... 0x17: /* fixxxl */
5827            case 0x20 ... 0x27: /* fxxxl */
5828            case 0x30 ... 0x37: /* fixxx */
5829                {
5830                    int op1;
5831                    op1 = op & 7;
5832
5833                    switch(op >> 4) {
5834                    case 0:
5835                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5836                                            s->mem_index, MO_LEUL);
5837                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5838                        break;
5839                    case 1:
5840                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5841                                            s->mem_index, MO_LEUL);
5842                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5843                        break;
5844                    case 2:
5845                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5846                                            s->mem_index, MO_LEQ);
5847                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5848                        break;
5849                    case 3:
5850                    default:
5851                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5852                                            s->mem_index, MO_LESW);
5853                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5854                        break;
5855                    }
5856
5857                    gen_helper_fp_arith_ST0_FT0(op1);
5858                    if (op1 == 3) {
5859                        /* fcomp needs pop */
5860                        gen_helper_fpop(cpu_env);
5861                    }
5862                }
5863                break;
5864            case 0x08: /* flds */
5865            case 0x0a: /* fsts */
5866            case 0x0b: /* fstps */
5867            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5868            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5869            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5870                switch(op & 7) {
5871                case 0:
5872                    switch(op >> 4) {
5873                    case 0:
5874                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5875                                            s->mem_index, MO_LEUL);
5876                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5877                        break;
5878                    case 1:
5879                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5880                                            s->mem_index, MO_LEUL);
5881                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5882                        break;
5883                    case 2:
5884                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5885                                            s->mem_index, MO_LEQ);
5886                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5887                        break;
5888                    case 3:
5889                    default:
5890                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5891                                            s->mem_index, MO_LESW);
5892                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5893                        break;
5894                    }
5895                    break;
5896                case 1:
5897                    /* XXX: the corresponding CPUID bit must be tested ! */
5898                    switch(op >> 4) {
5899                    case 1:
5900                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5901                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5902                                            s->mem_index, MO_LEUL);
5903                        break;
5904                    case 2:
5905                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5906                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5907                                            s->mem_index, MO_LEQ);
5908                        break;
5909                    case 3:
5910                    default:
5911                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5912                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5913                                            s->mem_index, MO_LEUW);
5914                        break;
5915                    }
5916                    gen_helper_fpop(cpu_env);
5917                    break;
5918                default:
5919                    switch(op >> 4) {
5920                    case 0:
5921                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5922                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5923                                            s->mem_index, MO_LEUL);
5924                        break;
5925                    case 1:
5926                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5927                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5928                                            s->mem_index, MO_LEUL);
5929                        break;
5930                    case 2:
5931                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
5932                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5933                                            s->mem_index, MO_LEQ);
5934                        break;
5935                    case 3:
5936                    default:
5937                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
5938                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5939                                            s->mem_index, MO_LEUW);
5940                        break;
5941                    }
5942                    if ((op & 7) == 3)
5943                        gen_helper_fpop(cpu_env);
5944                    break;
5945                }
5946                break;
                /* x87 environment/state and extended-precision memory forms.
                   The (dflag - 1) argument tells the helper whether the
                   16-bit or 32-bit memory image layout is in use. */
5947            case 0x0c: /* fldenv mem */
5948                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5949                break;
5950            case 0x0d: /* fldcw mem */
5951                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5952                                    s->mem_index, MO_LEUW);
5953                gen_helper_fldcw(cpu_env, s->tmp2_i32);
5954                break;
5955            case 0x0e: /* fnstenv mem */
5956                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5957                break;
5958            case 0x0f: /* fnstcw mem */
5959                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
5960                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5961                                    s->mem_index, MO_LEUW);
5962                break;
                /* 80-bit extended-precision load/store; the helper does the
                   memory access itself via A0. */
5963            case 0x1d: /* fldt mem */
5964                gen_helper_fldt_ST0(cpu_env, s->A0);
5965                break;
5966            case 0x1f: /* fstpt mem */
5967                gen_helper_fstt_ST0(cpu_env, s->A0);
5968                gen_helper_fpop(cpu_env);
5969                break;
5970            case 0x2c: /* frstor mem */
5971                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5972                break;
5973            case 0x2e: /* fnsave mem */
5974                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5975                break;
5976            case 0x2f: /* fnstsw mem */
5977                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
5978                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5979                                    s->mem_index, MO_LEUW);
5980                break;
                /* fbld/fbstp: 80-bit packed-BCD forms, done in the helper. */
5981            case 0x3c: /* fbld */
5982                gen_helper_fbld_ST0(cpu_env, s->A0);
5983                break;
5984            case 0x3e: /* fbstp */
5985                gen_helper_fbst_ST0(cpu_env, s->A0);
5986                gen_helper_fpop(cpu_env);
5987                break;
                /* 64-bit integer forms use the i64 scratch and a LE 64-bit
                   guest memory access. */
5988            case 0x3d: /* fildll */
5989                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
5990                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
5991                break;
5992            case 0x3f: /* fistpll */
5993                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
5994                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
5995                gen_helper_fpop(cpu_env);
5996                break;
5997            default:
5998                goto unknown_op;
5999            }
6000        } else {
6001            /* register float ops */
6002            opreg = rm;
6003
6004            switch(op) {
                /* fld st(i): push first, which rotates the stack, so the
                   source register is now at (opreg + 1) mod 8. */
6005            case 0x08: /* fld sti */
6006                gen_helper_fpush(cpu_env);
6007                gen_helper_fmov_ST0_STN(cpu_env,
6008                                        tcg_const_i32((opreg + 1) & 7));
6009                break;
6010            case 0x09: /* fxchg sti */
6011            case 0x29: /* fxchg4 sti, undocumented op */
6012            case 0x39: /* fxchg7 sti, undocumented op */
6013                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6014                break;
6015            case 0x0a: /* grp d9/2 */
6016                switch(rm) {
6017                case 0: /* fnop */
6018                    /* check exceptions (FreeBSD FPU probe) */
6019                    gen_helper_fwait(cpu_env);
6020                    break;
6021                default:
6022                    goto unknown_op;
6023                }
6024                break;
                /* D9 /4 group: sign/abs/compare-against-zero/classify ST0. */
6025            case 0x0c: /* grp d9/4 */
6026                switch(rm) {
6027                case 0: /* fchs */
6028                    gen_helper_fchs_ST0(cpu_env);
6029                    break;
6030                case 1: /* fabs */
6031                    gen_helper_fabs_ST0(cpu_env);
6032                    break;
6033                case 4: /* ftst */
                    /* ftst = compare ST0 against +0.0 loaded into FT0. */
6034                    gen_helper_fldz_FT0(cpu_env);
6035                    gen_helper_fcom_ST0_FT0(cpu_env);
6036                    break;
6037                case 5: /* fxam */
6038                    gen_helper_fxam_ST0(cpu_env);
6039                    break;
6040                default:
6041                    goto unknown_op;
6042                }
6043                break;
                /* D9 /5 group: push one of the architectural constants
                   (1, log2(10), log2(e), pi, log10(2), ln(2), 0). */
6044            case 0x0d: /* grp d9/5 */
6045                {
6046                    switch(rm) {
6047                    case 0: /* fld1 */
6048                        gen_helper_fpush(cpu_env);
6049                        gen_helper_fld1_ST0(cpu_env);
6050                        break;
6051                    case 1: /* fldl2t */
6052                        gen_helper_fpush(cpu_env);
6053                        gen_helper_fldl2t_ST0(cpu_env);
6054                        break;
6055                    case 2: /* fldl2e */
6056                        gen_helper_fpush(cpu_env);
6057                        gen_helper_fldl2e_ST0(cpu_env);
6058                        break;
6059                    case 3: /* fldpi */
6060                        gen_helper_fpush(cpu_env);
6061                        gen_helper_fldpi_ST0(cpu_env);
6062                        break;
6063                    case 4: /* fldlg2 */
6064                        gen_helper_fpush(cpu_env);
6065                        gen_helper_fldlg2_ST0(cpu_env);
6066                        break;
6067                    case 5: /* fldln2 */
6068                        gen_helper_fpush(cpu_env);
6069                        gen_helper_fldln2_ST0(cpu_env);
6070                        break;
6071                    case 6: /* fldz */
6072                        gen_helper_fpush(cpu_env);
6073                        gen_helper_fldz_ST0(cpu_env);
6074                        break;
6075                    default:
6076                        goto unknown_op;
6077                    }
6078                }
6079                break;
                /* D9 /6 group: transcendentals and stack-pointer tweaks,
                   all implemented entirely inside the helpers. */
6080            case 0x0e: /* grp d9/6 */
6081                switch(rm) {
6082                case 0: /* f2xm1 */
6083                    gen_helper_f2xm1(cpu_env);
6084                    break;
6085                case 1: /* fyl2x */
6086                    gen_helper_fyl2x(cpu_env);
6087                    break;
6088                case 2: /* fptan */
6089                    gen_helper_fptan(cpu_env);
6090                    break;
6091                case 3: /* fpatan */
6092                    gen_helper_fpatan(cpu_env);
6093                    break;
6094                case 4: /* fxtract */
6095                    gen_helper_fxtract(cpu_env);
6096                    break;
6097                case 5: /* fprem1 */
6098                    gen_helper_fprem1(cpu_env);
6099                    break;
6100                case 6: /* fdecstp */
6101                    gen_helper_fdecstp(cpu_env);
6102                    break;
                /* rm == 7 falls under default as well: both decode fincstp. */
6103                default:
6104                case 7: /* fincstp */
6105                    gen_helper_fincstp(cpu_env);
6106                    break;
6107                }
6108                break;
6109            case 0x0f: /* grp d9/7 */
6110                switch(rm) {
6111                case 0: /* fprem */
6112                    gen_helper_fprem(cpu_env);
6113                    break;
6114                case 1: /* fyl2xp1 */
6115                    gen_helper_fyl2xp1(cpu_env);
6116                    break;
6117                case 2: /* fsqrt */
6118                    gen_helper_fsqrt(cpu_env);
6119                    break;
6120                case 3: /* fsincos */
6121                    gen_helper_fsincos(cpu_env);
6122                    break;
6123                case 5: /* fscale */
6124                    gen_helper_fscale(cpu_env);
6125                    break;
6126                case 4: /* frndint */
6127                    gen_helper_frndint(cpu_env);
6128                    break;
6129                case 6: /* fsin */
6130                    gen_helper_fsin(cpu_env);
6131                    break;
6132                default:
6133                case 7: /* fcos */
6134                    gen_helper_fcos(cpu_env);
6135                    break;
6136                }
6137                break;
                /* Two-operand arithmetic forms.  op & 7 selects the sub-op
                   (fadd/fmul/fcom/.../fdivr); ops >= 0x20 write ST(i)
                   instead of ST0, and ops >= 0x30 additionally pop. */
6138            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6139            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6140            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6141                {
6142                    int op1;
6143
6144                    op1 = op & 7;
6145                    if (op >= 0x20) {
6146                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6147                        if (op >= 0x30)
6148                            gen_helper_fpop(cpu_env);
6149                    } else {
                        /* ST0-destination form: stage ST(i) into FT0 first. */
6150                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6151                        gen_helper_fp_arith_ST0_FT0(op1);
6152                    }
6153                }
6154                break;
6155            case 0x02: /* fcom */
6156            case 0x22: /* fcom2, undocumented op */
6157                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6158                gen_helper_fcom_ST0_FT0(cpu_env);
6159                break;
6160            case 0x03: /* fcomp */
6161            case 0x23: /* fcomp3, undocumented op */
6162            case 0x32: /* fcomp5, undocumented op */
6163                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6164                gen_helper_fcom_ST0_FT0(cpu_env);
6165                gen_helper_fpop(cpu_env);
6166                break;
6167            case 0x15: /* da/5 */
6168                switch(rm) {
6169                case 1: /* fucompp */
                    /* Compare ST0 with ST1, then pop both. */
6170                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6171                    gen_helper_fucom_ST0_FT0(cpu_env);
6172                    gen_helper_fpop(cpu_env);
6173                    gen_helper_fpop(cpu_env);
6174                    break;
6175                default:
6176                    goto unknown_op;
6177                }
6178                break;
                /* DB /4 group: 287-era control ops; only fclex/fninit still
                   do anything, the rest are accepted as no-ops. */
6179            case 0x1c:
6180                switch(rm) {
6181                case 0: /* feni (287 only, just do nop here) */
6182                    break;
6183                case 1: /* fdisi (287 only, just do nop here) */
6184                    break;
6185                case 2: /* fclex */
6186                    gen_helper_fclex(cpu_env);
6187                    break;
6188                case 3: /* fninit */
6189                    gen_helper_fninit(cpu_env);
6190                    break;
6191                case 4: /* fsetpm (287 only, just do nop here) */
6192                    break;
6193                default:
6194                    goto unknown_op;
6195                }
6196                break;
                /* fucomi/fcomi write EFLAGS directly, so they require CMOV
                   support (P6+); flush cc state before and mark EFLAGS as
                   the live cc representation afterwards. */
6197            case 0x1d: /* fucomi */
6198                if (!(s->cpuid_features & CPUID_CMOV)) {
6199                    goto illegal_op;
6200                }
6201                gen_update_cc_op(s);
6202                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6203                gen_helper_fucomi_ST0_FT0(cpu_env);
6204                set_cc_op(s, CC_OP_EFLAGS);
6205                break;
6206            case 0x1e: /* fcomi */
6207                if (!(s->cpuid_features & CPUID_CMOV)) {
6208                    goto illegal_op;
6209                }
6210                gen_update_cc_op(s);
6211                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6212                gen_helper_fcomi_ST0_FT0(cpu_env);
6213                set_cc_op(s, CC_OP_EFLAGS);
6214                break;
6215            case 0x28: /* ffree sti */
6216                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6217                break;
6218            case 0x2a: /* fst sti */
6219                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6220                break;
6221            case 0x2b: /* fstp sti */
6222            case 0x0b: /* fstp1 sti, undocumented op */
6223            case 0x3a: /* fstp8 sti, undocumented op */
6224            case 0x3b: /* fstp9 sti, undocumented op */
6225                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6226                gen_helper_fpop(cpu_env);
6227                break;
6228            case 0x2c: /* fucom st(i) */
6229                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6230                gen_helper_fucom_ST0_FT0(cpu_env);
6231                break;
6232            case 0x2d: /* fucomp st(i) */
6233                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6234                gen_helper_fucom_ST0_FT0(cpu_env);
6235                gen_helper_fpop(cpu_env);
6236                break;
6237            case 0x33: /* de/3 */
6238                switch(rm) {
6239                case 1: /* fcompp */
                    /* Compare ST0 with ST1 and pop both. */
6240                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6241                    gen_helper_fcom_ST0_FT0(cpu_env);
6242                    gen_helper_fpop(cpu_env);
6243                    gen_helper_fpop(cpu_env);
6244                    break;
6245                default:
6246                    goto unknown_op;
6247                }
6248                break;
6249            case 0x38: /* ffreep sti, undocumented op */
6250                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6251                gen_helper_fpop(cpu_env);
6252                break;
6253            case 0x3c: /* df/4 */
6254                switch(rm) {
6255                case 0: /* fnstsw ax */
                    /* Status word goes to AX as a 16-bit store. */
6256                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6257                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6258                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6259                    break;
6260                default:
6261                    goto unknown_op;
6262                }
6263                break;
                /* Popping variants of fucomi/fcomi; like those, they need
                   CMOV support and leave the cc state in EFLAGS form. */
6264            case 0x3d: /* fucomip */
6265                if (!(s->cpuid_features & CPUID_CMOV)) {
6266                    goto illegal_op;
6267                }
6268                gen_update_cc_op(s);
6269                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6270                gen_helper_fucomi_ST0_FT0(cpu_env);
6271                gen_helper_fpop(cpu_env);
6272                set_cc_op(s, CC_OP_EFLAGS);
6273                break;
6274            case 0x3e: /* fcomip */
6275                if (!(s->cpuid_features & CPUID_CMOV)) {
6276                    goto illegal_op;
6277                }
6278                gen_update_cc_op(s);
6279                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6280                gen_helper_fcomi_ST0_FT0(cpu_env);
6281                gen_helper_fpop(cpu_env);
6282                set_cc_op(s, CC_OP_EFLAGS);
6283                break;
6284            case 0x10 ... 0x13: /* fcmovxx */
6285            case 0x18 ... 0x1b:
6286                {
6287                    int op1;
6288                    TCGLabel *l1;
                    /* Condition codes for the four fcmov pairs; bit 3 of op
                       selects the negated variant (low bit of op1 below). */
6289                    static const uint8_t fcmov_cc[8] = {
6290                        (JCC_B << 1),
6291                        (JCC_Z << 1),
6292                        (JCC_BE << 1),
6293                        (JCC_P << 1),
6294                    };
6295
6296                    if (!(s->cpuid_features & CPUID_CMOV)) {
6297                        goto illegal_op;
6298                    }
6299                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                    /* Branch over the move when the condition is false. */
6300                    l1 = gen_new_label();
6301                    gen_jcc1_noeob(s, op1, l1);
6302                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6303                    gen_set_label(l1);
6304                }
6305                break;
6306            default:
6307                goto unknown_op;
6308            }
6309        }
6310        break;
6311        /************************/
6312        /* string ops */
6313
        /* String instructions: with a REP/REPNE prefix the repz_*
           generators emit their own loop (and TB exit); otherwise a single
           iteration is emitted inline. */
6314    case 0xa4: /* movsS */
6315    case 0xa5:
6316        ot = mo_b_d(b, dflag);
6317        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6318            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6319        } else {
6320            gen_movs(s, ot);
6321        }
6322        break;
6323
6324    case 0xaa: /* stosS */
6325    case 0xab:
6326        ot = mo_b_d(b, dflag);
6327        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6328            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6329        } else {
6330            gen_stos(s, ot);
6331        }
6332        break;
6333    case 0xac: /* lodsS */
6334    case 0xad:
6335        ot = mo_b_d(b, dflag);
6336        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6337            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6338        } else {
6339            gen_lods(s, ot);
6340        }
6341        break;
        /* scas/cmps distinguish REPZ from REPNZ: the final argument is the
           ZF sense on which the loop terminates. */
6342    case 0xae: /* scasS */
6343    case 0xaf:
6344        ot = mo_b_d(b, dflag);
6345        if (prefixes & PREFIX_REPNZ) {
6346            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6347        } else if (prefixes & PREFIX_REPZ) {
6348            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6349        } else {
6350            gen_scas(s, ot);
6351        }
6352        break;
6353
6354    case 0xa6: /* cmpsS */
6355    case 0xa7:
6356        ot = mo_b_d(b, dflag);
6357        if (prefixes & PREFIX_REPNZ) {
6358            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6359        } else if (prefixes & PREFIX_REPZ) {
6360            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6361        } else {
6362            gen_cmps(s, ot);
6363        }
6364        break;
        /* ins/outs: port number is DX (zero-extended 16 bits); gen_check_io
           performs the permission/SVM intercept check.  With icount, end the
           TB after a single I/O operation so timing stays exact. */
6365    case 0x6c: /* insS */
6366    case 0x6d:
6367        ot = mo_b_d32(b, dflag);
6368        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6369        gen_check_io(s, ot, pc_start - s->cs_base, 
6370                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6371        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6372            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6373        } else {
6374            gen_ins(s, ot);
6375            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6376                gen_jmp(s, s->pc - s->cs_base);
6377            }
6378        }
6379        break;
6380    case 0x6e: /* outsS */
6381    case 0x6f:
6382        ot = mo_b_d32(b, dflag);
6383        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6384        gen_check_io(s, ot, pc_start - s->cs_base,
6385                     svm_is_rep(prefixes) | 4);
6386        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6387            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6388        } else {
6389            gen_outs(s, ot);
6390            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6391                gen_jmp(s, s->pc - s->cs_base);
6392            }
6393        }
6394        break;
6395
6396        /************************/
6397        /* port I/O */
6398
6399    case 0xe4:
6400    case 0xe5:
6401        ot = mo_b_d32(b, dflag);
6402        val = x86_ldub_code(env, s);
6403        tcg_gen_movi_tl(s->T0, val);
6404        gen_check_io(s, ot, pc_start - s->cs_base,
6405                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6406        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6407            gen_io_start();
6408        }
6409        tcg_gen_movi_i32(s->tmp2_i32, val);
6410        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6411        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6412        gen_bpt_io(s, s->tmp2_i32, ot);
6413        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6414            gen_io_end();
6415            gen_jmp(s, s->pc - s->cs_base);
6416        }
6417        break;
6418    case 0xe6:
6419    case 0xe7:
6420        ot = mo_b_d32(b, dflag);
6421        val = x86_ldub_code(env, s);
6422        tcg_gen_movi_tl(s->T0, val);
6423        gen_check_io(s, ot, pc_start - s->cs_base,
6424                     svm_is_rep(prefixes));
6425        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6426
6427        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6428            gen_io_start();
6429        }
6430        tcg_gen_movi_i32(s->tmp2_i32, val);
6431        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6432        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6433        gen_bpt_io(s, s->tmp2_i32, ot);
6434        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6435            gen_io_end();
6436            gen_jmp(s, s->pc - s->cs_base);
6437        }
6438        break;
6439    case 0xec:
6440    case 0xed:
6441        ot = mo_b_d32(b, dflag);
6442        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6443        gen_check_io(s, ot, pc_start - s->cs_base,
6444                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6445        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6446            gen_io_start();
6447        }
6448        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6449        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6450        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6451        gen_bpt_io(s, s->tmp2_i32, ot);
6452        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6453            gen_io_end();
6454            gen_jmp(s, s->pc - s->cs_base);
6455        }
6456        break;
6457    case 0xee:
6458    case 0xef:
6459        ot = mo_b_d32(b, dflag);
6460        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6461        gen_check_io(s, ot, pc_start - s->cs_base,
6462                     svm_is_rep(prefixes));
6463        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6464
6465        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6466            gen_io_start();
6467        }
6468        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6469        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6470        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6471        gen_bpt_io(s, s->tmp2_i32, ot);
6472        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6473            gen_io_end();
6474            gen_jmp(s, s->pc - s->cs_base);
6475        }
6476        break;
6477
6478        /************************/
6479        /* control */
        /* Near returns: pop EIP (zero-extended), then for the imm form add
           the extra immediate to the stack adjustment. */
6480    case 0xc2: /* ret im */
6481        val = x86_ldsw_code(env, s);
6482        ot = gen_pop_T0(s);
6483        gen_stack_update(s, val + (1 << ot));
6484        /* Note that gen_pop_T0 uses a zero-extending load.  */
6485        gen_op_jmp_v(s->T0);
6486        gen_bnd_jmp(s);
6487        gen_jr(s, s->T0);
6488        break;
6489    case 0xc3: /* ret */
6490        ot = gen_pop_T0(s);
6491        gen_pop_update(s, ot);
6492        /* Note that gen_pop_T0 uses a zero-extending load.  */
6493        gen_op_jmp_v(s->T0);
6494        gen_bnd_jmp(s);
6495        gen_jr(s, s->T0);
6496        break;
        /* Far returns: protected mode goes through the helper (privilege
           checks); real/vm86 mode pops EIP and CS inline. */
6497    case 0xca: /* lret im */
6498        val = x86_ldsw_code(env, s);
6499    do_lret:
6500        if (s->pe && !s->vm86) {
6501            gen_update_cc_op(s);
6502            gen_jmp_im(s, pc_start - s->cs_base);
6503            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6504                                      tcg_const_i32(val));
6505        } else {
6506            gen_stack_A0(s);
6507            /* pop offset */
6508            gen_op_ld_v(s, dflag, s->T0, s->A0);
6509            /* NOTE: keeping EIP updated is not a problem in case of
6510               exception */
6511            gen_op_jmp_v(s->T0);
6512            /* pop selector */
6513            gen_add_A0_im(s, 1 << dflag);
6514            gen_op_ld_v(s, dflag, s->T0, s->A0);
6515            gen_op_movl_seg_T0_vm(s, R_CS);
6516            /* add stack offset */
6517            gen_stack_update(s, val + (2 << dflag));
6518        }
6519        gen_eob(s);
6520        break;
6521    case 0xcb: /* lret */
6522        val = 0;
6523        goto do_lret;
        /* iret: real mode and vm86-with-IOPL-3 use the real-mode helper;
           vm86 with lower IOPL faults; protected mode uses the full
           protected-mode helper.  All paths leave cc state in EFLAGS. */
6524    case 0xcf: /* iret */
6525        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6526        if (!s->pe) {
6527            /* real mode */
6528            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6529            set_cc_op(s, CC_OP_EFLAGS);
6530        } else if (s->vm86) {
6531            if (s->iopl != 3) {
6532                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6533            } else {
6534                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6535                set_cc_op(s, CC_OP_EFLAGS);
6536            }
6537        } else {
6538            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6539                                      tcg_const_i32(s->pc - s->cs_base));
6540            set_cc_op(s, CC_OP_EFLAGS);
6541        }
6542        gen_eob(s);
6543        break;
        /* Near call: sign-extended displacement relative to the next
           instruction, masked to the operand size (16-bit wrap, or 32-bit
           outside long mode). */
6544    case 0xe8: /* call im */
6545        {
6546            if (dflag != MO_16) {
6547                tval = (int32_t)insn_get(env, s, MO_32);
6548            } else {
6549                tval = (int16_t)insn_get(env, s, MO_16);
6550            }
6551            next_eip = s->pc - s->cs_base;
6552            tval += next_eip;
6553            if (dflag == MO_16) {
6554                tval &= 0xffff;
6555            } else if (!CODE64(s)) {
6556                tval &= 0xffffffff;
6557            }
            /* Push the return address, then jump. */
6558            tcg_gen_movi_tl(s->T0, next_eip);
6559            gen_push_v(s, s->T0);
6560            gen_bnd_jmp(s);
6561            gen_jmp(s, tval);
6562        }
6563        break;
        /* Far call with immediate selector:offset; invalid in 64-bit code.
           Shares the tail with the indirect form via do_lcall. */
6564    case 0x9a: /* lcall im */
6565        {
6566            unsigned int selector, offset;
6567
6568            if (CODE64(s))
6569                goto illegal_op;
6570            ot = dflag;
6571            offset = insn_get(env, s, ot);
6572            selector = insn_get(env, s, MO_16);
6573
6574            tcg_gen_movi_tl(s->T0, selector);
6575            tcg_gen_movi_tl(s->T1, offset);
6576        }
6577        goto do_lcall;
6578    case 0xe9: /* jmp im */
6579        if (dflag != MO_16) {
6580            tval = (int32_t)insn_get(env, s, MO_32);
6581        } else {
6582            tval = (int16_t)insn_get(env, s, MO_16);
6583        }
6584        tval += s->pc - s->cs_base;
6585        if (dflag == MO_16) {
6586            tval &= 0xffff;
6587        } else if (!CODE64(s)) {
6588            tval &= 0xffffffff;
6589        }
6590        gen_bnd_jmp(s);
6591        gen_jmp(s, tval);
6592        break;
6593    case 0xea: /* ljmp im */
6594        {
6595            unsigned int selector, offset;
6596
6597            if (CODE64(s))
6598                goto illegal_op;
6599            ot = dflag;
6600            offset = insn_get(env, s, ot);
6601            selector = insn_get(env, s, MO_16);
6602
6603            tcg_gen_movi_tl(s->T0, selector);
6604            tcg_gen_movi_tl(s->T1, offset);
6605        }
6606        goto do_ljmp;
        /* Short jump: 8-bit sign-extended displacement. */
6607    case 0xeb: /* jmp Jb */
6608        tval = (int8_t)insn_get(env, s, MO_8);
6609        tval += s->pc - s->cs_base;
6610        if (dflag == MO_16) {
6611            tval &= 0xffff;
6612        }
6613        gen_jmp(s, tval);
6614        break;
        /* Conditional jumps: short (Jb) and near (Jv) forms share the
           do_jcc tail; target is relative to the next instruction and
           wraps at 16 bits for 16-bit operand size. */
6615    case 0x70 ... 0x7f: /* jcc Jb */
6616        tval = (int8_t)insn_get(env, s, MO_8);
6617        goto do_jcc;
6618    case 0x180 ... 0x18f: /* jcc Jv */
6619        if (dflag != MO_16) {
6620            tval = (int32_t)insn_get(env, s, MO_32);
6621        } else {
6622            tval = (int16_t)insn_get(env, s, MO_16);
6623        }
6624    do_jcc:
6625        next_eip = s->pc - s->cs_base;
6626        tval += next_eip;
6627        if (dflag == MO_16) {
6628            tval &= 0xffff;
6629        }
6630        gen_bnd_jmp(s);
6631        gen_jcc(s, b, tval, next_eip);
6632        break;
6633
        /* setcc: condition result materialized in T0, then stored via the
           modrm operand (always byte-sized). */
6634    case 0x190 ... 0x19f: /* setcc Gv */
6635        modrm = x86_ldub_code(env, s);
6636        gen_setcc1(s, b, s->T0);
6637        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6638        break;
6639    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6640        if (!(s->cpuid_features & CPUID_CMOV)) {
6641            goto illegal_op;
6642        }
6643        ot = dflag;
6644        modrm = x86_ldub_code(env, s);
6645        reg = ((modrm >> 3) & 7) | rex_r;
6646        gen_cmovcc1(env, s, ot, b, modrm, reg);
6647        break;
6648
6649        /************************/
6650        /* flags */
        /* pushf/popf fault in vm86 mode unless IOPL is 3. */
6651    case 0x9c: /* pushf */
6652        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6653        if (s->vm86 && s->iopl != 3) {
6654            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6655        } else {
6656            gen_update_cc_op(s);
6657            gen_helper_read_eflags(s->T0, cpu_env);
6658            gen_push_v(s, s->T0);
6659        }
6660        break;
        /* popf: the set of writable flag bits depends on privilege:
           CPL 0 may change IF and IOPL, CPL <= IOPL may change IF only,
           otherwise neither; TF/AC/ID/NT are always writable.  16-bit
           operand size masks the update to the low 16 bits. */
6661    case 0x9d: /* popf */
6662        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6663        if (s->vm86 && s->iopl != 3) {
6664            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6665        } else {
6666            ot = gen_pop_T0(s);
6667            if (s->cpl == 0) {
6668                if (dflag != MO_16) {
6669                    gen_helper_write_eflags(cpu_env, s->T0,
6670                                            tcg_const_i32((TF_MASK | AC_MASK |
6671                                                           ID_MASK | NT_MASK |
6672                                                           IF_MASK |
6673                                                           IOPL_MASK)));
6674                } else {
6675                    gen_helper_write_eflags(cpu_env, s->T0,
6676                                            tcg_const_i32((TF_MASK | AC_MASK |
6677                                                           ID_MASK | NT_MASK |
6678                                                           IF_MASK | IOPL_MASK)
6679                                                          & 0xffff));
6680                }
6681            } else {
6682                if (s->cpl <= s->iopl) {
6683                    if (dflag != MO_16) {
6684                        gen_helper_write_eflags(cpu_env, s->T0,
6685                                                tcg_const_i32((TF_MASK |
6686                                                               AC_MASK |
6687                                                               ID_MASK |
6688                                                               NT_MASK |
6689                                                               IF_MASK)));
6690                    } else {
6691                        gen_helper_write_eflags(cpu_env, s->T0,
6692                                                tcg_const_i32((TF_MASK |
6693                                                               AC_MASK |
6694                                                               ID_MASK |
6695                                                               NT_MASK |
6696                                                               IF_MASK)
6697                                                              & 0xffff));
6698                    }
6699                } else {
6700                    if (dflag != MO_16) {
6701                        gen_helper_write_eflags(cpu_env, s->T0,
6702                                           tcg_const_i32((TF_MASK | AC_MASK |
6703                                                          ID_MASK | NT_MASK)));
6704                    } else {
6705                        gen_helper_write_eflags(cpu_env, s->T0,
6706                                           tcg_const_i32((TF_MASK | AC_MASK |
6707                                                          ID_MASK | NT_MASK)
6708                                                         & 0xffff));
6709                    }
6710                }
6711            }
6712            gen_pop_update(s, ot);
6713            set_cc_op(s, CC_OP_EFLAGS);
6714            /* abort translation because TF/AC flag may change */
6715            gen_jmp_im(s, s->pc - s->cs_base);
6716            gen_eob(s);
6717        }
6718        break;
        /* sahf/lahf: in 64-bit mode these require the LAHF_LM CPUID bit.
           sahf merges AH into the arithmetic flags, preserving only O from
           cc_src; lahf reads the flags (bit 1 is always set). */
6719    case 0x9e: /* sahf */
6720        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6721            goto illegal_op;
6722        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6723        gen_compute_eflags(s);
6724        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O)
6725        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6726        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6727        break;
6728    case 0x9f: /* lahf */
6729        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6730            goto illegal_op;
6731        gen_compute_eflags(s);
6732        /* Note: gen_compute_eflags() only gives the condition codes */
6733        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6734        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6735        break;
        /* Direct carry-flag manipulation on the computed flags. */
6736    case 0xf5: /* cmc */
6737        gen_compute_eflags(s);
6738        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6739        break;
6740    case 0xf8: /* clc */
6741        gen_compute_eflags(s);
6742        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6743        break;
6744    case 0xf9: /* stc */
6745        gen_compute_eflags(s);
6746        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6747        break;
        /* cld/std: DF is stored as +1/-1 so string ops can add it
           directly to the index registers. */
6748    case 0xfc: /* cld */
6749        tcg_gen_movi_i32(s->tmp2_i32, 1);
6750        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6751        break;
6752    case 0xfd: /* std */
6753        tcg_gen_movi_i32(s->tmp2_i32, -1);
6754        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6755        break;
6756
6757        /************************/
6758        /* bit operations */
6759    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6760        ot = dflag;
6761        modrm = x86_ldub_code(env, s);
6762        op = (modrm >> 3) & 7;
6763        mod = (modrm >> 6) & 3;
6764        rm = (modrm & 7) | REX_B(s);
6765        if (mod != 3) {
6766            s->rip_offset = 1;
6767            gen_lea_modrm(env, s, modrm);
6768            if (!(s->prefix & PREFIX_LOCK)) {
6769                gen_op_ld_v(s, ot, s->T0, s->A0);
6770            }
6771        } else {
6772            gen_op_mov_v_reg(s, ot, s->T0, rm);
6773        }
6774        /* load shift */
6775        val = x86_ldub_code(env, s);
6776        tcg_gen_movi_tl(s->T1, val);
6777        if (op < 4)
6778            goto unknown_op;
6779        op -= 4;
6780        goto bt_op;
6781    case 0x1a3: /* bt Gv, Ev */
6782        op = 0;
6783        goto do_btx;
6784    case 0x1ab: /* bts */
6785        op = 1;
6786        goto do_btx;
6787    case 0x1b3: /* btr */
6788        op = 2;
6789        goto do_btx;
6790    case 0x1bb: /* btc */
6791        op = 3;
6792    do_btx:
6793        ot = dflag;
6794        modrm = x86_ldub_code(env, s);
6795        reg = ((modrm >> 3) & 7) | rex_r;
6796        mod = (modrm >> 6) & 3;
6797        rm = (modrm & 7) | REX_B(s);
6798        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6799        if (mod != 3) {
6800            AddressParts a = gen_lea_modrm_0(env, s, modrm);
6801            /* specific case: we need to add a displacement */
6802            gen_exts(ot, s->T1);
6803            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6804            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6805            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6806            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6807            if (!(s->prefix & PREFIX_LOCK)) {
6808                gen_op_ld_v(s, ot, s->T0, s->A0);
6809            }
6810        } else {
6811            gen_op_mov_v_reg(s, ot, s->T0, rm);
6812        }
6813    bt_op:
6814        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6815        tcg_gen_movi_tl(s->tmp0, 1);
6816        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6817        if (s->prefix & PREFIX_LOCK) {
6818            switch (op) {
6819            case 0: /* bt */
6820                /* Needs no atomic ops; we suppressed the normal
6821                   memory load for LOCK above so do it now.  */
6822                gen_op_ld_v(s, ot, s->T0, s->A0);
6823                break;
6824            case 1: /* bts */
6825                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6826                                           s->mem_index, ot | MO_LE);
6827                break;
6828            case 2: /* btr */
6829                tcg_gen_not_tl(s->tmp0, s->tmp0);
6830                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6831                                            s->mem_index, ot | MO_LE);
6832                break;
6833            default:
6834            case 3: /* btc */
6835                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6836                                            s->mem_index, ot | MO_LE);
6837                break;
6838            }
6839            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6840        } else {
6841            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6842            switch (op) {
6843            case 0: /* bt */
6844                /* Data already loaded; nothing to do.  */
6845                break;
6846            case 1: /* bts */
6847                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6848                break;
6849            case 2: /* btr */
6850                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6851                break;
6852            default:
6853            case 3: /* btc */
6854                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6855                break;
6856            }
6857            if (op != 0) {
6858                if (mod != 3) {
6859                    gen_op_st_v(s, ot, s->T0, s->A0);
6860                } else {
6861                    gen_op_mov_reg_v(s, ot, rm, s->T0);
6862                }
6863            }
6864        }
6865
6866        /* Delay all CC updates until after the store above.  Note that
6867           C is the result of the test, Z is unchanged, and the others
6868           are all undefined.  */
6869        switch (s->cc_op) {
6870        case CC_OP_MULB ... CC_OP_MULQ:
6871        case CC_OP_ADDB ... CC_OP_ADDQ:
6872        case CC_OP_ADCB ... CC_OP_ADCQ:
6873        case CC_OP_SUBB ... CC_OP_SUBQ:
6874        case CC_OP_SBBB ... CC_OP_SBBQ:
6875        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6876        case CC_OP_INCB ... CC_OP_INCQ:
6877        case CC_OP_DECB ... CC_OP_DECQ:
6878        case CC_OP_SHLB ... CC_OP_SHLQ:
6879        case CC_OP_SARB ... CC_OP_SARQ:
6880        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6881            /* Z was going to be computed from the non-zero status of CC_DST.
6882               We can get that same Z value (and the new C value) by leaving
6883               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6884               same width.  */
6885            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6886            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6887            break;
6888        default:
6889            /* Otherwise, generate EFLAGS and replace the C bit.  */
6890            gen_compute_eflags(s);
6891            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
6892                               ctz32(CC_C), 1);
6893            break;
6894        }
6895        break;
6896    case 0x1bc: /* bsf / tzcnt */
6897    case 0x1bd: /* bsr / lzcnt */
6898        ot = dflag;
6899        modrm = x86_ldub_code(env, s);
6900        reg = ((modrm >> 3) & 7) | rex_r;
6901        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6902        gen_extu(ot, s->T0);
6903
6904        /* Note that lzcnt and tzcnt are in different extensions.  */
6905        if ((prefixes & PREFIX_REPZ)
6906            && (b & 1
6907                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6908                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6909            int size = 8 << ot;
6910            /* For lzcnt/tzcnt, C bit is defined related to the input. */
6911            tcg_gen_mov_tl(cpu_cc_src, s->T0);
6912            if (b & 1) {
6913                /* For lzcnt, reduce the target_ulong result by the
6914                   number of zeros that we expect to find at the top.  */
6915                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
6916                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
6917            } else {
6918                /* For tzcnt, a zero input must return the operand size.  */
6919                tcg_gen_ctzi_tl(s->T0, s->T0, size);
6920            }
6921            /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
6922            gen_op_update1_cc(s);
6923            set_cc_op(s, CC_OP_BMILGB + ot);
6924        } else {
6925            /* For bsr/bsf, only the Z bit is defined and it is related
6926               to the input and not the result.  */
6927            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
6928            set_cc_op(s, CC_OP_LOGICB + ot);
6929
6930            /* ??? The manual says that the output is undefined when the
6931               input is zero, but real hardware leaves it unchanged, and
6932               real programs appear to depend on that.  Accomplish this
6933               by passing the output as the value to return upon zero.  */
6934            if (b & 1) {
6935                /* For bsr, return the bit index of the first 1 bit,
6936                   not the count of leading zeros.  */
6937                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6938                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
6939                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
6940            } else {
6941                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
6942            }
6943        }
6944        gen_op_mov_reg_v(s, ot, reg, s->T0);
6945        break;
6946        /************************/
6947        /* bcd */
6948    case 0x27: /* daa */
6949        if (CODE64(s))
6950            goto illegal_op;
6951        gen_update_cc_op(s);
6952        gen_helper_daa(cpu_env);
6953        set_cc_op(s, CC_OP_EFLAGS);
6954        break;
6955    case 0x2f: /* das */
6956        if (CODE64(s))
6957            goto illegal_op;
6958        gen_update_cc_op(s);
6959        gen_helper_das(cpu_env);
6960        set_cc_op(s, CC_OP_EFLAGS);
6961        break;
6962    case 0x37: /* aaa */
6963        if (CODE64(s))
6964            goto illegal_op;
6965        gen_update_cc_op(s);
6966        gen_helper_aaa(cpu_env);
6967        set_cc_op(s, CC_OP_EFLAGS);
6968        break;
6969    case 0x3f: /* aas */
6970        if (CODE64(s))
6971            goto illegal_op;
6972        gen_update_cc_op(s);
6973        gen_helper_aas(cpu_env);
6974        set_cc_op(s, CC_OP_EFLAGS);
6975        break;
6976    case 0xd4: /* aam */
6977        if (CODE64(s))
6978            goto illegal_op;
6979        val = x86_ldub_code(env, s);
6980        if (val == 0) {
6981            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6982        } else {
6983            gen_helper_aam(cpu_env, tcg_const_i32(val));
6984            set_cc_op(s, CC_OP_LOGICB);
6985        }
6986        break;
6987    case 0xd5: /* aad */
6988        if (CODE64(s))
6989            goto illegal_op;
6990        val = x86_ldub_code(env, s);
6991        gen_helper_aad(cpu_env, tcg_const_i32(val));
6992        set_cc_op(s, CC_OP_LOGICB);
6993        break;
6994        /************************/
6995        /* misc */
6996    case 0x90: /* nop */
6997        /* XXX: correct lock test for all insn */
6998        if (prefixes & PREFIX_LOCK) {
6999            goto illegal_op;
7000        }
7001        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7002        if (REX_B(s)) {
7003            goto do_xchg_reg_eax;
7004        }
7005        if (prefixes & PREFIX_REPZ) {
7006            gen_update_cc_op(s);
7007            gen_jmp_im(s, pc_start - s->cs_base);
7008            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7009            s->base.is_jmp = DISAS_NORETURN;
7010        }
7011        break;
7012    case 0x9b: /* fwait */
7013        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7014            (HF_MP_MASK | HF_TS_MASK)) {
7015            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7016        } else {
7017            gen_helper_fwait(cpu_env);
7018        }
7019        break;
7020    case 0xcc: /* int3 */
7021        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7022        break;
7023    case 0xcd: /* int N */
7024        val = x86_ldub_code(env, s);
7025        if (s->vm86 && s->iopl != 3) {
7026            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7027        } else {
7028            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7029        }
7030        break;
7031    case 0xce: /* into */
7032        if (CODE64(s))
7033            goto illegal_op;
7034        gen_update_cc_op(s);
7035        gen_jmp_im(s, pc_start - s->cs_base);
7036        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7037        break;
7038#ifdef WANT_ICEBP
7039    case 0xf1: /* icebp (undocumented, exits to external debugger) */
7040        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7041        gen_debug(s, pc_start - s->cs_base);
7042        break;
7043#endif
7044    case 0xfa: /* cli */
7045        if (!s->vm86) {
7046            if (s->cpl <= s->iopl) {
7047                gen_helper_cli(cpu_env);
7048            } else {
7049                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7050            }
7051        } else {
7052            if (s->iopl == 3) {
7053                gen_helper_cli(cpu_env);
7054            } else {
7055                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7056            }
7057        }
7058        break;
7059    case 0xfb: /* sti */
7060        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7061            gen_helper_sti(cpu_env);
7062            /* interrupts are enabled only for the first insn after sti */
7063            gen_jmp_im(s, s->pc - s->cs_base);
7064            gen_eob_inhibit_irq(s, true);
7065        } else {
7066            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7067        }
7068        break;
7069    case 0x62: /* bound */
7070        if (CODE64(s))
7071            goto illegal_op;
7072        ot = dflag;
7073        modrm = x86_ldub_code(env, s);
7074        reg = (modrm >> 3) & 7;
7075        mod = (modrm >> 6) & 3;
7076        if (mod == 3)
7077            goto illegal_op;
7078        gen_op_mov_v_reg(s, ot, s->T0, reg);
7079        gen_lea_modrm(env, s, modrm);
7080        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7081        if (ot == MO_16) {
7082            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7083        } else {
7084            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7085        }
7086        break;
7087    case 0x1c8 ... 0x1cf: /* bswap reg */
7088        reg = (b & 7) | REX_B(s);
7089#ifdef TARGET_X86_64
7090        if (dflag == MO_64) {
7091            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
7092            tcg_gen_bswap64_i64(s->T0, s->T0);
7093            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
7094        } else
7095#endif
7096        {
7097            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
7098            tcg_gen_ext32u_tl(s->T0, s->T0);
7099            tcg_gen_bswap32_tl(s->T0, s->T0);
7100            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
7101        }
7102        break;
7103    case 0xd6: /* salc */
7104        if (CODE64(s))
7105            goto illegal_op;
7106        gen_compute_eflags_c(s, s->T0);
7107        tcg_gen_neg_tl(s->T0, s->T0);
7108        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7109        break;
7110    case 0xe0: /* loopnz */
7111    case 0xe1: /* loopz */
7112    case 0xe2: /* loop */
7113    case 0xe3: /* jecxz */
7114        {
7115            TCGLabel *l1, *l2, *l3;
7116
7117            tval = (int8_t)insn_get(env, s, MO_8);
7118            next_eip = s->pc - s->cs_base;
7119            tval += next_eip;
7120            if (dflag == MO_16) {
7121                tval &= 0xffff;
7122            }
7123
7124            l1 = gen_new_label();
7125            l2 = gen_new_label();
7126            l3 = gen_new_label();
7127            b &= 3;
7128            switch(b) {
7129            case 0: /* loopnz */
7130            case 1: /* loopz */
7131                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7132                gen_op_jz_ecx(s, s->aflag, l3);
7133                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7134                break;
7135            case 2: /* loop */
7136                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7137                gen_op_jnz_ecx(s, s->aflag, l1);
7138                break;
7139            default:
7140            case 3: /* jcxz */
7141                gen_op_jz_ecx(s, s->aflag, l1);
7142                break;
7143            }
7144
7145            gen_set_label(l3);
7146            gen_jmp_im(s, next_eip);
7147            tcg_gen_br(l2);
7148
7149            gen_set_label(l1);
7150            gen_jmp_im(s, tval);
7151            gen_set_label(l2);
7152            gen_eob(s);
7153        }
7154        break;
7155    case 0x130: /* wrmsr */
7156    case 0x132: /* rdmsr */
7157        if (s->cpl != 0) {
7158            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7159        } else {
7160            gen_update_cc_op(s);
7161            gen_jmp_im(s, pc_start - s->cs_base);
7162            if (b & 2) {
7163                gen_helper_rdmsr(cpu_env);
7164            } else {
7165                gen_helper_wrmsr(cpu_env);
7166            }
7167        }
7168        break;
7169    case 0x131: /* rdtsc */
7170        gen_update_cc_op(s);
7171        gen_jmp_im(s, pc_start - s->cs_base);
7172        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7173            gen_io_start();
7174        }
7175        gen_helper_rdtsc(cpu_env);
7176        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7177            gen_io_end();
7178            gen_jmp(s, s->pc - s->cs_base);
7179        }
7180        break;
7181    case 0x133: /* rdpmc */
7182        gen_update_cc_op(s);
7183        gen_jmp_im(s, pc_start - s->cs_base);
7184        gen_helper_rdpmc(cpu_env);
7185        break;
7186    case 0x134: /* sysenter */
7187        /* For Intel SYSENTER is valid on 64-bit */
7188        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7189            goto illegal_op;
7190        if (!s->pe) {
7191            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7192        } else {
7193            gen_helper_sysenter(cpu_env);
7194            gen_eob(s);
7195        }
7196        break;
7197    case 0x135: /* sysexit */
7198        /* For Intel SYSEXIT is valid on 64-bit */
7199        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7200            goto illegal_op;
7201        if (!s->pe) {
7202            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7203        } else {
7204            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7205            gen_eob(s);
7206        }
7207        break;
7208#ifdef TARGET_X86_64
7209    case 0x105: /* syscall */
7210        /* XXX: is it usable in real mode ? */
7211        gen_update_cc_op(s);
7212        gen_jmp_im(s, pc_start - s->cs_base);
7213        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7214        /* TF handling for the syscall insn is different. The TF bit is  checked
7215           after the syscall insn completes. This allows #DB to not be
7216           generated after one has entered CPL0 if TF is set in FMASK.  */
7217        gen_eob_worker(s, false, true);
7218        break;
7219    case 0x107: /* sysret */
7220        if (!s->pe) {
7221            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7222        } else {
7223            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7224            /* condition codes are modified only in long mode */
7225            if (s->lma) {
7226                set_cc_op(s, CC_OP_EFLAGS);
7227            }
7228            /* TF handling for the sysret insn is different. The TF bit is
7229               checked after the sysret insn completes. This allows #DB to be
7230               generated "as if" the syscall insn in userspace has just
7231               completed.  */
7232            gen_eob_worker(s, false, true);
7233        }
7234        break;
7235#endif
7236    case 0x1a2: /* cpuid */
7237        gen_update_cc_op(s);
7238        gen_jmp_im(s, pc_start - s->cs_base);
7239        gen_helper_cpuid(cpu_env);
7240        break;
7241    case 0xf4: /* hlt */
7242        if (s->cpl != 0) {
7243            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7244        } else {
7245            gen_update_cc_op(s);
7246            gen_jmp_im(s, pc_start - s->cs_base);
7247            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7248            s->base.is_jmp = DISAS_NORETURN;
7249        }
7250        break;
7251    case 0x100:
7252        modrm = x86_ldub_code(env, s);
7253        mod = (modrm >> 6) & 3;
7254        op = (modrm >> 3) & 7;
7255        switch(op) {
7256        case 0: /* sldt */
7257            if (!s->pe || s->vm86)
7258                goto illegal_op;
7259            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7260            tcg_gen_ld32u_tl(s->T0, cpu_env,
7261                             offsetof(CPUX86State, ldt.selector));
7262            ot = mod == 3 ? dflag : MO_16;
7263            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7264            break;
7265        case 2: /* lldt */
7266            if (!s->pe || s->vm86)
7267                goto illegal_op;
7268            if (s->cpl != 0) {
7269                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7270            } else {
7271                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7272                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7273                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7274                gen_helper_lldt(cpu_env, s->tmp2_i32);
7275            }
7276            break;
7277        case 1: /* str */
7278            if (!s->pe || s->vm86)
7279                goto illegal_op;
7280            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7281            tcg_gen_ld32u_tl(s->T0, cpu_env,
7282                             offsetof(CPUX86State, tr.selector));
7283            ot = mod == 3 ? dflag : MO_16;
7284            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7285            break;
7286        case 3: /* ltr */
7287            if (!s->pe || s->vm86)
7288                goto illegal_op;
7289            if (s->cpl != 0) {
7290                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7291            } else {
7292                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7293                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7294                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7295                gen_helper_ltr(cpu_env, s->tmp2_i32);
7296            }
7297            break;
7298        case 4: /* verr */
7299        case 5: /* verw */
7300            if (!s->pe || s->vm86)
7301                goto illegal_op;
7302            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7303            gen_update_cc_op(s);
7304            if (op == 4) {
7305                gen_helper_verr(cpu_env, s->T0);
7306            } else {
7307                gen_helper_verw(cpu_env, s->T0);
7308            }
7309            set_cc_op(s, CC_OP_EFLAGS);
7310            break;
7311        default:
7312            goto unknown_op;
7313        }
7314        break;
7315
7316    case 0x101:
7317        modrm = x86_ldub_code(env, s);
7318        switch (modrm) {
7319        CASE_MODRM_MEM_OP(0): /* sgdt */
7320            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7321            gen_lea_modrm(env, s, modrm);
7322            tcg_gen_ld32u_tl(s->T0,
7323                             cpu_env, offsetof(CPUX86State, gdt.limit));
7324            gen_op_st_v(s, MO_16, s->T0, s->A0);
7325            gen_add_A0_im(s, 2);
7326            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7327            if (dflag == MO_16) {
7328                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7329            }
7330            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7331            break;
7332
7333        case 0xc8: /* monitor */
7334            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7335                goto illegal_op;
7336            }
7337            gen_update_cc_op(s);
7338            gen_jmp_im(s, pc_start - s->cs_base);
7339            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7340            gen_extu(s->aflag, s->A0);
7341            gen_add_A0_ds_seg(s);
7342            gen_helper_monitor(cpu_env, s->A0);
7343            break;
7344
7345        case 0xc9: /* mwait */
7346            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7347                goto illegal_op;
7348            }
7349            gen_update_cc_op(s);
7350            gen_jmp_im(s, pc_start - s->cs_base);
7351            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7352            gen_eob(s);
7353            break;
7354
7355        case 0xca: /* clac */
7356            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7357                || s->cpl != 0) {
7358                goto illegal_op;
7359            }
7360            gen_helper_clac(cpu_env);
7361            gen_jmp_im(s, s->pc - s->cs_base);
7362            gen_eob(s);
7363            break;
7364
7365        case 0xcb: /* stac */
7366            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7367                || s->cpl != 0) {
7368                goto illegal_op;
7369            }
7370            gen_helper_stac(cpu_env);
7371            gen_jmp_im(s, s->pc - s->cs_base);
7372            gen_eob(s);
7373            break;
7374
7375        CASE_MODRM_MEM_OP(1): /* sidt */
7376            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7377            gen_lea_modrm(env, s, modrm);
7378            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7379            gen_op_st_v(s, MO_16, s->T0, s->A0);
7380            gen_add_A0_im(s, 2);
7381            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7382            if (dflag == MO_16) {
7383                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7384            }
7385            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7386            break;
7387
7388        case 0xd0: /* xgetbv */
7389            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7390                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7391                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7392                goto illegal_op;
7393            }
7394            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7395            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7396            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7397            break;
7398
7399        case 0xd1: /* xsetbv */
7400            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7401                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7402                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7403                goto illegal_op;
7404            }
7405            if (s->cpl != 0) {
7406                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7407                break;
7408            }
7409            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7410                                  cpu_regs[R_EDX]);
7411            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7412            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7413            /* End TB because translation flags may change.  */
7414            gen_jmp_im(s, s->pc - s->cs_base);
7415            gen_eob(s);
7416            break;
7417
7418        case 0xd8: /* VMRUN */
7419            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7420                goto illegal_op;
7421            }
7422            if (s->cpl != 0) {
7423                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7424                break;
7425            }
7426            gen_update_cc_op(s);
7427            gen_jmp_im(s, pc_start - s->cs_base);
7428            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7429                             tcg_const_i32(s->pc - pc_start));
7430            tcg_gen_exit_tb(NULL, 0);
7431            s->base.is_jmp = DISAS_NORETURN;
7432            break;
7433
7434        case 0xd9: /* VMMCALL */
7435            if (!(s->flags & HF_SVME_MASK)) {
7436                goto illegal_op;
7437            }
7438            gen_update_cc_op(s);
7439            gen_jmp_im(s, pc_start - s->cs_base);
7440            gen_helper_vmmcall(cpu_env);
7441            break;
7442
7443        case 0xda: /* VMLOAD */
7444            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7445                goto illegal_op;
7446            }
7447            if (s->cpl != 0) {
7448                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7449                break;
7450            }
7451            gen_update_cc_op(s);
7452            gen_jmp_im(s, pc_start - s->cs_base);
7453            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7454            break;
7455
7456        case 0xdb: /* VMSAVE */
7457            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7458                goto illegal_op;
7459            }
7460            if (s->cpl != 0) {
7461                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7462                break;
7463            }
7464            gen_update_cc_op(s);
7465            gen_jmp_im(s, pc_start - s->cs_base);
7466            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7467            break;
7468
7469        case 0xdc: /* STGI */
7470            if ((!(s->flags & HF_SVME_MASK)
7471                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7472                || !s->pe) {
7473                goto illegal_op;
7474            }
7475            if (s->cpl != 0) {
7476                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7477                break;
7478            }
7479            gen_update_cc_op(s);
7480            gen_helper_stgi(cpu_env);
7481            gen_jmp_im(s, s->pc - s->cs_base);
7482            gen_eob(s);
7483            break;
7484
7485        case 0xdd: /* CLGI */
7486            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7487                goto illegal_op;
7488            }
7489            if (s->cpl != 0) {
7490                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7491                break;
7492            }
7493            gen_update_cc_op(s);
7494            gen_jmp_im(s, pc_start - s->cs_base);
7495            gen_helper_clgi(cpu_env);
7496            break;
7497
7498        case 0xde: /* SKINIT */
7499            if ((!(s->flags & HF_SVME_MASK)
7500                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7501                || !s->pe) {
7502                goto illegal_op;
7503            }
7504            gen_update_cc_op(s);
7505            gen_jmp_im(s, pc_start - s->cs_base);
7506            gen_helper_skinit(cpu_env);
7507            break;
7508
7509        case 0xdf: /* INVLPGA */
7510            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7511                goto illegal_op;
7512            }
7513            if (s->cpl != 0) {
7514                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7515                break;
7516            }
7517            gen_update_cc_op(s);
7518            gen_jmp_im(s, pc_start - s->cs_base);
7519            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7520            break;
7521
7522        CASE_MODRM_MEM_OP(2): /* lgdt */
7523            if (s->cpl != 0) {
7524                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7525                break;
7526            }
7527            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7528            gen_lea_modrm(env, s, modrm);
7529            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7530            gen_add_A0_im(s, 2);
7531            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7532            if (dflag == MO_16) {
7533                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7534            }
7535            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7536            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7537            break;
7538
7539        CASE_MODRM_MEM_OP(3): /* lidt */
7540            if (s->cpl != 0) {
7541                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7542                break;
7543            }
7544            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7545            gen_lea_modrm(env, s, modrm);
7546            gen_op_ld_v(s, MO_16, s->T1, s->A0);
7547            gen_add_A0_im(s, 2);
7548            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7549            if (dflag == MO_16) {
7550                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7551            }
7552            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7553            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7554            break;
7555
7556        CASE_MODRM_OP(4): /* smsw */
7557            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7558            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7559            if (CODE64(s)) {
7560                mod = (modrm >> 6) & 3;
7561                ot = (mod != 3 ? MO_16 : s->dflag);
7562            } else {
7563                ot = MO_16;
7564            }
7565            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7566            break;
7567        case 0xee: /* rdpkru */
7568            if (prefixes & PREFIX_LOCK) {
7569                goto illegal_op;
7570            }
7571            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7572            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7573            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7574            break;
7575        case 0xef: /* wrpkru */
7576            if (prefixes & PREFIX_LOCK) {
7577                goto illegal_op;
7578            }
7579            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7580                                  cpu_regs[R_EDX]);
7581            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7582            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7583            break;
7584        CASE_MODRM_OP(6): /* lmsw */
7585            if (s->cpl != 0) {
7586                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7587                break;
7588            }
7589            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7590            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7591            gen_helper_lmsw(cpu_env, s->T0);
7592            gen_jmp_im(s, s->pc - s->cs_base);
7593            gen_eob(s);
7594            break;
7595
7596        CASE_MODRM_MEM_OP(7): /* invlpg */
7597            if (s->cpl != 0) {
7598                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7599                break;
7600            }
7601            gen_update_cc_op(s);
7602            gen_jmp_im(s, pc_start - s->cs_base);
7603            gen_lea_modrm(env, s, modrm);
7604            gen_helper_invlpg(cpu_env, s->A0);
7605            gen_jmp_im(s, s->pc - s->cs_base);
7606            gen_eob(s);
7607            break;
7608
7609        case 0xf8: /* swapgs */
7610#ifdef TARGET_X86_64
7611            if (CODE64(s)) {
7612                if (s->cpl != 0) {
7613                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7614                } else {
7615                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7616                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7617                                  offsetof(CPUX86State, kernelgsbase));
7618                    tcg_gen_st_tl(s->T0, cpu_env,
7619                                  offsetof(CPUX86State, kernelgsbase));
7620                }
7621                break;
7622            }
7623#endif
7624            goto illegal_op;
7625
7626        case 0xf9: /* rdtscp */
7627            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7628                goto illegal_op;
7629            }
7630            gen_update_cc_op(s);
7631            gen_jmp_im(s, pc_start - s->cs_base);
7632            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7633                gen_io_start();
7634            }
7635            gen_helper_rdtscp(cpu_env);
7636            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7637                gen_io_end();
7638                gen_jmp(s, s->pc - s->cs_base);
7639            }
7640            break;
7641
7642        default:
7643            goto unknown_op;
7644        }
7645        break;
7646
7647    case 0x108: /* invd */
7648    case 0x109: /* wbinvd */
7649        if (s->cpl != 0) {
7650            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7651        } else {
7652            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7653            /* nothing to do */
7654        }
7655        break;
7656    case 0x63: /* arpl or movslS (x86_64) */
7657#ifdef TARGET_X86_64
7658        if (CODE64(s)) {
7659            int d_ot;
7660            /* d_ot is the size of destination */
7661            d_ot = dflag;
7662
7663            modrm = x86_ldub_code(env, s);
7664            reg = ((modrm >> 3) & 7) | rex_r;
7665            mod = (modrm >> 6) & 3;
7666            rm = (modrm & 7) | REX_B(s);
7667
7668            if (mod == 3) {
7669                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7670                /* sign extend */
7671                if (d_ot == MO_64) {
7672                    tcg_gen_ext32s_tl(s->T0, s->T0);
7673                }
7674                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7675            } else {
7676                gen_lea_modrm(env, s, modrm);
7677                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7678                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7679            }
7680        } else
7681#endif
7682        {
7683            TCGLabel *label1;
7684            TCGv t0, t1, t2, a0;
7685
7686            if (!s->pe || s->vm86)
7687                goto illegal_op;
7688            t0 = tcg_temp_local_new();
7689            t1 = tcg_temp_local_new();
7690            t2 = tcg_temp_local_new();
7691            ot = MO_16;
7692            modrm = x86_ldub_code(env, s);
7693            reg = (modrm >> 3) & 7;
7694            mod = (modrm >> 6) & 3;
7695            rm = modrm & 7;
7696            if (mod != 3) {
7697                gen_lea_modrm(env, s, modrm);
7698                gen_op_ld_v(s, ot, t0, s->A0);
7699                a0 = tcg_temp_local_new();
7700                tcg_gen_mov_tl(a0, s->A0);
7701            } else {
7702                gen_op_mov_v_reg(s, ot, t0, rm);
7703                a0 = NULL;
7704            }
7705            gen_op_mov_v_reg(s, ot, t1, reg);
7706            tcg_gen_andi_tl(s->tmp0, t0, 3);
7707            tcg_gen_andi_tl(t1, t1, 3);
7708            tcg_gen_movi_tl(t2, 0);
7709            label1 = gen_new_label();
7710            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7711            tcg_gen_andi_tl(t0, t0, ~3);
7712            tcg_gen_or_tl(t0, t0, t1);
7713            tcg_gen_movi_tl(t2, CC_Z);
7714            gen_set_label(label1);
7715            if (mod != 3) {
7716                gen_op_st_v(s, ot, t0, a0);
7717                tcg_temp_free(a0);
7718           } else {
7719                gen_op_mov_reg_v(s, ot, rm, t0);
7720            }
7721            gen_compute_eflags(s);
7722            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7723            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7724            tcg_temp_free(t0);
7725            tcg_temp_free(t1);
7726            tcg_temp_free(t2);
7727        }
7728        break;
7729    case 0x102: /* lar */
7730    case 0x103: /* lsl */
7731        {
7732            TCGLabel *label1;
7733            TCGv t0;
7734            if (!s->pe || s->vm86)
7735                goto illegal_op;
7736            ot = dflag != MO_16 ? MO_32 : MO_16;
7737            modrm = x86_ldub_code(env, s);
7738            reg = ((modrm >> 3) & 7) | rex_r;
7739            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7740            t0 = tcg_temp_local_new();
7741            gen_update_cc_op(s);
7742            if (b == 0x102) {
7743                gen_helper_lar(t0, cpu_env, s->T0);
7744            } else {
7745                gen_helper_lsl(t0, cpu_env, s->T0);
7746            }
7747            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7748            label1 = gen_new_label();
7749            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7750            gen_op_mov_reg_v(s, ot, reg, t0);
7751            gen_set_label(label1);
7752            set_cc_op(s, CC_OP_EFLAGS);
7753            tcg_temp_free(t0);
7754        }
7755        break;
7756    case 0x118:
7757        modrm = x86_ldub_code(env, s);
7758        mod = (modrm >> 6) & 3;
7759        op = (modrm >> 3) & 7;
7760        switch(op) {
7761        case 0: /* prefetchnta */
7762        case 1: /* prefetcht0 */
7763        case 2: /* prefetcht1 */
7764        case 3: /* prefetcht2 */
7765            if (mod == 3)
7766                goto illegal_op;
7767            gen_nop_modrm(env, s, modrm);
7768            /* nothing more to do */
7769            break;
7770        default: /* nop (multi byte) */
7771            gen_nop_modrm(env, s, modrm);
7772            break;
7773        }
7774        break;
7775    case 0x11a:
7776        modrm = x86_ldub_code(env, s);
7777        if (s->flags & HF_MPX_EN_MASK) {
7778            mod = (modrm >> 6) & 3;
7779            reg = ((modrm >> 3) & 7) | rex_r;
7780            if (prefixes & PREFIX_REPZ) {
7781                /* bndcl */
7782                if (reg >= 4
7783                    || (prefixes & PREFIX_LOCK)
7784                    || s->aflag == MO_16) {
7785                    goto illegal_op;
7786                }
7787                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7788            } else if (prefixes & PREFIX_REPNZ) {
7789                /* bndcu */
7790                if (reg >= 4
7791                    || (prefixes & PREFIX_LOCK)
7792                    || s->aflag == MO_16) {
7793                    goto illegal_op;
7794                }
7795                TCGv_i64 notu = tcg_temp_new_i64();
7796                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7797                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7798                tcg_temp_free_i64(notu);
7799            } else if (prefixes & PREFIX_DATA) {
7800                /* bndmov -- from reg/mem */
7801                if (reg >= 4 || s->aflag == MO_16) {
7802                    goto illegal_op;
7803                }
7804                if (mod == 3) {
7805                    int reg2 = (modrm & 7) | REX_B(s);
7806                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7807                        goto illegal_op;
7808                    }
7809                    if (s->flags & HF_MPX_IU_MASK) {
7810                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7811                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7812                    }
7813                } else {
7814                    gen_lea_modrm(env, s, modrm);
7815                    if (CODE64(s)) {
7816                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7817                                            s->mem_index, MO_LEQ);
7818                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7819                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7820                                            s->mem_index, MO_LEQ);
7821                    } else {
7822                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7823                                            s->mem_index, MO_LEUL);
7824                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7825                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7826                                            s->mem_index, MO_LEUL);
7827                    }
7828                    /* bnd registers are now in-use */
7829                    gen_set_hflag(s, HF_MPX_IU_MASK);
7830                }
7831            } else if (mod != 3) {
7832                /* bndldx */
7833                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7834                if (reg >= 4
7835                    || (prefixes & PREFIX_LOCK)
7836                    || s->aflag == MO_16
7837                    || a.base < -1) {
7838                    goto illegal_op;
7839                }
7840                if (a.base >= 0) {
7841                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7842                } else {
7843                    tcg_gen_movi_tl(s->A0, 0);
7844                }
7845                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7846                if (a.index >= 0) {
7847                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7848                } else {
7849                    tcg_gen_movi_tl(s->T0, 0);
7850                }
7851                if (CODE64(s)) {
7852                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7853                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7854                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7855                } else {
7856                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7857                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7858                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7859                }
7860                gen_set_hflag(s, HF_MPX_IU_MASK);
7861            }
7862        }
7863        gen_nop_modrm(env, s, modrm);
7864        break;
7865    case 0x11b:
7866        modrm = x86_ldub_code(env, s);
7867        if (s->flags & HF_MPX_EN_MASK) {
7868            mod = (modrm >> 6) & 3;
7869            reg = ((modrm >> 3) & 7) | rex_r;
7870            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7871                /* bndmk */
7872                if (reg >= 4
7873                    || (prefixes & PREFIX_LOCK)
7874                    || s->aflag == MO_16) {
7875                    goto illegal_op;
7876                }
7877                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7878                if (a.base >= 0) {
7879                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7880                    if (!CODE64(s)) {
7881                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7882                    }
7883                } else if (a.base == -1) {
7884                    /* no base register has lower bound of 0 */
7885                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7886                } else {
7887                    /* rip-relative generates #ud */
7888                    goto illegal_op;
7889                }
7890                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7891                if (!CODE64(s)) {
7892                    tcg_gen_ext32u_tl(s->A0, s->A0);
7893                }
7894                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7895                /* bnd registers are now in-use */
7896                gen_set_hflag(s, HF_MPX_IU_MASK);
7897                break;
7898            } else if (prefixes & PREFIX_REPNZ) {
7899                /* bndcn */
7900                if (reg >= 4
7901                    || (prefixes & PREFIX_LOCK)
7902                    || s->aflag == MO_16) {
7903                    goto illegal_op;
7904                }
7905                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7906            } else if (prefixes & PREFIX_DATA) {
7907                /* bndmov -- to reg/mem */
7908                if (reg >= 4 || s->aflag == MO_16) {
7909                    goto illegal_op;
7910                }
7911                if (mod == 3) {
7912                    int reg2 = (modrm & 7) | REX_B(s);
7913                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7914                        goto illegal_op;
7915                    }
7916                    if (s->flags & HF_MPX_IU_MASK) {
7917                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7918                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7919                    }
7920                } else {
7921                    gen_lea_modrm(env, s, modrm);
7922                    if (CODE64(s)) {
7923                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7924                                            s->mem_index, MO_LEQ);
7925                        tcg_gen_addi_tl(s->A0, s->A0, 8);
7926                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7927                                            s->mem_index, MO_LEQ);
7928                    } else {
7929                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7930                                            s->mem_index, MO_LEUL);
7931                        tcg_gen_addi_tl(s->A0, s->A0, 4);
7932                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7933                                            s->mem_index, MO_LEUL);
7934                    }
7935                }
7936            } else if (mod != 3) {
7937                /* bndstx */
7938                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7939                if (reg >= 4
7940                    || (prefixes & PREFIX_LOCK)
7941                    || s->aflag == MO_16
7942                    || a.base < -1) {
7943                    goto illegal_op;
7944                }
7945                if (a.base >= 0) {
7946                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7947                } else {
7948                    tcg_gen_movi_tl(s->A0, 0);
7949                }
7950                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7951                if (a.index >= 0) {
7952                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7953                } else {
7954                    tcg_gen_movi_tl(s->T0, 0);
7955                }
7956                if (CODE64(s)) {
7957                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7958                                        cpu_bndl[reg], cpu_bndu[reg]);
7959                } else {
7960                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7961                                        cpu_bndl[reg], cpu_bndu[reg]);
7962                }
7963            }
7964        }
7965        gen_nop_modrm(env, s, modrm);
7966        break;
7967    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7968        modrm = x86_ldub_code(env, s);
7969        gen_nop_modrm(env, s, modrm);
7970        break;
7971    case 0x120: /* mov reg, crN */
7972    case 0x122: /* mov crN, reg */
7973        if (s->cpl != 0) {
7974            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7975        } else {
7976            modrm = x86_ldub_code(env, s);
7977            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7978             * AMD documentation (24594.pdf) and testing of
7979             * intel 386 and 486 processors all show that the mod bits
7980             * are assumed to be 1's, regardless of actual values.
7981             */
7982            rm = (modrm & 7) | REX_B(s);
7983            reg = ((modrm >> 3) & 7) | rex_r;
7984            if (CODE64(s))
7985                ot = MO_64;
7986            else
7987                ot = MO_32;
7988            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7989                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7990                reg = 8;
7991            }
7992            switch(reg) {
7993            case 0:
7994            case 2:
7995            case 3:
7996            case 4:
7997            case 8:
7998                gen_update_cc_op(s);
7999                gen_jmp_im(s, pc_start - s->cs_base);
8000                if (b & 2) {
8001                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8002                        gen_io_start();
8003                    }
8004                    gen_op_mov_v_reg(s, ot, s->T0, rm);
8005                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8006                                         s->T0);
8007                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8008                        gen_io_end();
8009                    }
8010                    gen_jmp_im(s, s->pc - s->cs_base);
8011                    gen_eob(s);
8012                } else {
8013                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8014                        gen_io_start();
8015                    }
8016                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8017                    gen_op_mov_reg_v(s, ot, rm, s->T0);
8018                    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8019                        gen_io_end();
8020                    }
8021                }
8022                break;
8023            default:
8024                goto unknown_op;
8025            }
8026        }
8027        break;
8028    case 0x121: /* mov reg, drN */
8029    case 0x123: /* mov drN, reg */
8030        if (s->cpl != 0) {
8031            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8032        } else {
8033            modrm = x86_ldub_code(env, s);
8034            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8035             * AMD documentation (24594.pdf) and testing of
8036             * intel 386 and 486 processors all show that the mod bits
8037             * are assumed to be 1's, regardless of actual values.
8038             */
8039            rm = (modrm & 7) | REX_B(s);
8040            reg = ((modrm >> 3) & 7) | rex_r;
8041            if (CODE64(s))
8042                ot = MO_64;
8043            else
8044                ot = MO_32;
8045            if (reg >= 8) {
8046                goto illegal_op;
8047            }
8048            if (b & 2) {
8049                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8050                gen_op_mov_v_reg(s, ot, s->T0, rm);
8051                tcg_gen_movi_i32(s->tmp2_i32, reg);
8052                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8053                gen_jmp_im(s, s->pc - s->cs_base);
8054                gen_eob(s);
8055            } else {
8056                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8057                tcg_gen_movi_i32(s->tmp2_i32, reg);
8058                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8059                gen_op_mov_reg_v(s, ot, rm, s->T0);
8060            }
8061        }
8062        break;
8063    case 0x106: /* clts */
8064        if (s->cpl != 0) {
8065            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8066        } else {
8067            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8068            gen_helper_clts(cpu_env);
8069            /* abort block because static cpu state changed */
8070            gen_jmp_im(s, s->pc - s->cs_base);
8071            gen_eob(s);
8072        }
8073        break;
8074    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8075    case 0x1c3: /* MOVNTI reg, mem */
8076        if (!(s->cpuid_features & CPUID_SSE2))
8077            goto illegal_op;
8078        ot = mo_64_32(dflag);
8079        modrm = x86_ldub_code(env, s);
8080        mod = (modrm >> 6) & 3;
8081        if (mod == 3)
8082            goto illegal_op;
8083        reg = ((modrm >> 3) & 7) | rex_r;
8084        /* generate a generic store */
8085        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8086        break;
8087    case 0x1ae:
8088        modrm = x86_ldub_code(env, s);
8089        switch (modrm) {
8090        CASE_MODRM_MEM_OP(0): /* fxsave */
8091            if (!(s->cpuid_features & CPUID_FXSR)
8092                || (prefixes & PREFIX_LOCK)) {
8093                goto illegal_op;
8094            }
8095            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8096                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8097                break;
8098            }
8099            gen_lea_modrm(env, s, modrm);
8100            gen_helper_fxsave(cpu_env, s->A0);
8101            break;
8102
8103        CASE_MODRM_MEM_OP(1): /* fxrstor */
8104            if (!(s->cpuid_features & CPUID_FXSR)
8105                || (prefixes & PREFIX_LOCK)) {
8106                goto illegal_op;
8107            }
8108            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8109                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8110                break;
8111            }
8112            gen_lea_modrm(env, s, modrm);
8113            gen_helper_fxrstor(cpu_env, s->A0);
8114            break;
8115
8116        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8117            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8118                goto illegal_op;
8119            }
8120            if (s->flags & HF_TS_MASK) {
8121                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8122                break;
8123            }
8124            gen_lea_modrm(env, s, modrm);
8125            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8126            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8127            break;
8128
8129        CASE_MODRM_MEM_OP(3): /* stmxcsr */
8130            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8131                goto illegal_op;
8132            }
8133            if (s->flags & HF_TS_MASK) {
8134                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8135                break;
8136            }
8137            gen_lea_modrm(env, s, modrm);
8138            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8139            gen_op_st_v(s, MO_32, s->T0, s->A0);
8140            break;
8141
8142        CASE_MODRM_MEM_OP(4): /* xsave */
8143            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8144                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8145                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8146                goto illegal_op;
8147            }
8148            gen_lea_modrm(env, s, modrm);
8149            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8150                                  cpu_regs[R_EDX]);
8151            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8152            break;
8153
8154        CASE_MODRM_MEM_OP(5): /* xrstor */
8155            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8156                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8157                                | PREFIX_REPZ | PREFIX_REPNZ))) {
8158                goto illegal_op;
8159            }
8160            gen_lea_modrm(env, s, modrm);
8161            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8162                                  cpu_regs[R_EDX]);
8163            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8164            /* XRSTOR is how MPX is enabled, which changes how
8165               we translate.  Thus we need to end the TB.  */
8166            gen_update_cc_op(s);
8167            gen_jmp_im(s, s->pc - s->cs_base);
8168            gen_eob(s);
8169            break;
8170
8171        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8172            if (prefixes & PREFIX_LOCK) {
8173                goto illegal_op;
8174            }
8175            if (prefixes & PREFIX_DATA) {
8176                /* clwb */
8177                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8178                    goto illegal_op;
8179                }
8180                gen_nop_modrm(env, s, modrm);
8181            } else {
8182                /* xsaveopt */
8183                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8184                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8185                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8186                    goto illegal_op;
8187                }
8188                gen_lea_modrm(env, s, modrm);
8189                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8190                                      cpu_regs[R_EDX]);
8191                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8192            }
8193            break;
8194
8195        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8196            if (prefixes & PREFIX_LOCK) {
8197                goto illegal_op;
8198            }
8199            if (prefixes & PREFIX_DATA) {
8200                /* clflushopt */
8201                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8202                    goto illegal_op;
8203                }
8204            } else {
8205                /* clflush */
8206                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8207                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
8208                    goto illegal_op;
8209                }
8210            }
8211            gen_nop_modrm(env, s, modrm);
8212            break;
8213
8214        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8215        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8216        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8217        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8218            if (CODE64(s)
8219                && (prefixes & PREFIX_REPZ)
8220                && !(prefixes & PREFIX_LOCK)
8221                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8222                TCGv base, treg, src, dst;
8223
8224                /* Preserve hflags bits by testing CR4 at runtime.  */
8225                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8226                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8227
8228                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8229                treg = cpu_regs[(modrm & 7) | REX_B(s)];
8230
8231                if (modrm & 0x10) {
8232                    /* wr*base */
8233                    dst = base, src = treg;
8234                } else {
8235                    /* rd*base */
8236                    dst = treg, src = base;
8237                }
8238
8239                if (s->dflag == MO_32) {
8240                    tcg_gen_ext32u_tl(dst, src);
8241                } else {
8242                    tcg_gen_mov_tl(dst, src);
8243                }
8244                break;
8245            }
8246            goto unknown_op;
8247
8248        case 0xf8: /* sfence / pcommit */
8249            if (prefixes & PREFIX_DATA) {
8250                /* pcommit */
8251                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8252                    || (prefixes & PREFIX_LOCK)) {
8253                    goto illegal_op;
8254                }
8255                break;
8256            }
8257            /* fallthru */
8258        case 0xf9 ... 0xff: /* sfence */
8259            if (!(s->cpuid_features & CPUID_SSE)
8260                || (prefixes & PREFIX_LOCK)) {
8261                goto illegal_op;
8262            }
8263            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8264            break;
8265        case 0xe8 ... 0xef: /* lfence */
8266            if (!(s->cpuid_features & CPUID_SSE)
8267                || (prefixes & PREFIX_LOCK)) {
8268                goto illegal_op;
8269            }
8270            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8271            break;
8272        case 0xf0 ... 0xf7: /* mfence */
8273            if (!(s->cpuid_features & CPUID_SSE2)
8274                || (prefixes & PREFIX_LOCK)) {
8275                goto illegal_op;
8276            }
8277            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8278            break;
8279
8280        default:
8281            goto unknown_op;
8282        }
8283        break;
8284
8285    case 0x10d: /* 3DNow! prefetch(w) */
8286        modrm = x86_ldub_code(env, s);
8287        mod = (modrm >> 6) & 3;
8288        if (mod == 3)
8289            goto illegal_op;
8290        gen_nop_modrm(env, s, modrm);
8291        break;
8292    case 0x1aa: /* rsm */
8293        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8294        if (!(s->flags & HF_SMM_MASK))
8295            goto illegal_op;
8296        gen_update_cc_op(s);
8297        gen_jmp_im(s, s->pc - s->cs_base);
8298        gen_helper_rsm(cpu_env);
8299        gen_eob(s);
8300        break;
8301    case 0x1b8: /* SSE4.2 popcnt */
8302        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8303             PREFIX_REPZ)
8304            goto illegal_op;
8305        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8306            goto illegal_op;
8307
8308        modrm = x86_ldub_code(env, s);
8309        reg = ((modrm >> 3) & 7) | rex_r;
8310
8311        if (s->prefix & PREFIX_DATA) {
8312            ot = MO_16;
8313        } else {
8314            ot = mo_64_32(dflag);
8315        }
8316
8317        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8318        gen_extu(ot, s->T0);
8319        tcg_gen_mov_tl(cpu_cc_src, s->T0);
8320        tcg_gen_ctpop_tl(s->T0, s->T0);
8321        gen_op_mov_reg_v(s, ot, reg, s->T0);
8322
8323        set_cc_op(s, CC_OP_POPCNT);
8324        break;
8325    case 0x10e ... 0x10f:
8326        /* 3DNow! instructions, ignore prefixes */
8327        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8328        /* fall through */
8329    case 0x110 ... 0x117:
8330    case 0x128 ... 0x12f:
8331    case 0x138 ... 0x13a:
8332    case 0x150 ... 0x179:
8333    case 0x17c ... 0x17f:
8334    case 0x1c2:
8335    case 0x1c4 ... 0x1c6:
8336    case 0x1d0 ... 0x1fe:
8337        gen_sse(env, s, b, pc_start, rex_r);
8338        break;
8339    default:
8340        goto unknown_op;
8341    }
8342    return s->pc;
8343 illegal_op:
8344    gen_illegal_opcode(s);
8345    return s->pc;
8346 unknown_op:
8347    gen_unknown_opcode(env, s);
8348    return s->pc;
8349}
8350
8351void tcg_x86_init(void)
8352{
8353    static const char reg_names[CPU_NB_REGS][4] = {
8354#ifdef TARGET_X86_64
8355        [R_EAX] = "rax",
8356        [R_EBX] = "rbx",
8357        [R_ECX] = "rcx",
8358        [R_EDX] = "rdx",
8359        [R_ESI] = "rsi",
8360        [R_EDI] = "rdi",
8361        [R_EBP] = "rbp",
8362        [R_ESP] = "rsp",
8363        [8]  = "r8",
8364        [9]  = "r9",
8365        [10] = "r10",
8366        [11] = "r11",
8367        [12] = "r12",
8368        [13] = "r13",
8369        [14] = "r14",
8370        [15] = "r15",
8371#else
8372        [R_EAX] = "eax",
8373        [R_EBX] = "ebx",
8374        [R_ECX] = "ecx",
8375        [R_EDX] = "edx",
8376        [R_ESI] = "esi",
8377        [R_EDI] = "edi",
8378        [R_EBP] = "ebp",
8379        [R_ESP] = "esp",
8380#endif
8381    };
8382    static const char seg_base_names[6][8] = {
8383        [R_CS] = "cs_base",
8384        [R_DS] = "ds_base",
8385        [R_ES] = "es_base",
8386        [R_FS] = "fs_base",
8387        [R_GS] = "gs_base",
8388        [R_SS] = "ss_base",
8389    };
8390    static const char bnd_regl_names[4][8] = {
8391        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8392    };
8393    static const char bnd_regu_names[4][8] = {
8394        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8395    };
8396    int i;
8397
8398    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8399                                       offsetof(CPUX86State, cc_op), "cc_op");
8400    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8401                                    "cc_dst");
8402    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8403                                    "cc_src");
8404    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8405                                     "cc_src2");
8406
8407    for (i = 0; i < CPU_NB_REGS; ++i) {
8408        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8409                                         offsetof(CPUX86State, regs[i]),
8410                                         reg_names[i]);
8411    }
8412
8413    for (i = 0; i < 6; ++i) {
8414        cpu_seg_base[i]
8415            = tcg_global_mem_new(cpu_env,
8416                                 offsetof(CPUX86State, segs[i].base),
8417                                 seg_base_names[i]);
8418    }
8419
8420    for (i = 0; i < 4; ++i) {
8421        cpu_bndl[i]
8422            = tcg_global_mem_new_i64(cpu_env,
8423                                     offsetof(CPUX86State, bnd_regs[i].lb),
8424                                     bnd_regl_names[i]);
8425        cpu_bndu[i]
8426            = tcg_global_mem_new_i64(cpu_env,
8427                                     offsetof(CPUX86State, bnd_regs[i].ub),
8428                                     bnd_regu_names[i]);
8429    }
8430}
8431
/*
 * One-time setup of the DisasContext for a new TB: decode the mode and
 * privilege bits packed into tb->flags, cache the CPUID feature words
 * consulted by the decoder, and allocate the TCG temporaries used
 * throughout translation of this TB.
 */
static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUX86State *env = cpu->env_ptr;
    uint32_t flags = dc->base.tb->flags;
    target_ulong cs_base = dc->base.tb->cs_base;

    /* Execution mode, privilege level and flag bits from tb->flags.  */
    dc->pe = (flags >> HF_PE_SHIFT) & 1;
    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
    dc->f_st = 0;
    dc->vm86 = (flags >> VM_SHIFT) & 1;
    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
    dc->iopl = (flags >> IOPL_SHIFT) & 3;
    dc->tf = (flags >> TF_SHIFT) & 1;
    /* Condition codes start unknown; tracked lazily during translation.  */
    dc->cc_op = CC_OP_DYNAMIC;
    dc->cc_op_dirty = false;
    dc->cs_base = cs_base;
    dc->popl_esp_hack = 0;
    /* select memory access functions */
    dc->mem_index = 0;
#ifdef CONFIG_SOFTMMU
    dc->mem_index = cpu_mmu_index(env, false);
#endif
    /* Cache the CPUID feature words the decoder tests per-instruction.  */
    dc->cpuid_features = env->features[FEAT_1_EDX];
    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
#ifdef TARGET_X86_64
    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
#endif
    dc->flags = flags;
    /* Direct-jump optimization is off under TF, singlestep or when an
       interrupt-inhibit window is pending.  */
    dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
                    (flags & HF_INHIBIT_IRQ_MASK));
    /* Do not optimize repz jumps at all in icount mode, because
       rep movsS instructions are executed with different paths
       in !repz_opt and repz_opt modes. The first one was used
       always except single step mode. And this setting
       disables jumps optimization and control paths become
       equivalent in run and single step modes.
       Now there will be no jump optimization for repz in
       record/replay modes and there will always be an
       additional step for ecx=0 when icount is enabled.
     */
    dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
#if 0
    /* check addseg logic */
    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
        printf("ERROR addseg\n");
#endif

    /* Scratch temporaries shared by the whole translation of this TB.  */
    dc->T0 = tcg_temp_new();
    dc->T1 = tcg_temp_new();
    dc->A0 = tcg_temp_new();

    dc->tmp0 = tcg_temp_new();
    dc->tmp1_i64 = tcg_temp_new_i64();
    dc->tmp2_i32 = tcg_temp_new_i32();
    dc->tmp3_i32 = tcg_temp_new_i32();
    dc->tmp4 = tcg_temp_new();
    dc->ptr0 = tcg_temp_new_ptr();
    dc->ptr1 = tcg_temp_new_ptr();
    /* Local temp: unlike plain temps, its value survives across branches.  */
    dc->cc_srcT = tcg_temp_local_new();
}
8500
/* Per-TB start hook required by TranslatorOps; nothing to do for i386.  */
static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
8504
/* Record per-instruction metadata (pc and current cc_op) so that
   restore_state_to_opc() can rebuild state after a mid-TB exception.  */
static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
}
8511
8512static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8513                                     const CPUBreakpoint *bp)
8514{
8515    DisasContext *dc = container_of(dcbase, DisasContext, base);
8516    /* If RF is set, suppress an internally generated breakpoint.  */
8517    int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8518    if (bp->flags & flags) {
8519        gen_debug(dc, dc->base.pc_next - dc->cs_base);
8520        dc->base.is_jmp = DISAS_NORETURN;
8521        /* The address covered by the breakpoint must be included in
8522           [tb->pc, tb->pc + tb->size) in order to for it to be
8523           properly cleared -- thus we increment the PC here so that
8524           the generic logic setting tb->size later does the right thing.  */
8525        dc->base.pc_next += 1;
8526        return true;
8527    } else {
8528        return false;
8529    }
8530}
8531
/*
 * Translate one guest instruction and decide whether the TB must end
 * after it.  disas_insn() returns the pc of the following instruction.
 */
static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_next = disas_insn(dc, cpu);

    if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
        /* if single step mode, we generate only one instruction and
           generate an exception */
        /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
           the flag and abort the translation to give the irqs a
           chance to happen */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
               && ((pc_next & TARGET_PAGE_MASK)
                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
                       & TARGET_PAGE_MASK)
                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
        /* Do not cross the boundary of the pages in icount mode,
           it can cause an exception. Do it only when boundary is
           crossed by the first instruction in the block.
           If current instruction already crossed the bound - it's ok,
           because an exception hasn't stopped this code.
         */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
        /* Cap TB size: stop before the TB approaches a full page.  */
        dc->base.is_jmp = DISAS_TOO_MANY;
    }

    dc->base.pc_next = pc_next;
}
8562
/* Finalize the TB.  Only DISAS_TOO_MANY needs an epilogue here: emit the
   jump to the next instruction and end the block.  Other is_jmp values
   are presumed to have generated their own exit already.  */
static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (dc->base.is_jmp == DISAS_TOO_MANY) {
        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
        gen_eob(dc);
    }
}
8572
/* Write the disassembly of this TB's guest instructions to the qemu log,
   prefixed by the symbol (if any) at the TB's start address.  */
static void i386_tr_disas_log(const DisasContextBase *dcbase,
                              CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
}
8581
/* Hook table wiring the i386 front end into the generic translator loop.  */
static const TranslatorOps i386_tr_ops = {
    .init_disas_context = i386_tr_init_disas_context,
    .tb_start           = i386_tr_tb_start,
    .insn_start         = i386_tr_insn_start,
    .breakpoint_check   = i386_tr_breakpoint_check,
    .translate_insn     = i386_tr_translate_insn,
    .tb_stop            = i386_tr_tb_stop,
    .disas_log          = i386_tr_disas_log,
};
8591
/* generate intermediate code for basic block 'tb'.  */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb)
{
    /* The DisasContext lives on the stack for the duration of the loop;
       translator_loop() drives all the i386_tr_* hooks above.  */
    DisasContext dc;

    translator_loop(&i386_tr_ops, &dc.base, cpu, tb);
}
8599
/*
 * Rebuild CPU state for the faulting instruction of a TB.  'data' holds
 * the values recorded by i386_tr_insn_start() via tcg_gen_insn_start():
 * data[0] is the linear pc (eip + cs_base), data[1] the cc_op in effect.
 */
void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
                          target_ulong *data)
{
    int cc_op = data[1];
    env->eip = data[0] - tb->cs_base;
    /* CC_OP_DYNAMIC means env->cc_op was already live at run time;
       only overwrite it when a concrete op was recorded.  */
    if (cc_op != CC_OP_DYNAMIC) {
        env->cc_op = cc_op;
    }
}
8609