/* qemu/target-i386/translate.c */
   1/*
   2 *  i386 translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20
  21#include "qemu/host-utils.h"
  22#include "cpu.h"
  23#include "disas/disas.h"
  24#include "exec/exec-all.h"
  25#include "tcg-op.h"
  26#include "exec/cpu_ldst.h"
  27
  28#include "exec/helper-proto.h"
  29#include "exec/helper-gen.h"
  30
  31#include "trace-tcg.h"
  32#include "exec/log.h"
  33
  34
/* Instruction prefix flag bits accumulated while decoding an insn.  */
#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
/* Without x86_64 support these fold to constants so that the
   64-bit-only code paths compile away entirely.  */
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

/* Count-trailing/leading-zero helpers at target_ulong width.  */
#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

/* As above, but additionally match the register (mod == 3) forms.  */
#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

//#define MACRO_TEST   1
  72//#define MACRO_TEST   1
  73
/* global register indexes */
static TCGv_env cpu_env;
static TCGv cpu_A0;   /* current effective address (see gen_lea_v_seg) */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT; /* lazy-flags inputs */
static TCGv_i32 cpu_cc_op;   /* materialized CC_OP_* value */
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6]; /* per-segment base addresses */
static TCGv_i64 cpu_bndl[4]; /* bndl/bndu: lower/upper bounds -- names
                                suggest MPX BND registers; confirm */
static TCGv_i64 cpu_bndu[4];
/* local temps */
static TCGv cpu_T0, cpu_T1;
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;

#include "exec/gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs; /* when set, byte regs 4..7 never mean AH..BH
                            (see byte_reg_is_xH) */
#endif
  96
typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;   /* combination of PREFIX_* flag bits */
    TCGMemOp aflag;  /* address size of the current insn */
    TCGMemOp dflag;  /* operand size of the current insn */
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty; /* true while cc_op has not been flushed to cpu_cc_op */
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;
} DisasContext;
 142
 143static void gen_eob(DisasContext *s);
 144static void gen_jmp(DisasContext *s, target_ulong eip);
 145static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
 146static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
 147
/* i386 arith/logic operations.  NOTE(review): the ordering appears to
   mirror the x86 group-1 /reg encoding (ADD=0 ... CMP=7) -- confirm
   against the decoder before reordering.  */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};
 159
/* i386 shift ops.  NOTE(review): values appear to follow the /reg
   field of the shift opcode group, with 6 being the undocumented
   SHL alias -- confirm against the decoder.  */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};
 171
/* Jcc/SETcc condition selectors: bit 0 of the opcode inverts the test
   and bits 3..1 pick one of these (see gen_prepare_cc).  */
enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};
 182
/* Operand designators used by the generator helpers; the low values
   alias the integer register numbers.  */
enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};
 198
/* Liveness bits for the cpu_cc_* globals; used by cc_op_live[].  */
enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};
 205
/* Bit set if the global variable is live after setting CC_OP to X.
   cc_srcT is only meaningful for the SUB group, where it holds the
   pre-subtraction left operand (see gen_prepare_eflags_c).  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
};
 226
/* Switch the lazily tracked flags mode to OP, discarding any cc_*
   globals that the new mode no longer reads so TCG can dead-code the
   computations that produced them.  */
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(cpu_cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}
 263
 264static void gen_update_cc_op(DisasContext *s)
 265{
 266    if (s->cc_op_dirty) {
 267        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
 268        s->cc_op_dirty = false;
 269    }
 270}
 271
 272#ifdef TARGET_X86_64
 273
 274#define NB_OP_SIZES 4
 275
 276#else /* !TARGET_X86_64 */
 277
 278#define NB_OP_SIZES 3
 279
 280#endif /* !TARGET_X86_64 */
 281
 282#if defined(HOST_WORDS_BIGENDIAN)
 283#define REG_B_OFFSET (sizeof(target_ulong) - 1)
 284#define REG_H_OFFSET (sizeof(target_ulong) - 2)
 285#define REG_W_OFFSET (sizeof(target_ulong) - 2)
 286#define REG_L_OFFSET (sizeof(target_ulong) - 4)
 287#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
 288#else
 289#define REG_B_OFFSET 0
 290#define REG_H_OFFSET 1
 291#define REG_W_OFFSET 0
 292#define REG_L_OFFSET 0
 293#define REG_LH_OFFSET 4
 294#endif
 295
 296/* In instruction encodings for byte register accesses the
 297 * register number usually indicates "low 8 bits of register N";
 298 * however there are some special cases where N 4..7 indicates
 299 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 300 * true for this special case, false otherwise.
 301 */
 302static inline bool byte_reg_is_xH(int reg)
 303{
 304    if (reg < 4) {
 305        return false;
 306    }
 307#ifdef TARGET_X86_64
 308    if (reg >= 8 || x86_64_hregs) {
 309        return false;
 310    }
 311#endif
 312    return true;
 313}
 314
 315/* Select the size of a push/pop operation.  */
 316static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
 317{
 318    if (CODE64(s)) {
 319        return ot == MO_16 ? MO_16 : MO_64;
 320    } else {
 321        return ot;
 322    }
 323}
 324
 325/* Select the size of the stack pointer.  */
 326static inline TCGMemOp mo_stacksize(DisasContext *s)
 327{
 328    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
 329}
 330
 331/* Select only size 64 else 32.  Used for SSE operand sizes.  */
 332static inline TCGMemOp mo_64_32(TCGMemOp ot)
 333{
 334#ifdef TARGET_X86_64
 335    return ot == MO_64 ? MO_64 : MO_32;
 336#else
 337    return MO_32;
 338#endif
 339}
 340
 341/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
 342   byte vs word opcodes.  */
 343static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
 344{
 345    return b & 1 ? ot : MO_8;
 346}
 347
 348/* Select size 8 if lsb of B is clear, else OT capped at 32.
 349   Used for decoding operand size of port opcodes.  */
 350static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
 351{
 352    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
 353}
 354
/* Store T0 into general register REG with operand size OT, honouring
   the AH/CH/DH/BH encoding for byte accesses and zero-extending the
   upper half on 32-bit writes.  */
static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            /* AH/CH/DH/BH: deposit into bits 15..8 of the xL register.  */
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}
 382
 383static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
 384{
 385    if (ot == MO_8 && byte_reg_is_xH(reg)) {
 386        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
 387        tcg_gen_ext8u_tl(t0, t0);
 388    } else {
 389        tcg_gen_mov_tl(t0, cpu_regs[reg]);
 390    }
 391}
 392
 393static void gen_add_A0_im(DisasContext *s, int val)
 394{
 395    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
 396    if (!CODE64(s)) {
 397        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
 398    }
 399}
 400
/* Store DEST into env->eip (a plain store; does not end the TB).  */
static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}
 405
/* REG += VAL, merged back at operand size SIZE.  */
static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}
 411
/* REG += T0, merged back at operand size SIZE.  */
static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
{
    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}
 417
/* Load T0 from guest address A0, little-endian, with memop width IDX.  */
static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}
 422
/* Store T0 to guest address A0, little-endian, with memop width IDX.  */
static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}
 427
 428static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 429{
 430    if (d == OR_TMP0) {
 431        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
 432    } else {
 433        gen_op_mov_reg_v(idx, d, cpu_T0);
 434    }
 435}
 436
/* Set env->eip to the constant PC via a scratch temporary.  */
static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    gen_op_jmp_v(cpu_tmp0);
}
 442
/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            /* No segment base to add: A0 is just the register.  */
            tcg_gen_mov_tl(cpu_A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0) {
            if (s->addseg) {
                /* A non-zero DS/ES/SS base forces the add below.  */
                ovr_seg = def_seg;
            } else {
                tcg_gen_ext32u_tl(cpu_A0, a0);
                return;
            }
        }
        break;
    case MO_16:
        /* 16 bit address */
        /* Truncate the offset to 16 bits before any base is added.  */
        tcg_gen_ext16u_tl(cpu_A0, a0);
        a0 = cpu_A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(cpu_A0, a0, seg);
        } else if (CODE64(s)) {
            /* 32-bit address in 64-bit mode: zero-extend, then add.  */
            tcg_gen_ext32u_tl(cpu_A0, a0);
            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
        } else {
            /* Legacy mode: add first, then wrap the sum to 32 bits.  */
            tcg_gen_add_tl(cpu_A0, a0, seg);
            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
        }
    }
}
 499
/* A0 = seg:ESI for string ops; default segment DS, override honoured.  */
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}
 504
/* A0 = ES:EDI for string ops; no override is applied (-1).  */
static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}
 509
 510static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
 511{
 512    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
 513    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
 514};
 515
 516static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
 517{
 518    switch (size) {
 519    case MO_8:
 520        if (sign) {
 521            tcg_gen_ext8s_tl(dst, src);
 522        } else {
 523            tcg_gen_ext8u_tl(dst, src);
 524        }
 525        return dst;
 526    case MO_16:
 527        if (sign) {
 528            tcg_gen_ext16s_tl(dst, src);
 529        } else {
 530            tcg_gen_ext16u_tl(dst, src);
 531        }
 532        return dst;
 533#ifdef TARGET_X86_64
 534    case MO_32:
 535        if (sign) {
 536            tcg_gen_ext32s_tl(dst, src);
 537        } else {
 538            tcg_gen_ext32u_tl(dst, src);
 539        }
 540        return dst;
 541#endif
 542    default:
 543        return src;
 544    }
 545}
 546
/* Zero-extend REG in place from width OT.  */
static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}
 551
/* Sign-extend REG in place from width OT.  */
static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}
 556
/* Branch to LABEL1 when ECX, masked to address size SIZE, is non-zero.  */
static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}
 563
/* Branch to LABEL1 when ECX, masked to address size SIZE, is zero.  */
static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}
 570
 571static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
 572{
 573    switch (ot) {
 574    case MO_8:
 575        gen_helper_inb(v, cpu_env, n);
 576        break;
 577    case MO_16:
 578        gen_helper_inw(v, cpu_env, n);
 579        break;
 580    case MO_32:
 581        gen_helper_inl(v, cpu_env, n);
 582        break;
 583    default:
 584        tcg_abort();
 585    }
 586}
 587
 588static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
 589{
 590    switch (ot) {
 591    case MO_8:
 592        gen_helper_outb(cpu_env, v, n);
 593        break;
 594    case MO_16:
 595        gen_helper_outw(cpu_env, v, n);
 596        break;
 597    case MO_32:
 598        gen_helper_outl(cpu_env, v, n);
 599        break;
 600    default:
 601        tcg_abort();
 602    }
 603}
 604
/* Emit the permission checks required before an I/O access of width
   OT: the protected-mode check (helper_check_io*) when CPL > IOPL or
   in vm86 mode, and the SVM I/O-intercept check when SVM intercepts
   are active.  The port number is taken from T0.  */
static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if(s->flags & HF_SVMI_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(cur_eip);
        /* Fold the access size into the intercept information.  */
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}
 637
/* One step of MOVS: load an element of size OT from seg:ESI, store it
   to ES:EDI, then advance both index registers by the direction-scaled
   element size (see gen_op_movl_T0_Dshift).  */
static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}
 648
/* Record the result (T0) for lazy flags evaluation.  */
static void gen_op_update1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}
 653
/* Record result (T0) and second operand (T1) for lazy flags.  */
static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}
 659
/* Record result (T0), operand (T1) and extra input REG (cc_src2)
   for lazy flags.  */
static void gen_op_update3_cc(TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}
 666
/* Flags for TEST: only the AND of the two operands is recorded.  */
static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
}
 671
/* Flags for NEG: dst = result (T0), src = -T0 (the original operand),
   srcT = 0, i.e. flagged like the subtraction 0 - operand.  */
static void gen_op_update_neg_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
    tcg_gen_movi_tl(cpu_cc_srcT, 0);
}
 678
/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        /* CLR: result known zero, so only Z and P are set.  */
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    TCGV_UNUSED(zero);
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        /* Feed a constant zero for every dead input of the helper.  */
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}
 723
/* Description of a flags test reduced to a single TCG comparison:
   COND applied to REG (optionally masked by MASK) against either
   REG2 or the immediate IMM (see gen_setcc1/gen_jcc1).  */
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;     /* -1 means no masking is required */
    bool use_reg2;         /* compare against reg2 instead of imm */
    bool no_setcond;       /* reg already holds the boolean result
                              (inverted when cond is TCG_COND_EQ) */
} CCPrepare;
 733
/* compute eflags.C to reg -- returns a CCPrepare describing the test
   rather than necessarily materializing the bit.  */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
        tcg_gen_mov_tl(t0, cpu_cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
        /* Logic ops and CLR leave C clear.  */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        /* For INC/DEC, C is live directly in cc_src.  */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        /* C is kept directly in cc_dst for the ADCX family.  */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}
 806
/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    /* Parity has no fast path: materialize all flags first.  */
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}
 814
/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            /* S is the sign bit of the (size-extended) result in cc_dst.  */
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}
 838
/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        /* O is kept directly in cc_src2 for the ADOX family.  */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}
 855
/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        /* CLR's zero result always sets Z.  */
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    default:
        {
            /* Z is "the (size-extended) result in cc_dst equals zero".  */
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}
 879
/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranted not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;           /* low bit of B inverts the condition */
    jcc_op = (b >> 1) & 7; /* remaining bits select the JCC_* test */

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            /* Unsigned <=: compare the zero-extended operands.  */
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_extu(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            /* Signed compares need sign-extended operands.  */
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_exts(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            /* L is S != O, computed as (eflags >> 4) ^ eflags on CC_S.  */
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            /* LE is Z | (S != O): same trick, also testing CC_Z.  */
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}
 977
/* Store the 0/1 value of condition B into REG.  */
static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        /* cc.reg already holds the boolean; EQ means it is inverted.  */
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        /* Single-bit mask: extract the bit with shift+and instead of
           a setcond.  */
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}
1007
/* reg = value of eflags.C (JCC_B is the carry test).  */
static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}
1012
/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'.  In the fast case, T0 is guaranteed not to be used.
   Unlike gen_jcc1 below, this variant leaves the cc_op state alone
   (no forcing to CC_OP_DYNAMIC), so the translation block may
   continue normally afterwards.  */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}
1029
/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'.  In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    /* Sync cc_op before the branch and mark it dynamic afterwards:
       control flow diverges here, so the static cc_op no longer
       describes both paths.  */
    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}
1049
/* Emit the "exit when ECX == 0" test for a REP string loop.  The
   fall-through path (after l1) runs when ECX != 0; the returned
   label l2, when jumped to, leaves the loop to next_eip.
   XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}
1062
/* STOS: store the accumulator at ES:[EDI], then advance EDI by the
   direction-dependent step produced by gen_op_movl_T0_Dshift.  */
static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    /* NOTE(review): MO_32 here relies on gen_op_mov_v_reg doing a
       full register move for sizes >= MO_32 — confirm against its
       definition if touching this.  */
    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}
1071
/* LODS: load from DS:[ESI] into the accumulator, then advance ESI
   by the direction-dependent step.  */
static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}
1080
/* SCAS: compare the accumulator with ES:[EDI] (memory value goes to
   T1, gen_op(OP_CMPL) sets the flags), then advance EDI.  */
static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}
1089
/* CMPS: compare DS:[ESI] with ES:[EDI].  The EDI operand is loaded
   into T1 first, then A0 is repointed at ESI so OP_CMPL reads the
   second operand from memory (OR_TMP0).  Both pointers advance.  */
static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}
1100
/* If I/O breakpoints are armed (HF_IOBPT_MASK), call the bpt_io
   helper with the port, the access size in bytes (1 << ot), and the
   EIP of the following instruction.  No-op otherwise.  */
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}
1112
1113
/* INS: read one element from the I/O port in DX and store it at
   ES:[EDI], advancing EDI.  Bracketed by gen_io_start/gen_io_end
   when icount is in use.  */
static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(cpu_T0, 0);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    /* Port number is DX: the low 16 bits of EDX.  */
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    /* Fire an I/O breakpoint check if one is armed on this port.  */
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}
1135
/* OUTS: load one element from DS:[ESI] and write it to the I/O port
   in DX, advancing ESI.  Bracketed by gen_io_start/gen_io_end when
   icount is in use.  */
static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);

    /* Port number is DX: the low 16 bits of EDX.  */
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    /* Fire an I/O breakpoint check if one is armed on this port.  */
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}
1155
/* same method as Valgrind : we generate jumps to current or next
   instruction */
/* GEN_REPZ(op) expands to gen_repz_<op>(): wrap the plain string op
   in a REP loop.  The loop exits up front when ECX == 0 (label from
   gen_jz_ecx_string); otherwise one iteration runs, ECX is
   decremented, and (unless repz_opt short-circuits on ECX == 0) we
   jump back to cur_eip so every iteration is a separate TB.  */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}
1173
/* GEN_REPZ2(op) is GEN_REPZ for ops that also terminate on ZF
   (SCAS/CMPS).  "nz" selects REPNZ (nz=1) vs REPZ (nz=0): gen_jcc1
   on (JCC_Z << 1) | (nz ^ 1) exits to l2 when the repeat condition
   fails.  */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}
1191
/* Instantiate the REP-prefixed string operation generators.  */
GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
1199
1200static void gen_helper_fp_arith_ST0_FT0(int op)
1201{
1202    switch (op) {
1203    case 0:
1204        gen_helper_fadd_ST0_FT0(cpu_env);
1205        break;
1206    case 1:
1207        gen_helper_fmul_ST0_FT0(cpu_env);
1208        break;
1209    case 2:
1210        gen_helper_fcom_ST0_FT0(cpu_env);
1211        break;
1212    case 3:
1213        gen_helper_fcom_ST0_FT0(cpu_env);
1214        break;
1215    case 4:
1216        gen_helper_fsub_ST0_FT0(cpu_env);
1217        break;
1218    case 5:
1219        gen_helper_fsubr_ST0_FT0(cpu_env);
1220        break;
1221    case 6:
1222        gen_helper_fdiv_ST0_FT0(cpu_env);
1223        break;
1224    case 7:
1225        gen_helper_fdivr_ST0_FT0(cpu_env);
1226        break;
1227    }
1228}
1229
1230/* NOTE the exception in "r" op ordering */
1231static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1232{
1233    TCGv_i32 tmp = tcg_const_i32(opreg);
1234    switch (op) {
1235    case 0:
1236        gen_helper_fadd_STN_ST0(cpu_env, tmp);
1237        break;
1238    case 1:
1239        gen_helper_fmul_STN_ST0(cpu_env, tmp);
1240        break;
1241    case 4:
1242        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1243        break;
1244    case 5:
1245        gen_helper_fsub_STN_ST0(cpu_env, tmp);
1246        break;
1247    case 6:
1248        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1249        break;
1250    case 7:
1251        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1252        break;
1253    }
1254}
1255
/* Emit an ALU operation "op" of size "ot": T0 (destination operand,
   register "d", or memory at A0 if d == OR_TMP0) combined with T1.
   The result is stored back and the lazy CC state is updated.  */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    /* Load the destination operand into T0.  */
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T0, d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
    }
    switch(op) {
    case OP_ADCL:
        /* ADC: add T1 plus the incoming carry (materialized into
           tmp4 before it is clobbered by the new CC state).  */
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        /* SBB: subtract T1 plus the incoming carry.  */
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        /* SUB: srcT keeps the pre-subtraction destination so the
           flags can be recomputed lazily.  */
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        /* CMP: compute flags only, nothing is written back.  */
        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
1321
/* INC/DEC (c > 0 increments, otherwise decrements) the operand in
   register "d", or memory at A0 if d == OR_TMP0.  The existing carry
   is captured into cc_src first, since INC/DEC leave CF intact.  */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T0, d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
    }
    /* Preserve CF across the operation by computing it now.  */
    gen_compute_eflags_c(s1, cpu_cc_src);
    if (c > 0) {
        tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
        set_cc_op(s1, CC_OP_INCB + ot);
    } else {
        tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
        set_cc_op(s1, CC_OP_DECB + ot);
    }
    gen_op_st_rm_T0_A0(s1, ot, d);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}
1341
/* Update the lazy CC state after a variable-count shift: "result" is
   the shifted value, "shm1" the value shifted by count-1 (stored as
   CC_SRC), "count" the masked shift count.  A zero count must leave
   the flags untouched, hence the conditional stores.  */
static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
        oldop = cpu_tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1386
/* Variable-count SHL/SHR/SAR with count in T1.  Operand is register
   op1 or memory at A0 (op1 == OR_TMP0).  cpu_tmp0 receives the value
   shifted by count-1, which gen_shift_flags stores as CC_SRC.  */
static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    /* Hardware masks the count to 5 bits (6 for 64-bit operands).  */
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);

    if (is_right) {
        if (is_arith) {
            /* SAR: sign-extend first so the shift replicates the sign.  */
            gen_exts(ot, cpu_T0);
            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            /* SHR: zero-extend first.  */
            gen_extu(ot, cpu_T0);
            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
        }
    } else {
        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
}
1422
/* Immediate-count SHL/SHR/SAR.  Like gen_shift_rm_T1 but the masked
   count op2 is known at translation time, so the zero-count case can
   skip flag updates entirely.  cpu_tmp4 gets value >> (op2-1).  */
static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T0);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
            } else {
                gen_extu(ot, cpu_T0);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}
1462
/* Variable-count ROL/ROR with count in T1.  Operand is register op1
   or memory at A0 (op1 == OR_TMP0).  Flags: C and O are derived from
   the rotated result; a zero count must leave CC_OP at EFLAGS.  */
static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.
           (Multiplying by 0x01010101 copies the byte into all four
           byte lanes.)  */
        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
        if (is_right) {
            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        } else {
            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        }
        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, cpu_T1);
    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        cpu_tmp2_i32, cpu_tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1547
/* Immediate-count ROL/ROR.  Same flag scheme as gen_rot_rm_T1, but
   since op2 is known at translation time, a zero count statically
   skips the flag update and narrow ops use a shift/or pair instead
   of the byte/word replication trick.  */
static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
            if (is_right) {
                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
            } else {
                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            /* Narrow rotate built from two shifts and an or; a right
               rotate by N is a left rotate by width - N.  */
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, cpu_T0);
            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}
1624
/* XXX: add faster immediate = 1 case */
/* RCL/RCR (rotate through carry) with count in T1, implemented via
   helpers.  Flags must be in CC_OP_EFLAGS form first, since the
   helpers read and write the carry directly.  */
static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                           int is_right)
{
    gen_compute_eflags(s);
    assert(s->cc_op == CC_OP_EFLAGS);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    
    if (is_right) {
        switch (ot) {
        case MO_8:
            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
        case MO_16:
            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
        case MO_32:
            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
#endif
        default:
            tcg_abort();
        }
    } else {
        switch (ot) {
        case MO_8:
            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
        case MO_16:
            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
        case MO_32:
            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
            break;
#endif
        default:
            tcg_abort();
        }
    }
    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);
}
1680
/* XXX: add faster immediate case */
/* SHLD/SHRD (double-precision shift): shift T0 with bits fed in from
   T1, count from count_in.  cpu_tmp0 receives the value shifted by
   count-1 for the flag computation in gen_shift_flags.  */
static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                             bool is_right, TCGv count_in)
{
    target_ulong mask = (ot == MO_64 ? 63 : 31);
    TCGv count;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    count = tcg_temp_new();
    tcg_gen_andi_tl(count, count_in, mask);

    switch (ot) {
    case MO_16:
        /* Note: we implement the Intel behaviour for shift count > 16.
           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
           portion by constructing it as a 32-bit value.  */
        if (is_right) {
            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
            tcg_gen_mov_tl(cpu_T1, cpu_T0);
            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
        } else {
            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
        }
        /* FALLTHRU */
#ifdef TARGET_X86_64
    case MO_32:
        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
        tcg_gen_subi_tl(cpu_tmp0, count, 1);
        if (is_right) {
            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
        } else {
            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
        }
        break;
#endif
    default:
        /* Shift T0 and T1 in opposite directions by complementary
           counts, then merge the halves.  */
        tcg_gen_subi_tl(cpu_tmp0, count, 1);
        if (is_right) {
            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);

            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
        } else {
            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            if (ot == MO_16) {
                /* Only needed if count > 16, for Intel behaviour.  */
                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
            }

            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
        }
        /* Zero T1 when the count is zero so the final OR is a no-op.  */
        tcg_gen_movi_tl(cpu_tmp4, 0);
        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
                           cpu_tmp4, cpu_T1);
        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
    tcg_temp_free(count);
}
1762
1763static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1764{
1765    if (s != OR_TMP1)
1766        gen_op_mov_v_reg(ot, cpu_T1, s);
1767    switch(op) {
1768    case OP_ROL:
1769        gen_rot_rm_T1(s1, ot, d, 0);
1770        break;
1771    case OP_ROR:
1772        gen_rot_rm_T1(s1, ot, d, 1);
1773        break;
1774    case OP_SHL:
1775    case OP_SHL1:
1776        gen_shift_rm_T1(s1, ot, d, 0, 0);
1777        break;
1778    case OP_SHR:
1779        gen_shift_rm_T1(s1, ot, d, 1, 0);
1780        break;
1781    case OP_SAR:
1782        gen_shift_rm_T1(s1, ot, d, 1, 1);
1783        break;
1784    case OP_RCL:
1785        gen_rotc_rm_T1(s1, ot, d, 0);
1786        break;
1787    case OP_RCR:
1788        gen_rotc_rm_T1(s1, ot, d, 1);
1789        break;
1790    }
1791}
1792
1793static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1794{
1795    switch(op) {
1796    case OP_ROL:
1797        gen_rot_rm_im(s1, ot, d, c, 0);
1798        break;
1799    case OP_ROR:
1800        gen_rot_rm_im(s1, ot, d, c, 1);
1801        break;
1802    case OP_SHL:
1803    case OP_SHL1:
1804        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1805        break;
1806    case OP_SHR:
1807        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1808        break;
1809    case OP_SAR:
1810        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1811        break;
1812    default:
1813        /* currently not optimized */
1814        tcg_gen_movi_tl(cpu_T1, c);
1815        gen_shift(s1, op, ot, d, OR_TMP1);
1816        break;
1817    }
1818}
1819
/* Decompose an address.  */

typedef struct AddressParts {
    int def_seg;        /* default segment (R_DS, or R_SS when implied) */
    int base;           /* base register; -1 = none, -2 = rip-relative */
    int index;          /* index register; -1 = none */
    int scale;          /* log2 of the index scale factor */
    target_long disp;   /* constant displacement */
} AddressParts;
1829
/* Decode a modrm memory operand (consuming any SIB and displacement
   bytes from the instruction stream) into an AddressParts.  No TCG
   code is emitted.  */
static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
                                    int modrm)
{
    int def_seg, base, index, scale, mod, rm;
    target_long disp;
    bool havesib;

    def_seg = R_DS;
    index = -1;
    scale = 0;
    disp = 0;

    mod = (modrm >> 6) & 3;
    rm = modrm & 7;
    base = rm | REX_B(s);

    if (mod == 3) {
        /* Normally filtered out earlier, but including this path
           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
        goto done;
    }

    switch (s->aflag) {
    case MO_64:
    case MO_32:
        havesib = 0;
        if (rm == 4) {
            /* SIB byte: scale/index/base.  Index 4 (ESP) means no
               index.  */
            int code = cpu_ldub_code(env, s->pc++);
            scale = (code >> 6) & 3;
            index = ((code >> 3) & 7) | REX_X(s);
            if (index == 4) {
                index = -1;  /* no index */
            }
            base = (code & 7) | REX_B(s);
            havesib = 1;
        }

        switch (mod) {
        case 0:
            if ((base & 7) == 5) {
                /* mod=0, base=5: disp32 only; in 64-bit mode without
                   a SIB byte this encodes a RIP-relative address.  */
                base = -1;
                disp = (int32_t)cpu_ldl_code(env, s->pc);
                s->pc += 4;
                if (CODE64(s) && !havesib) {
                    base = -2;
                    disp += s->pc + s->rip_offset;
                }
            }
            break;
        case 1:
            disp = (int8_t)cpu_ldub_code(env, s->pc++);
            break;
        default:
        case 2:
            disp = (int32_t)cpu_ldl_code(env, s->pc);
            s->pc += 4;
            break;
        }

        /* For correct popl handling with esp.  */
        if (base == R_ESP && s->popl_esp_hack) {
            disp += s->popl_esp_hack;
        }
        /* EBP/ESP bases default to the stack segment.  */
        if (base == R_EBP || base == R_ESP) {
            def_seg = R_SS;
        }
        break;

    case MO_16:
        /* 16-bit addressing: fixed base/index pairs selected by rm.  */
        if (mod == 0) {
            if (rm == 6) {
                /* mod=0, rm=6: disp16 only, no base.  */
                base = -1;
                disp = cpu_lduw_code(env, s->pc);
                s->pc += 2;
                break;
            }
        } else if (mod == 1) {
            disp = (int8_t)cpu_ldub_code(env, s->pc++);
        } else {
            disp = (int16_t)cpu_lduw_code(env, s->pc);
            s->pc += 2;
        }

        switch (rm) {
        case 0:
            base = R_EBX;
            index = R_ESI;
            break;
        case 1:
            base = R_EBX;
            index = R_EDI;
            break;
        case 2:
            base = R_EBP;
            index = R_ESI;
            def_seg = R_SS;
            break;
        case 3:
            base = R_EBP;
            index = R_EDI;
            def_seg = R_SS;
            break;
        case 4:
            base = R_ESI;
            break;
        case 5:
            base = R_EDI;
            break;
        case 6:
            base = R_EBP;
            def_seg = R_SS;
            break;
        default:
        case 7:
            base = R_EBX;
            break;
        }
        break;

    default:
        tcg_abort();
    }

 done:
    return (AddressParts){ def_seg, base, index, scale, disp };
}
1956
/* Compute the address, with a minimum number of TCG ops.  */
static TCGv gen_lea_modrm_1(AddressParts a)
{
    TCGv ea;

    /* Start with no value; cpu_A0 is clobbered only when some
       arithmetic is actually required.  */
    TCGV_UNUSED(ea);
    if (a.index >= 0) {
        if (a.scale == 0) {
            /* Unscaled index: use the register directly.  */
            ea = cpu_regs[a.index];
        } else {
            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
            ea = cpu_A0;
        }
        if (a.base >= 0) {
            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
            ea = cpu_A0;
        }
    } else if (a.base >= 0) {
        ea = cpu_regs[a.base];
    }
    if (TCGV_IS_UNUSED(ea)) {
        /* Neither base nor index: the address is the displacement.  */
        tcg_gen_movi_tl(cpu_A0, a.disp);
        ea = cpu_A0;
    } else if (a.disp != 0) {
        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
        ea = cpu_A0;
    }

    return ea;
}
1987
1988static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
1989{
1990    AddressParts a = gen_lea_modrm_0(env, s, modrm);
1991    TCGv ea = gen_lea_modrm_1(a);
1992    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
1993}
1994
/* Decode a ModRM byte and its displacement without emitting any code;
   used by instructions that only need the operand bytes consumed.  */
static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
{
    (void)gen_lea_modrm_0(env, s, modrm);
}
1999
/* Used for BNDCL, BNDCU, BNDCN.  Compares the effective address of the
   ModRM operand against bound 'bndv' using 'cond', and passes the
   result to the bndck helper for the runtime check.  */
static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
                      TCGCond cond, TCGv_i64 bndv)
{
    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));

    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
    if (!CODE64(s)) {
        /* Outside 64-bit mode only the low 32 address bits are used.  */
        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
    }
    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
}
2014
/* used for LEA and MOV AX, mem; applies the DS (or override) segment
   base to the address already in cpu_A0 */
static void gen_add_A0_ds_seg(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
}
2020
2021/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2022   OR_TMP0 */
2023static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2024                           TCGMemOp ot, int reg, int is_store)
2025{
2026    int mod, rm;
2027
2028    mod = (modrm >> 6) & 3;
2029    rm = (modrm & 7) | REX_B(s);
2030    if (mod == 3) {
2031        if (is_store) {
2032            if (reg != OR_TMP0)
2033                gen_op_mov_v_reg(ot, cpu_T0, reg);
2034            gen_op_mov_reg_v(ot, rm, cpu_T0);
2035        } else {
2036            gen_op_mov_v_reg(ot, cpu_T0, rm);
2037            if (reg != OR_TMP0)
2038                gen_op_mov_reg_v(ot, reg, cpu_T0);
2039        }
2040    } else {
2041        gen_lea_modrm(env, s, modrm);
2042        if (is_store) {
2043            if (reg != OR_TMP0)
2044                gen_op_mov_v_reg(ot, cpu_T0, reg);
2045            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2046        } else {
2047            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2048            if (reg != OR_TMP0)
2049                gen_op_mov_reg_v(ot, reg, cpu_T0);
2050        }
2051    }
2052}
2053
2054static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2055{
2056    uint32_t ret;
2057
2058    switch (ot) {
2059    case MO_8:
2060        ret = cpu_ldub_code(env, s->pc);
2061        s->pc++;
2062        break;
2063    case MO_16:
2064        ret = cpu_lduw_code(env, s->pc);
2065        s->pc += 2;
2066        break;
2067    case MO_32:
2068#ifdef TARGET_X86_64
2069    case MO_64:
2070#endif
2071        ret = cpu_ldl_code(env, s->pc);
2072        s->pc += 4;
2073        break;
2074    default:
2075        tcg_abort();
2076    }
2077    return ret;
2078}
2079
2080static inline int insn_const_size(TCGMemOp ot)
2081{
2082    if (ot <= MO_32) {
2083        return 1 << ot;
2084    } else {
2085        return 4;
2086    }
2087}
2088
2089static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2090{
2091#ifndef CONFIG_USER_ONLY
2092    return (pc & TARGET_PAGE_MASK) == (s->tb->pc & TARGET_PAGE_MASK) ||
2093           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2094#else
2095    return true;
2096#endif
2097}
2098
/* Jump to 'eip', chaining directly to the next TB when allowed,
   otherwise falling back to a full end-of-block exit.  */
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
    target_ulong pc = s->cs_base + eip;

    if (use_goto_tb(s, pc))  {
        /* jump to same page: we can use a direct jump */
        tcg_gen_goto_tb(tb_num);
        gen_jmp_im(eip);
        tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
    } else {
        /* jump to another page: currently not optimized */
        gen_jmp_im(eip);
        gen_eob(s);
    }
}
2114
/* Emit a conditional jump: taken goes to 'val', not-taken falls
   through to 'next_eip'.  */
static inline void gen_jcc(DisasContext *s, int b,
                           target_ulong val, target_ulong next_eip)
{
    TCGLabel *l1, *l2;

    if (s->jmp_opt) {
        /* Chaining allowed: emit both destinations as goto_tb exits.  */
        l1 = gen_new_label();
        gen_jcc1(s, b, l1);

        gen_goto_tb(s, 0, next_eip);

        gen_set_label(l1);
        gen_goto_tb(s, 1, val);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        /* No chaining: set EIP on either path, then end the block.  */
        l1 = gen_new_label();
        l2 = gen_new_label();
        gen_jcc1(s, b, l1);

        gen_jmp_im(next_eip);
        tcg_gen_br(l2);

        gen_set_label(l1);
        gen_jmp_im(val);
        gen_set_label(l2);
        gen_eob(s);
    }
}
2143
/* Emit CMOVcc: load the ModRM operand into cpu_T0, then conditionally
   move it into register 'reg' according to condition code 'b'.  */
static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
                        int modrm, int reg)
{
    CCPrepare cc;

    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

    cc = gen_prepare_cc(s, b, cpu_T1);
    if (cc.mask != -1) {
        /* Isolate the tested bits in a fresh temporary.  */
        TCGv t0 = tcg_temp_new();
        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
        cc.reg = t0;
    }
    if (!cc.use_reg2) {
        cc.reg2 = tcg_const_tl(cc.imm);
    }

    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
                       cpu_T0, cpu_regs[reg]);
    gen_op_mov_reg_v(ot, reg, cpu_T0);

    /* Release any temporaries allocated above.  */
    if (cc.mask != -1) {
        tcg_temp_free(cc.reg);
    }
    if (!cc.use_reg2) {
        tcg_temp_free(cc.reg2);
    }
}
2172
/* Load the 16-bit selector of segment 'seg_reg' into cpu_T0,
   zero-extended to target width.  */
static inline void gen_op_movl_T0_seg(int seg_reg)
{
    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
}
2178
/* Real/VM86-mode segment load: store the 16-bit selector from cpu_T0
   and derive the segment base as selector << 4.  No descriptor checks
   are performed on this path.  */
static inline void gen_op_movl_seg_T0_vm(int seg_reg)
{
    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
    tcg_gen_st32_tl(cpu_T0, cpu_env,
                    offsetof(CPUX86State,segs[seg_reg].selector));
    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
}
2186
2187/* move T0 to seg_reg and compute if the CPU state may change. Never
2188   call this function with seg_reg == R_CS */
2189static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2190{
2191    if (s->pe && !s->vm86) {
2192        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2193        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2194        /* abort translation because the addseg value may change or
2195           because ss32 may change. For R_SS, translation must always
2196           stop as a special handling must be done to disable hardware
2197           interrupts for the next instruction */
2198        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2199            s->is_jmp = DISAS_TB_JUMP;
2200    } else {
2201        gen_op_movl_seg_T0_vm(seg_reg);
2202        if (seg_reg == R_SS)
2203            s->is_jmp = DISAS_TB_JUMP;
2204    }
2205}
2206
2207static inline int svm_is_rep(int prefixes)
2208{
2209    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2210}
2211
2212static inline void
2213gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2214                              uint32_t type, uint64_t param)
2215{
2216    /* no SVM activated; fast case */
2217    if (likely(!(s->flags & HF_SVMI_MASK)))
2218        return;
2219    gen_update_cc_op(s);
2220    gen_jmp_im(pc_start - s->cs_base);
2221    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2222                                         tcg_const_i64(param));
2223}
2224
/* Convenience wrapper: SVM intercept check with a zero parameter.  */
static inline void
gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
{
    gen_svm_check_intercept_param(s, pc_start, type, 0);
}
2230
/* Add 'addend' to ESP/RSP at the current stack-pointer width.  */
static inline void gen_stack_update(DisasContext *s, int addend)
{
    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
}
2235
/* Generate a push. It depends on ss32, addseg and dflag.  */
static void gen_push_v(DisasContext *s, TCGv val)
{
    TCGMemOp d_ot = mo_pushpop(s, s->dflag);   /* operand (data) size */
    TCGMemOp a_ot = mo_stacksize(s);           /* stack-pointer size */
    int size = 1 << d_ot;
    TCGv new_esp = cpu_A0;

    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);

    if (!CODE64(s)) {
        if (s->addseg) {
            /* The segment computation below clobbers cpu_A0, so save
               the decremented ESP value in cpu_tmp4 first.  */
            new_esp = cpu_tmp4;
            tcg_gen_mov_tl(new_esp, cpu_A0);
        }
        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
    }

    /* Store first so that a faulting store leaves ESP unmodified.  */
    gen_op_st_v(s, d_ot, val, cpu_A0);
    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
}
2257
/* two step pop is necessary for precise exceptions: this loads the
   top of stack into cpu_T0; ESP is updated separately afterwards by
   gen_pop_update() once the load can no longer fault */
static TCGMemOp gen_pop_T0(DisasContext *s)
{
    TCGMemOp d_ot = mo_pushpop(s, s->dflag);

    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);

    return d_ot;
}
2268
/* Second half of a pop: advance ESP past the just-read operand.  */
static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
{
    gen_stack_update(s, 1 << ot);
}
2273
/* Compute the SS-relative address of the stack top into cpu_A0.
   NOTE(review): unlike the other stack helpers this tests ss32
   directly instead of using mo_stacksize(); presumably all callers
   are 16/32-bit-only paths -- confirm before reusing from 64-bit
   code.  */
static inline void gen_stack_A0(DisasContext *s)
{
    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
}
2278
/* PUSHA/PUSHAD: push all eight general registers (highest register
   number ends up at the lowest address).  */
static void gen_pusha(DisasContext *s)
{
    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;   /* stack-pointer size */
    TCGMemOp d_ot = s->dflag;                  /* operand size */
    int size = 1 << d_ot;
    int i;

    for (i = 0; i < 8; i++) {
        /* All slots are addressed from the original ESP; ESP itself is
           written once, after every store has succeeded.  */
        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
    }

    gen_stack_update(s, -8 * size);
}
2294
/* POPA/POPAD: reload the general registers from the stack; the popped
   ESP slot is skipped.  */
static void gen_popa(DisasContext *s)
{
    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;   /* stack-pointer size */
    TCGMemOp d_ot = s->dflag;                  /* operand size */
    int size = 1 << d_ot;
    int i;

    for (i = 0; i < 8; i++) {
        /* ESP is not reloaded */
        if (7 - i == R_ESP) {
            continue;
        }
        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
    }

    gen_stack_update(s, 8 * size);
}
2315
/* ENTER: allocate a stack frame of 'esp_addend' bytes with 'level'
   nesting levels (level is masked to 5 bits).  */
static void gen_enter(DisasContext *s, int esp_addend, int level)
{
    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
    int size = 1 << d_ot;

    /* Push BP; compute FrameTemp into T1.  */
    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);

    level &= 31;
    if (level != 0) {
        int i;

        /* Copy level-1 pointers from the previous frame.  */
        for (i = 1; i < level; ++i) {
            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);

            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
        }

        /* Push the current FrameTemp as the last level.  */
        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
    }

    /* Copy the FrameTemp value to EBP.  */
    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);

    /* Compute the final value of ESP.  */
    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
}
2355
/* LEAVE: reload EBP from the frame and point ESP just past it.  */
static void gen_leave(DisasContext *s)
{
    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
    TCGMemOp a_ot = mo_stacksize(s);

    /* Read the saved EBP from the top of the frame.  */
    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);

    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);

    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
}
2369
/* Raise exception 'trapno' at guest address 'cur_eip' and terminate
   the translation block.  */
static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->is_jmp = DISAS_TB_JUMP;
}
2377
/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}
2384
2385/* Similarly, except that the assumption here is that we don't decode
2386   the instruction at all -- either a missing opcode, an unimplemented
2387   feature, or just a bogus instruction stream.  */
2388static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2389{
2390    gen_illegal_opcode(s);
2391
2392    if (qemu_loglevel_mask(LOG_UNIMP)) {
2393        target_ulong pc = s->pc_start, end = s->pc;
2394        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2395        for (; pc < end; ++pc) {
2396            qemu_log(" %02x", cpu_ldub_code(env, pc));
2397        }
2398        qemu_log("\n");
2399    }
2400}
2401
/* an interrupt is different from an exception because of the
   privilege checks; 'next_eip' is the return address pushed by the
   interrupt */
static void gen_interrupt(DisasContext *s, int intno,
                          target_ulong cur_eip, target_ulong next_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(cur_eip);
    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                               tcg_const_i32(next_eip - cur_eip));
    s->is_jmp = DISAS_TB_JUMP;
}
2413
/* Flush state at 'cur_eip' and enter the debug exception handler,
   ending the translation block.  */
static void gen_debug(DisasContext *s, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(cur_eip);
    gen_helper_debug(cpu_env);
    s->is_jmp = DISAS_TB_JUMP;
}
2421
2422static void gen_set_hflag(DisasContext *s, uint32_t mask)
2423{
2424    if ((s->flags & mask) == 0) {
2425        TCGv_i32 t = tcg_temp_new_i32();
2426        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2427        tcg_gen_ori_i32(t, t, mask);
2428        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2429        tcg_temp_free_i32(t);
2430        s->flags |= mask;
2431    }
2432}
2433
2434static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2435{
2436    if (s->flags & mask) {
2437        TCGv_i32 t = tcg_temp_new_i32();
2438        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2439        tcg_gen_andi_i32(t, t, ~mask);
2440        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2441        tcg_temp_free_i32(t);
2442        s->flags &= ~mask;
2443    }
2444}
2445
/* Clear BND registers during legacy branches.  */
static void gen_bnd_jmp(DisasContext *s)
{
    /* Clear the registers only if BND prefix is missing, MPX is enabled,
       and if the BNDREGs are known to be in use (non-zero) already.
       The helper itself will check BNDPRESERVE at runtime.  */
    if ((s->prefix & PREFIX_REPNZ) == 0
        && (s->flags & HF_MPX_EN_MASK) != 0
        && (s->flags & HF_MPX_IU_MASK) != 0) {
        gen_helper_bnd_jmp(cpu_env);
    }
}
2458
/* Generate an end of block. Trace exception is also generated if needed.
   If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
{
    gen_update_cc_op(s);

    /* If several instructions disable interrupts, only the first does it.  */
    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
    } else {
        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
    }

    if (s->tb->flags & HF_RF_MASK) {
        gen_helper_reset_rf(cpu_env);
    }
    /* Pick the exit flavour: debugger stop, single-step trap, or a
       plain exit to the main loop.  */
    if (s->singlestep_enabled) {
        gen_helper_debug(cpu_env);
    } else if (s->tf) {
        gen_helper_single_step(cpu_env);
    } else {
        tcg_gen_exit_tb(0);
    }
    s->is_jmp = DISAS_TB_JUMP;
}
2484
/* End of block, resetting the inhibit irq flag.  */
static void gen_eob(DisasContext *s)
{
    gen_eob_inhibit_irq(s, false);
}
2490
/* generate a jump to eip. No segment change must happen before as a
   direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
    gen_update_cc_op(s);
    set_cc_op(s, CC_OP_DYNAMIC);
    if (s->jmp_opt) {
        /* Chaining allowed: use a direct goto_tb exit.  */
        gen_goto_tb(s, tb_num, eip);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_jmp_im(eip);
        gen_eob(s);
    }
}
2505
/* Unconditional jump to eip using goto_tb slot 0.  */
static void gen_jmp(DisasContext *s, target_ulong eip)
{
    gen_jmp_tb(s, eip, 0);
}
2510
/* Load 64 bits from guest address cpu_A0 into the CPUX86State field
   at 'offset'.  */
static inline void gen_ldq_env_A0(DisasContext *s, int offset)
{
    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
}
2516
/* Store the 64-bit CPUX86State field at 'offset' to guest address
   cpu_A0.  */
static inline void gen_stq_env_A0(DisasContext *s, int offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
}
2522
/* Load a 128-bit value from guest address cpu_A0 into the ZMMReg at
   'offset', as two little-endian 64-bit halves.  */
static inline void gen_ldo_env_A0(DisasContext *s, int offset)
{
    int mem_index = s->mem_index;
    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
}
2532
/* Store the 128-bit ZMMReg at 'offset' to guest address cpu_A0, as
   two little-endian 64-bit halves.  */
static inline void gen_sto_env_A0(DisasContext *s, int offset)
{
    int mem_index = s->mem_index;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
}
2542
/* Copy the low 128 bits of one env-resident ZMMReg to another.  */
static inline void gen_op_movo(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
}
2550
/* Copy a 64-bit value between two env offsets.  */
static inline void gen_op_movq(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}
2556
/* Copy a 32-bit value between two env offsets.  */
static inline void gen_op_movl(int d_offset, int s_offset)
{
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
}
2562
/* Zero the 64-bit value at env offset 'd_offset'.  */
static inline void gen_op_movq_env_0(int d_offset)
{
    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}
2568
/* SSE helper call signatures.  The suffix letters encode the argument
   list: 'e' = CPU env pointer, 'p' = register pointer, 'i' = TCGv_i32,
   'l' = TCGv_i64, 't' = target-width TCGv; the character before the
   underscore names the return value ('0' = void).  */
typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv val);
2579
/* Sentinel table entries: SSE_SPECIAL rows are decoded by hand;
   SSE_DUMMY marks entries with no helper body (femms/emms).  */
#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

/* Build the { mmx, xmm } helper pair, resp. the { ps, pd, ss, sd }
   helper quadruple, for operation 'x'.  */
#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2586
/* Main SSE/MMX dispatch table, indexed by the second (0F xx) opcode
   byte.  The four entries per row are selected by the mandatory
   prefix -- none, 0x66, 0xF3, 0xF2 -- matching e.g. the
   movups/movupd/movss/movsd row at 0x10.  */
static const SSEFunc_0_epp sse_op_table1[256][4] = {
    /* 3DNow! extensions */
    [0x0e] = { SSE_DUMMY }, /* femms */
    [0x0f] = { SSE_DUMMY }, /* pf... */
    /* pure SSE operations */
    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */

    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
    [0x51] = SSE_FOP(sqrt),
    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
    [0x58] = SSE_FOP(add),
    [0x59] = SSE_FOP(mul),
    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
    [0x5c] = SSE_FOP(sub),
    [0x5d] = SSE_FOP(min),
    [0x5e] = SSE_FOP(div),
    [0x5f] = SSE_FOP(max),

    [0xc2] = SSE_FOP(cmpeq),
    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */

    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },

    /* MMX ops and their SSE extensions */
    [0x60] = MMX_OP2(punpcklbw),
    [0x61] = MMX_OP2(punpcklwd),
    [0x62] = MMX_OP2(punpckldq),
    [0x63] = MMX_OP2(packsswb),
    [0x64] = MMX_OP2(pcmpgtb),
    [0x65] = MMX_OP2(pcmpgtw),
    [0x66] = MMX_OP2(pcmpgtl),
    [0x67] = MMX_OP2(packuswb),
    [0x68] = MMX_OP2(punpckhbw),
    [0x69] = MMX_OP2(punpckhwd),
    [0x6a] = MMX_OP2(punpckhdq),
    [0x6b] = MMX_OP2(packssdw),
    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */
    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
    [0x74] = MMX_OP2(pcmpeqb),
    [0x75] = MMX_OP2(pcmpeqw),
    [0x76] = MMX_OP2(pcmpeql),
    [0x77] = { SSE_DUMMY }, /* emms */
    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
    [0xd1] = MMX_OP2(psrlw),
    [0xd2] = MMX_OP2(psrld),
    [0xd3] = MMX_OP2(psrlq),
    [0xd4] = MMX_OP2(paddq),
    [0xd5] = MMX_OP2(pmullw),
    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
    [0xd8] = MMX_OP2(psubusb),
    [0xd9] = MMX_OP2(psubusw),
    [0xda] = MMX_OP2(pminub),
    [0xdb] = MMX_OP2(pand),
    [0xdc] = MMX_OP2(paddusb),
    [0xdd] = MMX_OP2(paddusw),
    [0xde] = MMX_OP2(pmaxub),
    [0xdf] = MMX_OP2(pandn),
    [0xe0] = MMX_OP2(pavgb),
    [0xe1] = MMX_OP2(psraw),
    [0xe2] = MMX_OP2(psrad),
    [0xe3] = MMX_OP2(pavgw),
    [0xe4] = MMX_OP2(pmulhuw),
    [0xe5] = MMX_OP2(pmulhw),
    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
    [0xe8] = MMX_OP2(psubsb),
    [0xe9] = MMX_OP2(psubsw),
    [0xea] = MMX_OP2(pminsw),
    [0xeb] = MMX_OP2(por),
    [0xec] = MMX_OP2(paddsb),
    [0xed] = MMX_OP2(paddsw),
    [0xee] = MMX_OP2(pmaxsw),
    [0xef] = MMX_OP2(pxor),
    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
    [0xf1] = MMX_OP2(psllw),
    [0xf2] = MMX_OP2(pslld),
    [0xf3] = MMX_OP2(psllq),
    [0xf4] = MMX_OP2(pmuludq),
    [0xf5] = MMX_OP2(pmaddwd),
    [0xf6] = MMX_OP2(psadbw),
    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
    [0xf8] = MMX_OP2(psubb),
    [0xf9] = MMX_OP2(psubw),
    [0xfa] = MMX_OP2(psubl),
    [0xfb] = MMX_OP2(psubq),
    [0xfc] = MMX_OP2(paddb),
    [0xfd] = MMX_OP2(paddw),
    [0xfe] = MMX_OP2(paddl),
};
2720
/* Shift-by-immediate group (opcodes 0F 71/72/73): the index is
   8 * (opcode - 0x71) + the reg field of the modrm byte.  */
static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
    [0 + 2] = MMX_OP2(psrlw),
    [0 + 4] = MMX_OP2(psraw),
    [0 + 6] = MMX_OP2(psllw),
    [8 + 2] = MMX_OP2(psrld),
    [8 + 4] = MMX_OP2(psrad),
    [8 + 6] = MMX_OP2(pslld),
    [16 + 2] = MMX_OP2(psrlq),
    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
    [16 + 6] = MMX_OP2(psllq),
    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
};
2733
/* 32-bit integer to scalar float conversions (cvtsi2ss/cvtsi2sd).  */
static const SSEFunc_0_epi sse_op_table3ai[] = {
    gen_helper_cvtsi2ss,
    gen_helper_cvtsi2sd
};
2738
#ifdef TARGET_X86_64
/* 64-bit integer to scalar float conversions (cvtsq2ss/cvtsq2sd).  */
static const SSEFunc_0_epl sse_op_table3aq[] = {
    gen_helper_cvtsq2ss,
    gen_helper_cvtsq2sd
};
#endif
2745
/* Scalar float to 32-bit integer conversions, truncating and rounding
   variants.  */
static const SSEFunc_i_ep sse_op_table3bi[] = {
    gen_helper_cvttss2si,
    gen_helper_cvtss2si,
    gen_helper_cvttsd2si,
    gen_helper_cvtsd2si
};
2752
#ifdef TARGET_X86_64
/* Scalar float to 64-bit integer conversions, truncating and rounding
   variants.  */
static const SSEFunc_l_ep sse_op_table3bq[] = {
    gen_helper_cvttss2sq,
    gen_helper_cvtss2sq,
    gen_helper_cvttsd2sq,
    gen_helper_cvtsd2sq
};
#endif
2761
/* SSE compare operations, indexed by the imm8 predicate (0..7) of
   cmpps/cmppd/cmpss/cmpsd.  */
static const SSEFunc_0_epp sse_op_table4[8][4] = {
    SSE_FOP(cmpeq),
    SSE_FOP(cmplt),
    SSE_FOP(cmple),
    SSE_FOP(cmpunord),
    SSE_FOP(cmpneq),
    SSE_FOP(cmpnlt),
    SSE_FOP(cmpnle),
    SSE_FOP(cmpord),
};
2772
/* 3DNow! operations, indexed by the instruction's imm8 suffix byte.  */
static const SSEFunc_0_epp sse_op_table5[256] = {
    [0x0c] = gen_helper_pi2fw,
    [0x0d] = gen_helper_pi2fd,
    [0x1c] = gen_helper_pf2iw,
    [0x1d] = gen_helper_pf2id,
    [0x8a] = gen_helper_pfnacc,
    [0x8e] = gen_helper_pfpnacc,
    [0x90] = gen_helper_pfcmpge,
    [0x94] = gen_helper_pfmin,
    [0x96] = gen_helper_pfrcp,
    [0x97] = gen_helper_pfrsqrt,
    [0x9a] = gen_helper_pfsub,
    [0x9e] = gen_helper_pfadd,
    [0xa0] = gen_helper_pfcmpgt,
    [0xa4] = gen_helper_pfmax,
    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
    [0xa7] = gen_helper_movq, /* pfrsqit1 */
    [0xaa] = gen_helper_pfsubr,
    [0xae] = gen_helper_pfacc,
    [0xb0] = gen_helper_pfcmpeq,
    [0xb4] = gen_helper_pfmul,
    [0xb6] = gen_helper_movq, /* pfrcpit2 */
    [0xb7] = gen_helper_pmulhrw_mmx,
    [0xbb] = gen_helper_pswapd,
    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
};
2799
/* An op-table entry (MMX and XMM forms) plus the CPUID feature flag
   that gates it.  */
struct SSEOpHelper_epp {
    SSEFunc_0_epp op[2];
    uint32_t ext_mask;
};

/* Same, for helpers that also take an imm8 operand.  */
struct SSEOpHelper_eppi {
    SSEFunc_0_eppi op[2];
    uint32_t ext_mask;
};
2809
/* Build feature-gated table entries: op[0] is the MMX form, op[1] the
   XMM form; extensions without an MMX form leave op[0] NULL.  */
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
        CPUID_EXT_PCLMULQDQ }
#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2817
/* Three-byte 0F 38 opcodes (SSSE3, SSE4.x, AESNI), indexed by the
   third opcode byte.  */
static const struct SSEOpHelper_epp sse_op_table6[256] = {
    [0x00] = SSSE3_OP(pshufb),
    [0x01] = SSSE3_OP(phaddw),
    [0x02] = SSSE3_OP(phaddd),
    [0x03] = SSSE3_OP(phaddsw),
    [0x04] = SSSE3_OP(pmaddubsw),
    [0x05] = SSSE3_OP(phsubw),
    [0x06] = SSSE3_OP(phsubd),
    [0x07] = SSSE3_OP(phsubsw),
    [0x08] = SSSE3_OP(psignb),
    [0x09] = SSSE3_OP(psignw),
    [0x0a] = SSSE3_OP(psignd),
    [0x0b] = SSSE3_OP(pmulhrsw),
    [0x10] = SSE41_OP(pblendvb),
    [0x14] = SSE41_OP(blendvps),
    [0x15] = SSE41_OP(blendvpd),
    [0x17] = SSE41_OP(ptest),
    [0x1c] = SSSE3_OP(pabsb),
    [0x1d] = SSSE3_OP(pabsw),
    [0x1e] = SSSE3_OP(pabsd),
    [0x20] = SSE41_OP(pmovsxbw),
    [0x21] = SSE41_OP(pmovsxbd),
    [0x22] = SSE41_OP(pmovsxbq),
    [0x23] = SSE41_OP(pmovsxwd),
    [0x24] = SSE41_OP(pmovsxwq),
    [0x25] = SSE41_OP(pmovsxdq),
    [0x28] = SSE41_OP(pmuldq),
    [0x29] = SSE41_OP(pcmpeqq),
    [0x2a] = SSE41_SPECIAL, /* movntqda */
    [0x2b] = SSE41_OP(packusdw),
    [0x30] = SSE41_OP(pmovzxbw),
    [0x31] = SSE41_OP(pmovzxbd),
    [0x32] = SSE41_OP(pmovzxbq),
    [0x33] = SSE41_OP(pmovzxwd),
    [0x34] = SSE41_OP(pmovzxwq),
    [0x35] = SSE41_OP(pmovzxdq),
    [0x37] = SSE42_OP(pcmpgtq),
    [0x38] = SSE41_OP(pminsb),
    [0x39] = SSE41_OP(pminsd),
    [0x3a] = SSE41_OP(pminuw),
    [0x3b] = SSE41_OP(pminud),
    [0x3c] = SSE41_OP(pmaxsb),
    [0x3d] = SSE41_OP(pmaxsd),
    [0x3e] = SSE41_OP(pmaxuw),
    [0x3f] = SSE41_OP(pmaxud),
    [0x40] = SSE41_OP(pmulld),
    [0x41] = SSE41_OP(phminposuw),
    [0xdb] = AESNI_OP(aesimc),
    [0xdc] = AESNI_OP(aesenc),
    [0xdd] = AESNI_OP(aesenclast),
    [0xde] = AESNI_OP(aesdec),
    [0xdf] = AESNI_OP(aesdeclast),
};
2871
/* Decode table for the three-byte 0F 3A xx opcodes, indexed by the
   opcode byte that follows 0F 3A.  These instructions carry an
   immediate byte, hence the _eppi helper type.  Unlisted entries are
   zero-initialized and rejected by gen_sse.  */
static const struct SSEOpHelper_eppi sse_op_table7[256] = {
    [0x08] = SSE41_OP(roundps),
    [0x09] = SSE41_OP(roundpd),
    [0x0a] = SSE41_OP(roundss),
    [0x0b] = SSE41_OP(roundsd),
    [0x0c] = SSE41_OP(blendps),
    [0x0d] = SSE41_OP(blendpd),
    [0x0e] = SSE41_OP(pblendw),
    [0x0f] = SSSE3_OP(palignr),
    [0x14] = SSE41_SPECIAL, /* pextrb */
    [0x15] = SSE41_SPECIAL, /* pextrw */
    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
    [0x17] = SSE41_SPECIAL, /* extractps */
    [0x20] = SSE41_SPECIAL, /* pinsrb */
    [0x21] = SSE41_SPECIAL, /* insertps */
    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
    [0x40] = SSE41_OP(dpps),
    [0x41] = SSE41_OP(dppd),
    [0x42] = SSE41_OP(mpsadbw),
    [0x44] = PCLMULQDQ_OP(pclmulqdq),
    [0x60] = SSE42_OP(pcmpestrm),
    [0x61] = SSE42_OP(pcmpestri),
    [0x62] = SSE42_OP(pcmpistrm),
    [0x63] = SSE42_OP(pcmpistri),
    [0xdf] = AESNI_OP(aeskeygenassist),
};
2898
2899static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2900                    target_ulong pc_start, int rex_r)
2901{
2902    int b1, op1_offset, op2_offset, is_xmm, val;
2903    int modrm, mod, rm, reg;
2904    SSEFunc_0_epp sse_fn_epp;
2905    SSEFunc_0_eppi sse_fn_eppi;
2906    SSEFunc_0_ppi sse_fn_ppi;
2907    SSEFunc_0_eppt sse_fn_eppt;
2908    TCGMemOp ot;
2909
2910    b &= 0xff;
2911    if (s->prefix & PREFIX_DATA)
2912        b1 = 1;
2913    else if (s->prefix & PREFIX_REPZ)
2914        b1 = 2;
2915    else if (s->prefix & PREFIX_REPNZ)
2916        b1 = 3;
2917    else
2918        b1 = 0;
2919    sse_fn_epp = sse_op_table1[b][b1];
2920    if (!sse_fn_epp) {
2921        goto unknown_op;
2922    }
2923    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2924        is_xmm = 1;
2925    } else {
2926        if (b1 == 0) {
2927            /* MMX case */
2928            is_xmm = 0;
2929        } else {
2930            is_xmm = 1;
2931        }
2932    }
2933    /* simple MMX/SSE operation */
2934    if (s->flags & HF_TS_MASK) {
2935        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
2936        return;
2937    }
2938    if (s->flags & HF_EM_MASK) {
2939    illegal_op:
2940        gen_illegal_opcode(s);
2941        return;
2942    }
2943    if (is_xmm
2944        && !(s->flags & HF_OSFXSR_MASK)
2945        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
2946        goto unknown_op;
2947    }
2948    if (b == 0x0e) {
2949        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
2950            /* If we were fully decoding this we might use illegal_op.  */
2951            goto unknown_op;
2952        }
2953        /* femms */
2954        gen_helper_emms(cpu_env);
2955        return;
2956    }
2957    if (b == 0x77) {
2958        /* emms */
2959        gen_helper_emms(cpu_env);
2960        return;
2961    }
2962    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
2963       the static cpu state) */
2964    if (!is_xmm) {
2965        gen_helper_enter_mmx(cpu_env);
2966    }
2967
2968    modrm = cpu_ldub_code(env, s->pc++);
2969    reg = ((modrm >> 3) & 7);
2970    if (is_xmm)
2971        reg |= rex_r;
2972    mod = (modrm >> 6) & 3;
2973    if (sse_fn_epp == SSE_SPECIAL) {
2974        b |= (b1 << 8);
2975        switch(b) {
2976        case 0x0e7: /* movntq */
2977            if (mod == 3) {
2978                goto illegal_op;
2979            }
2980            gen_lea_modrm(env, s, modrm);
2981            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
2982            break;
2983        case 0x1e7: /* movntdq */
2984        case 0x02b: /* movntps */
        case 0x12b: /* movntpd */
2986            if (mod == 3)
2987                goto illegal_op;
2988            gen_lea_modrm(env, s, modrm);
2989            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2990            break;
2991        case 0x3f0: /* lddqu */
2992            if (mod == 3)
2993                goto illegal_op;
2994            gen_lea_modrm(env, s, modrm);
2995            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2996            break;
2997        case 0x22b: /* movntss */
2998        case 0x32b: /* movntsd */
2999            if (mod == 3)
3000                goto illegal_op;
3001            gen_lea_modrm(env, s, modrm);
3002            if (b1 & 1) {
3003                gen_stq_env_A0(s, offsetof(CPUX86State,
3004                                           xmm_regs[reg].ZMM_Q(0)));
3005            } else {
3006                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3007                    xmm_regs[reg].ZMM_L(0)));
3008                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3009            }
3010            break;
3011        case 0x6e: /* movd mm, ea */
3012#ifdef TARGET_X86_64
3013            if (s->dflag == MO_64) {
3014                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3015                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3016            } else
3017#endif
3018            {
3019                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3020                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3021                                 offsetof(CPUX86State,fpregs[reg].mmx));
3022                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3023                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3024            }
3025            break;
3026        case 0x16e: /* movd xmm, ea */
3027#ifdef TARGET_X86_64
3028            if (s->dflag == MO_64) {
3029                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3030                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3031                                 offsetof(CPUX86State,xmm_regs[reg]));
3032                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3033            } else
3034#endif
3035            {
3036                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3037                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3038                                 offsetof(CPUX86State,xmm_regs[reg]));
3039                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3040                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3041            }
3042            break;
3043        case 0x6f: /* movq mm, ea */
3044            if (mod != 3) {
3045                gen_lea_modrm(env, s, modrm);
3046                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3047            } else {
3048                rm = (modrm & 7);
3049                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3050                               offsetof(CPUX86State,fpregs[rm].mmx));
3051                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3052                               offsetof(CPUX86State,fpregs[reg].mmx));
3053            }
3054            break;
3055        case 0x010: /* movups */
3056        case 0x110: /* movupd */
3057        case 0x028: /* movaps */
3058        case 0x128: /* movapd */
3059        case 0x16f: /* movdqa xmm, ea */
3060        case 0x26f: /* movdqu xmm, ea */
3061            if (mod != 3) {
3062                gen_lea_modrm(env, s, modrm);
3063                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3064            } else {
3065                rm = (modrm & 7) | REX_B(s);
3066                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3067                            offsetof(CPUX86State,xmm_regs[rm]));
3068            }
3069            break;
3070        case 0x210: /* movss xmm, ea */
3071            if (mod != 3) {
3072                gen_lea_modrm(env, s, modrm);
3073                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3074                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3075                tcg_gen_movi_tl(cpu_T0, 0);
3076                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3077                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3078                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3079            } else {
3080                rm = (modrm & 7) | REX_B(s);
3081                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3082                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3083            }
3084            break;
3085        case 0x310: /* movsd xmm, ea */
3086            if (mod != 3) {
3087                gen_lea_modrm(env, s, modrm);
3088                gen_ldq_env_A0(s, offsetof(CPUX86State,
3089                                           xmm_regs[reg].ZMM_Q(0)));
3090                tcg_gen_movi_tl(cpu_T0, 0);
3091                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3092                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3093            } else {
3094                rm = (modrm & 7) | REX_B(s);
3095                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3096                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3097            }
3098            break;
3099        case 0x012: /* movlps */
3100        case 0x112: /* movlpd */
3101            if (mod != 3) {
3102                gen_lea_modrm(env, s, modrm);
3103                gen_ldq_env_A0(s, offsetof(CPUX86State,
3104                                           xmm_regs[reg].ZMM_Q(0)));
3105            } else {
3106                /* movhlps */
3107                rm = (modrm & 7) | REX_B(s);
3108                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3109                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3110            }
3111            break;
3112        case 0x212: /* movsldup */
3113            if (mod != 3) {
3114                gen_lea_modrm(env, s, modrm);
3115                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3116            } else {
3117                rm = (modrm & 7) | REX_B(s);
3118                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3119                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3120                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3121                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3122            }
3123            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3124                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3125            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3126                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3127            break;
3128        case 0x312: /* movddup */
3129            if (mod != 3) {
3130                gen_lea_modrm(env, s, modrm);
3131                gen_ldq_env_A0(s, offsetof(CPUX86State,
3132                                           xmm_regs[reg].ZMM_Q(0)));
3133            } else {
3134                rm = (modrm & 7) | REX_B(s);
3135                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3136                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3137            }
3138            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3139                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3140            break;
3141        case 0x016: /* movhps */
3142        case 0x116: /* movhpd */
3143            if (mod != 3) {
3144                gen_lea_modrm(env, s, modrm);
3145                gen_ldq_env_A0(s, offsetof(CPUX86State,
3146                                           xmm_regs[reg].ZMM_Q(1)));
3147            } else {
3148                /* movlhps */
3149                rm = (modrm & 7) | REX_B(s);
3150                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3151                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3152            }
3153            break;
3154        case 0x216: /* movshdup */
3155            if (mod != 3) {
3156                gen_lea_modrm(env, s, modrm);
3157                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3158            } else {
3159                rm = (modrm & 7) | REX_B(s);
3160                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3161                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3162                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3163                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3164            }
3165            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3166                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3167            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3168                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3169            break;
3170        case 0x178:
3171        case 0x378:
3172            {
3173                int bit_index, field_length;
3174
3175                if (b1 == 1 && reg != 0)
3176                    goto illegal_op;
3177                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3178                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3179                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3180                    offsetof(CPUX86State,xmm_regs[reg]));
3181                if (b1 == 1)
3182                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
3183                                       tcg_const_i32(bit_index),
3184                                       tcg_const_i32(field_length));
3185                else
3186                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
3187                                         tcg_const_i32(bit_index),
3188                                         tcg_const_i32(field_length));
3189            }
3190            break;
3191        case 0x7e: /* movd ea, mm */
3192#ifdef TARGET_X86_64
3193            if (s->dflag == MO_64) {
3194                tcg_gen_ld_i64(cpu_T0, cpu_env,
3195                               offsetof(CPUX86State,fpregs[reg].mmx));
3196                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3197            } else
3198#endif
3199            {
3200                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3201                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3202                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3203            }
3204            break;
3205        case 0x17e: /* movd ea, xmm */
3206#ifdef TARGET_X86_64
3207            if (s->dflag == MO_64) {
3208                tcg_gen_ld_i64(cpu_T0, cpu_env,
3209                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3210                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3211            } else
3212#endif
3213            {
3214                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3215                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3216                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3217            }
3218            break;
3219        case 0x27e: /* movq xmm, ea */
3220            if (mod != 3) {
3221                gen_lea_modrm(env, s, modrm);
3222                gen_ldq_env_A0(s, offsetof(CPUX86State,
3223                                           xmm_regs[reg].ZMM_Q(0)));
3224            } else {
3225                rm = (modrm & 7) | REX_B(s);
3226                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3227                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3228            }
3229            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3230            break;
3231        case 0x7f: /* movq ea, mm */
3232            if (mod != 3) {
3233                gen_lea_modrm(env, s, modrm);
3234                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3235            } else {
3236                rm = (modrm & 7);
3237                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3238                            offsetof(CPUX86State,fpregs[reg].mmx));
3239            }
3240            break;
3241        case 0x011: /* movups */
3242        case 0x111: /* movupd */
3243        case 0x029: /* movaps */
3244        case 0x129: /* movapd */
3245        case 0x17f: /* movdqa ea, xmm */
3246        case 0x27f: /* movdqu ea, xmm */
3247            if (mod != 3) {
3248                gen_lea_modrm(env, s, modrm);
3249                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3250            } else {
3251                rm = (modrm & 7) | REX_B(s);
3252                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3253                            offsetof(CPUX86State,xmm_regs[reg]));
3254            }
3255            break;
3256        case 0x211: /* movss ea, xmm */
3257            if (mod != 3) {
3258                gen_lea_modrm(env, s, modrm);
3259                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3260                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3261            } else {
3262                rm = (modrm & 7) | REX_B(s);
3263                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
3264                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3265            }
3266            break;
3267        case 0x311: /* movsd ea, xmm */
3268            if (mod != 3) {
3269                gen_lea_modrm(env, s, modrm);
3270                gen_stq_env_A0(s, offsetof(CPUX86State,
3271                                           xmm_regs[reg].ZMM_Q(0)));
3272            } else {
3273                rm = (modrm & 7) | REX_B(s);
3274                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3275                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3276            }
3277            break;
3278        case 0x013: /* movlps */
3279        case 0x113: /* movlpd */
3280            if (mod != 3) {
3281                gen_lea_modrm(env, s, modrm);
3282                gen_stq_env_A0(s, offsetof(CPUX86State,
3283                                           xmm_regs[reg].ZMM_Q(0)));
3284            } else {
3285                goto illegal_op;
3286            }
3287            break;
3288        case 0x017: /* movhps */
3289        case 0x117: /* movhpd */
3290            if (mod != 3) {
3291                gen_lea_modrm(env, s, modrm);
3292                gen_stq_env_A0(s, offsetof(CPUX86State,
3293                                           xmm_regs[reg].ZMM_Q(1)));
3294            } else {
3295                goto illegal_op;
3296            }
3297            break;
3298        case 0x71: /* shift mm, im */
3299        case 0x72:
3300        case 0x73:
3301        case 0x171: /* shift xmm, im */
3302        case 0x172:
3303        case 0x173:
3304            if (b1 >= 2) {
3305                goto unknown_op;
3306            }
3307            val = cpu_ldub_code(env, s->pc++);
3308            if (is_xmm) {
3309                tcg_gen_movi_tl(cpu_T0, val);
3310                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3311                tcg_gen_movi_tl(cpu_T0, 0);
3312                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
3313                op1_offset = offsetof(CPUX86State,xmm_t0);
3314            } else {
3315                tcg_gen_movi_tl(cpu_T0, val);
3316                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3317                tcg_gen_movi_tl(cpu_T0, 0);
3318                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3319                op1_offset = offsetof(CPUX86State,mmx_t0);
3320            }
3321            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3322                                       (((modrm >> 3)) & 7)][b1];
3323            if (!sse_fn_epp) {
3324                goto unknown_op;
3325            }
3326            if (is_xmm) {
3327                rm = (modrm & 7) | REX_B(s);
3328                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3329            } else {
3330                rm = (modrm & 7);
3331                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3332            }
3333            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3334            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3335            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3336            break;
3337        case 0x050: /* movmskps */
3338            rm = (modrm & 7) | REX_B(s);
3339            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3340                             offsetof(CPUX86State,xmm_regs[rm]));
3341            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3342            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3343            break;
3344        case 0x150: /* movmskpd */
3345            rm = (modrm & 7) | REX_B(s);
3346            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3347                             offsetof(CPUX86State,xmm_regs[rm]));
3348            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3349            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3350            break;
3351        case 0x02a: /* cvtpi2ps */
3352        case 0x12a: /* cvtpi2pd */
3353            gen_helper_enter_mmx(cpu_env);
3354            if (mod != 3) {
3355                gen_lea_modrm(env, s, modrm);
3356                op2_offset = offsetof(CPUX86State,mmx_t0);
3357                gen_ldq_env_A0(s, op2_offset);
3358            } else {
3359                rm = (modrm & 7);
3360                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3361            }
3362            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3363            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3364            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3365            switch(b >> 8) {
3366            case 0x0:
3367                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3368                break;
3369            default:
3370            case 0x1:
3371                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3372                break;
3373            }
3374            break;
3375        case 0x22a: /* cvtsi2ss */
3376        case 0x32a: /* cvtsi2sd */
3377            ot = mo_64_32(s->dflag);
3378            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3379            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3380            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3381            if (ot == MO_32) {
3382                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3383                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3384                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3385            } else {
3386#ifdef TARGET_X86_64
3387                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3388                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
3389#else
3390                goto illegal_op;
3391#endif
3392            }
3393            break;
3394        case 0x02c: /* cvttps2pi */
3395        case 0x12c: /* cvttpd2pi */
3396        case 0x02d: /* cvtps2pi */
3397        case 0x12d: /* cvtpd2pi */
3398            gen_helper_enter_mmx(cpu_env);
3399            if (mod != 3) {
3400                gen_lea_modrm(env, s, modrm);
3401                op2_offset = offsetof(CPUX86State,xmm_t0);
3402                gen_ldo_env_A0(s, op2_offset);
3403            } else {
3404                rm = (modrm & 7) | REX_B(s);
3405                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3406            }
3407            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3408            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3409            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3410            switch(b) {
3411            case 0x02c:
3412                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3413                break;
3414            case 0x12c:
3415                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3416                break;
3417            case 0x02d:
3418                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3419                break;
3420            case 0x12d:
3421                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3422                break;
3423            }
3424            break;
3425        case 0x22c: /* cvttss2si */
3426        case 0x32c: /* cvttsd2si */
3427        case 0x22d: /* cvtss2si */
3428        case 0x32d: /* cvtsd2si */
3429            ot = mo_64_32(s->dflag);
3430            if (mod != 3) {
3431                gen_lea_modrm(env, s, modrm);
3432                if ((b >> 8) & 1) {
3433                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3434                } else {
3435                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3436                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3437                }
3438                op2_offset = offsetof(CPUX86State,xmm_t0);
3439            } else {
3440                rm = (modrm & 7) | REX_B(s);
3441                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3442            }
3443            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3444            if (ot == MO_32) {
3445                SSEFunc_i_ep sse_fn_i_ep =
3446                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3447                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3448                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
3449            } else {
3450#ifdef TARGET_X86_64
3451                SSEFunc_l_ep sse_fn_l_ep =
3452                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3453                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
3454#else
3455                goto illegal_op;
3456#endif
3457            }
3458            gen_op_mov_reg_v(ot, reg, cpu_T0);
3459            break;
3460        case 0xc4: /* pinsrw */
3461        case 0x1c4:
3462            s->rip_offset = 1;
3463            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3464            val = cpu_ldub_code(env, s->pc++);
3465            if (b1) {
3466                val &= 7;
3467                tcg_gen_st16_tl(cpu_T0, cpu_env,
3468                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3469            } else {
3470                val &= 3;
3471                tcg_gen_st16_tl(cpu_T0, cpu_env,
3472                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3473            }
3474            break;
3475        case 0xc5: /* pextrw */
3476        case 0x1c5:
3477            if (mod != 3)
3478                goto illegal_op;
3479            ot = mo_64_32(s->dflag);
3480            val = cpu_ldub_code(env, s->pc++);
3481            if (b1) {
3482                val &= 7;
3483                rm = (modrm & 7) | REX_B(s);
3484                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3485                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3486            } else {
3487                val &= 3;
3488                rm = (modrm & 7);
3489                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3490                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3491            }
3492            reg = ((modrm >> 3) & 7) | rex_r;
3493            gen_op_mov_reg_v(ot, reg, cpu_T0);
3494            break;
3495        case 0x1d6: /* movq ea, xmm */
3496            if (mod != 3) {
3497                gen_lea_modrm(env, s, modrm);
3498                gen_stq_env_A0(s, offsetof(CPUX86State,
3499                                           xmm_regs[reg].ZMM_Q(0)));
3500            } else {
3501                rm = (modrm & 7) | REX_B(s);
3502                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3503                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3504                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3505            }
3506            break;
3507        case 0x2d6: /* movq2dq */
3508            gen_helper_enter_mmx(cpu_env);
3509            rm = (modrm & 7);
3510            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3511                        offsetof(CPUX86State,fpregs[rm].mmx));
3512            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3513            break;
3514        case 0x3d6: /* movdq2q */
3515            gen_helper_enter_mmx(cpu_env);
3516            rm = (modrm & 7) | REX_B(s);
3517            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3518                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3519            break;
3520        case 0xd7: /* pmovmskb */
3521        case 0x1d7:
3522            if (mod != 3)
3523                goto illegal_op;
3524            if (b1) {
3525                rm = (modrm & 7) | REX_B(s);
3526                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3527                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3528            } else {
3529                rm = (modrm & 7);
3530                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3531                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3532            }
3533            reg = ((modrm >> 3) & 7) | rex_r;
3534            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3535            break;
3536
        case 0x138:
        case 0x038:
            /* Three-byte 0f 38 opcode space (SSSE3/SSE4.x).  The second
               opcode byte was already fetched into modrm.  */
            b = modrm;
            if ((b & 0xf0) == 0xf0) {
                /* 0f 38 f0..ff hold the integer movbe/crc32/BMI insns.  */
                goto do_0f_38_fx;
            }
            modrm = cpu_ldub_code(env, s->pc++);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            if (b1 >= 2) {
                /* Only the no-prefix (MMX) and 66 (XMM) forms exist.  */
                goto unknown_op;
            }

            sse_fn_epp = sse_op_table6[b].op[b1];
            if (!sse_fn_epp) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                goto illegal_op;

            if (b1) {
                /* XMM operands.  Several pmovsx/pmovzx forms read fewer
                   than 16 bytes from memory; load only what the insn
                   consumes so no spurious fault is generated.  */
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(env, s, modrm);
                    switch (b) {
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
                        /* 64-bit memory source.  */
                        gen_ldq_env_A0(s, op2_offset +
                                        offsetof(ZMMReg, ZMM_Q(0)));
                        break;
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
                        /* 32-bit memory source.  */
                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                            s->mem_index, MO_LEUL);
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_L(0)));
                        break;
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
                        /* 16-bit memory source.  */
                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
                                           s->mem_index, MO_LEUW);
                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_W(0)));
                        break;
                    case 0x2a:            /* movntqda */
                        /* Non-temporal load straight into the destination
                           register; no helper call is needed.  */
                        gen_ldo_env_A0(s, op1_offset);
                        return;
                    default:
                        /* Full 128-bit memory source.  */
                        gen_ldo_env_A0(s, op2_offset);
                    }
                }
            } else {
                /* MMX operands; memory sources are always 64-bit.  */
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldq_env_A0(s, op2_offset);
                }
            }
            if (sse_fn_epp == SSE_SPECIAL) {
                /* No SSE_SPECIAL entries are handled in this path.  */
                goto unknown_op;
            }

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);

            if (b == 0x17) {
                /* ptest writes EFLAGS.  */
                set_cc_op(s, CC_OP_EFLAGS);
            }
            break;
3614
        case 0x238:
        case 0x338:
        do_0f_38_fx:
            /* Various integer extensions at 0f 38 f[0-f].  */
            b = modrm | (b1 << 8);
            modrm = cpu_ldub_code(env, s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;

            switch (b) {
            case 0x3f0: /* crc32 Gd,Eb */
            case 0x3f1: /* crc32 Gd,Ey */
            do_crc32:
                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
                    goto illegal_op;
                }
                /* Source width: the f0 form is always 8-bit, the f1 form
                   follows the operand-size/REX.W prefixes.  */
                if ((b & 0xff) == 0xf0) {
                    ot = MO_8;
                } else if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* The helper folds (8 << ot) bits of cpu_T0 into the
                   running CRC taken from the destination register.  */
                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
                                 cpu_T0, tcg_const_i32(8 << ot));

                /* The destination is written at full operand size.  */
                ot = mo_64_32(s->dflag);
                gen_op_mov_reg_v(ot, reg, cpu_T0);
                break;

            case 0x1f0: /* crc32 or movbe */
            case 0x1f1:
                /* For these insns, the f3 prefix is supposed to have priority
                   over the 66 prefix, but that's not what we implement above
                   setting b1.  */
                if (s->prefix & PREFIX_REPNZ) {
                    goto do_crc32;
                }
                /* FALLTHRU */
            case 0x0f0: /* movbe Gy,My */
            case 0x0f1: /* movbe My,Gy */
                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
                    goto illegal_op;
                }
                if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                /* movbe is a byteswapping load or store; MO_BE makes the
                   memory access itself big-endian.  Bit 0 of the opcode
                   distinguishes load (0) from store (1).  */
                gen_lea_modrm(env, s, modrm);
                if ((b & 1) == 0) {
                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
                                       s->mem_index, ot | MO_BE);
                    gen_op_mov_reg_v(ot, reg, cpu_T0);
                } else {
                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
                                       s->mem_index, ot | MO_BE);
                }
                break;
3677
3678            case 0x0f2: /* andn Gy, By, Ey */
3679                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3680                    || !(s->prefix & PREFIX_VEX)
3681                    || s->vex_l != 0) {
3682                    goto illegal_op;
3683                }
3684                ot = mo_64_32(s->dflag);
3685                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3686                tcg_gen_andc_tl(cpu_T0, cpu_regs[s->vex_v], cpu_T0);
3687                gen_op_mov_reg_v(ot, reg, cpu_T0);
3688                gen_op_update1_cc();
3689                set_cc_op(s, CC_OP_LOGICB + ot);
3690                break;
3691
            case 0x0f7: /* bextr Gy, Ey, By */
                /* BEXTR: extract a bit field from Ey.  The start index is
                   in byte 0 of VEX.vvvv and the length in byte 1.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                {
                    TCGv bound, zero;

                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                    /* Extract START, and shift the operand.
                       Shifts larger than operand size get zeros.  */
                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);

                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    zero = tcg_const_tl(0);
                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
                                       cpu_T0, zero);
                    tcg_temp_free(zero);

                    /* Extract the LEN into a mask.  Lengths larger than
                       operand size get all ones.  */
                    tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
                    tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
                                       cpu_A0, bound);
                    tcg_temp_free(bound);
                    /* Build (1 << LEN) - 1 and mask the shifted operand.  */
                    tcg_gen_movi_tl(cpu_T1, 1);
                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);

                    gen_op_mov_reg_v(ot, reg, cpu_T0);
                    gen_op_update1_cc();
                    set_cc_op(s, CC_OP_LOGICB + ot);
                }
                break;
3731
            case 0x0f5: /* bzhi Gy, Ey, By */
                /* BZHI: zero all bits of Ey at positions >= By[7:0].  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* The bit index comes from the low byte of VEX.vvvv.  */
                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
                {
                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    /* Note that since we're using BMILG (in order to get O
                       cleared) we need to store the inverse into C.  */
                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
                                       cpu_T1, bound);
                    /* Clamp the index to the operand size.  */
                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
                                       bound, bound, cpu_T1);
                    tcg_temp_free(bound);
                }
                /* T0 &= ~(-1 << index) clears everything above the index.  */
                tcg_gen_movi_tl(cpu_A0, -1);
                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
                gen_op_mov_reg_v(ot, reg, cpu_T0);
                gen_op_update1_cc();
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;

            case 0x3f6: /* mulx By, Gy, rdx, Ey */
                /* MULX: unsigned widening multiply of rDX by Ey; the low
                   half goes to VEX.vvvv and the high half to the modrm
                   reg.  Flags are not modified.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                switch (ot) {
                default:
                    /* 32-bit: do the widening multiply in i32 pairs.  */
                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                      cpu_tmp2_i32, cpu_tmp3_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
                    break;
#ifdef TARGET_X86_64
                case MO_64:
                    /* 64-bit: 128-bit product via mulu2_i64.  */
                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
                                      cpu_T0, cpu_regs[R_EDX]);
                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
                    break;
#endif
                }
                break;
3786
            case 0x3f5: /* pdep Gy, By, Ey */
                /* PDEP: scatter the low bits of one operand into the bit
                   positions selected by the other (mask) operand; flags
                   are not modified.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the mask operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                }
                /* NOTE(review): helper args are (value, mask) with the
                   VEX.vvvv register as the mask here — confirm against
                   helper_pdep's definition.  */
                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
                break;

            case 0x2f5: /* pext Gy, By, Ey */
                /* PEXT: gather the mask-selected bits of the source into
                   the low bits of the destination; flags untouched.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the mask operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
                break;
3822
            case 0x1f6: /* adcx Gy, Ey */
            case 0x2f6: /* adox Gy, Ey */
                /* ADCX is add-with-carry using only CF; ADOX is the same
                   using only OF.  The cc_op tracking below lets chained
                   adcx/adox pairs reuse the carry-out of the previous insn
                   without materializing EFLAGS each time.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
                    goto illegal_op;
                } else {
                    TCGv carry_in, carry_out, zero;
                    int end_op;

                    ot = mo_64_32(s->dflag);
                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                    /* Re-use the carry-out from a previous round.  */
                    TCGV_UNUSED(carry_in);
                    /* ADCX keeps its carry in cc_dst, ADOX in cc_src2.  */
                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
                    switch (s->cc_op) {
                    case CC_OP_ADCX:
                        if (b == 0x1f6) {
                            carry_in = cpu_cc_dst;
                            end_op = CC_OP_ADCX;
                        } else {
                            /* adcx followed by adox: both flags live.  */
                            end_op = CC_OP_ADCOX;
                        }
                        break;
                    case CC_OP_ADOX:
                        if (b == 0x1f6) {
                            end_op = CC_OP_ADCOX;
                        } else {
                            carry_in = cpu_cc_src2;
                            end_op = CC_OP_ADOX;
                        }
                        break;
                    case CC_OP_ADCOX:
                        end_op = CC_OP_ADCOX;
                        carry_in = carry_out;
                        break;
                    default:
                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
                        break;
                    }
                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
                    if (TCGV_IS_UNUSED(carry_in)) {
                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
                            gen_compute_eflags(s);
                        }
                        carry_in = cpu_tmp0;
                        /* Isolate the CF (adcx) or OF (adox) bit.  */
                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
                        tcg_gen_andi_tl(carry_in, carry_in, 1);
                    }

                    switch (ot) {
#ifdef TARGET_X86_64
                    case MO_32:
                        /* If we know TL is 64-bit, and we want a 32-bit
                           result, just do everything in 64-bit arithmetic.  */
                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
                        break;
#endif
                    default:
                        /* Otherwise compute the carry-out in two steps.  */
                        zero = tcg_const_tl(0);
                        tcg_gen_add2_tl(cpu_T0, carry_out,
                                        cpu_T0, zero,
                                        carry_in, zero);
                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                        cpu_regs[reg], carry_out,
                                        cpu_T0, zero);
                        tcg_temp_free(zero);
                        break;
                    }
                    set_cc_op(s, end_op);
                }
                break;
3901
            case 0x1f7: /* shlx Gy, Ey, By */
            case 0x2f7: /* sarx Gy, Ey, By */
            case 0x3f7: /* shrx Gy, Ey, By */
                /* BMI2 flagless shifts: count comes from VEX.vvvv, masked
                   as for ordinary shifts; EFLAGS are never modified.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                if (ot == MO_64) {
                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
                } else {
                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
                }
                if (b == 0x1f7) {
                    /* shlx */
                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
                } else if (b == 0x2f7) {
                    /* sarx: sign-extend a 32-bit operand before the
                       arithmetic shift.  */
                    if (ot != MO_64) {
                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
                    }
                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
                } else {
                    /* shrx: zero-extend a 32-bit operand before the
                       logical shift.  */
                    if (ot != MO_64) {
                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
                    }
                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
                }
                gen_op_mov_reg_v(ot, reg, cpu_T0);
                break;
3932
3933            case 0x0f3:
3934            case 0x1f3:
3935            case 0x2f3:
3936            case 0x3f3: /* Group 17 */
3937                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3938                    || !(s->prefix & PREFIX_VEX)
3939                    || s->vex_l != 0) {
3940                    goto illegal_op;
3941                }
3942                ot = mo_64_32(s->dflag);
3943                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3944
3945                switch (reg & 7) {
3946                case 1: /* blsr By,Ey */
3947                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
3948                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3949                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
3950                    gen_op_update2_cc();
3951                    set_cc_op(s, CC_OP_BMILGB + ot);
3952                    break;
3953
3954                case 2: /* blsmsk By,Ey */
3955                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
3956                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
3957                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
3958                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
3959                    set_cc_op(s, CC_OP_BMILGB + ot);
3960                    break;
3961
3962                case 3: /* blsi By, Ey */
3963                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
3964                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
3965                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
3966                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
3967                    set_cc_op(s, CC_OP_BMILGB + ot);
3968                    break;
3969
3970                default:
3971                    goto unknown_op;
3972                }
3973                break;
3974
3975            default:
3976                goto unknown_op;
3977            }
3978            break;
3979
        case 0x03a:
        case 0x13a:
            /* Three-byte 0f 3a opcode space (SSSE3/SSE4 with an immediate
               byte).  The second opcode byte is already in modrm.  */
            b = modrm;
            modrm = cpu_ldub_code(env, s->pc++);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            if (b1 >= 2) {
                /* Only the no-prefix (MMX) and 66 (XMM) forms exist.  */
                goto unknown_op;
            }

            sse_fn_eppi = sse_op_table7[b].op[b1];
            if (!sse_fn_eppi) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                goto illegal_op;
3997
            if (sse_fn_eppi == SSE_SPECIAL) {
                /* SSE4.1 extract/insert insns that move data between an
                   XMM lane and a GPR or memory; the immediate selects the
                   lane (masked to the element count).  Handled inline
                   instead of through a helper.  */
                ot = mo_64_32(s->dflag);
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3)
                    gen_lea_modrm(env, s, modrm);
                reg = ((modrm >> 3) & 7) | rex_r;
                val = cpu_ldub_code(env, s->pc++);
                switch (b) {
                case 0x14: /* pextrb */
                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_B(val & 15)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(ot, rm, cpu_T0);
                    } else {
                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
                                           s->mem_index, MO_UB);
                    }
                    break;
                case 0x15: /* pextrw */
                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_W(val & 7)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(ot, rm, cpu_T0);
                    } else {
                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
                                           s->mem_index, MO_LEUW);
                    }
                    break;
                case 0x16:
                    /* REX.W selects pextrq over pextrd.  */
                    if (ot == MO_32) { /* pextrd */
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(val & 3)));
                        if (mod == 3) {
                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
                        } else {
                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
                                                s->mem_index, MO_LEUL);
                        }
                    } else { /* pextrq */
#ifdef TARGET_X86_64
                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_Q(val & 1)));
                        if (mod == 3) {
                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
                        } else {
                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
                                                s->mem_index, MO_LEQ);
                        }
#else
                        goto illegal_op;
#endif
                    }
                    break;
                case 0x17: /* extractps */
                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_L(val & 3)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(ot, rm, cpu_T0);
                    } else {
                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
                                           s->mem_index, MO_LEUL);
                    }
                    break;
                case 0x20: /* pinsrb */
                    if (mod == 3) {
                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
                    } else {
                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
                                           s->mem_index, MO_UB);
                    }
                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_B(val & 15)));
                    break;
                case 0x21: /* insertps */
                    /* imm bits 7:6 select the source lane (register form),
                       bits 5:4 the destination lane, bits 3:0 a zero-mask
                       applied to the destination afterwards.  */
                    if (mod == 3) {
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,xmm_regs[rm]
                                                .ZMM_L((val >> 6) & 3)));
                    } else {
                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                            s->mem_index, MO_LEUL);
                    }
                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                    offsetof(CPUX86State,xmm_regs[reg]
                                            .ZMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(0)));
                    if ((val >> 1) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(1)));
                    if ((val >> 2) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(2)));
                    if ((val >> 3) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(3)));
                    break;
                case 0x22:
                    /* REX.W selects pinsrq over pinsrd.  */
                    if (ot == MO_32) { /* pinsrd */
                        if (mod == 3) {
                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
                        } else {
                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                                s->mem_index, MO_LEUL);
                        }
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(val & 3)));
                    } else { /* pinsrq */
#ifdef TARGET_X86_64
                        if (mod == 3) {
                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
                        } else {
                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
                                                s->mem_index, MO_LEQ);
                        }
                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_Q(val & 1)));
#else
                        goto illegal_op;
#endif
                    }
                    break;
                }
                return;
            }
4132
            if (b1) {
                /* XMM operands; a memory source is loaded in full (these
                   insns always consume 128 bits).  */
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldo_env_A0(s, op2_offset);
                }
            } else {
                /* MMX operands.  */
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldq_env_A0(s, op2_offset);
                }
            }
            /* Trailing immediate byte, passed through to the helper.  */
            val = cpu_ldub_code(env, s->pc++);

            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
                /* The string-compare insns write EFLAGS.  */
                set_cc_op(s, CC_OP_EFLAGS);

                if (s->dflag == MO_64) {
                    /* The helper must use entire 64-bit gp registers */
                    val |= 1 << 8;
                }
            }

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
            break;
4167
        case 0x33a:
            /* Various integer extensions at 0f 3a f[0-f].  */
            b = modrm | (b1 << 8);
            modrm = cpu_ldub_code(env, s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;

            switch (b) {
            case 0x3f0: /* rorx Gy,Ey, Ib */
                /* BMI2 rotate-right by immediate; EFLAGS untouched.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                b = cpu_ldub_code(env, s->pc++);
                if (ot == MO_64) {
                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
                } else {
                    /* Rotate in 32 bits, then zero-extend the result.  */
                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
                }
                gen_op_mov_reg_v(ot, reg, cpu_T0);
                break;

            default:
                goto unknown_op;
            }
            break;
4198
        default:
        unknown_op:
            /* Unrecognized opcode: shared bail-out for the whole switch.  */
            gen_unknown_opcode(env, s);
            return;
4203        }
4204    } else {
        /* generic MMX or SSE operation */
        switch(b) {
        case 0x70: /* pshufx insn */
        case 0xc6: /* pshufx insn */
        case 0xc2: /* compare insns */
            /* These carry a trailing immediate; record it so rip-relative
               addressing accounts for the extra byte.  */
            s->rip_offset = 1;
            break;
        default:
            break;
        }
        if (is_xmm) {
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            if (mod != 3) {
                /* Memory source: stage it into xmm_t0.  sz selects how
                   many bytes to read (log2): scalar forms must not read
                   past their operand, or they could fault spuriously.  */
                int sz = 4;

                gen_lea_modrm(env, s, modrm);
                op2_offset = offsetof(CPUX86State,xmm_t0);

                switch (b) {
                case 0x50 ... 0x5a:
                case 0x5c ... 0x5f:
                case 0xc2:
                    /* Most sse scalar operations.  */
                    if (b1 == 2) {
                        /* f3 prefix: single-precision scalar (ss).  */
                        sz = 2;
                    } else if (b1 == 3) {
                        /* f2 prefix: double-precision scalar (sd).  */
                        sz = 3;
                    }
                    break;

                case 0x2e:  /* ucomis[sd] */
                case 0x2f:  /* comis[sd] */
                    if (b1 == 0) {
                        sz = 2;
                    } else {
                        sz = 3;
                    }
                    break;
                }

                switch (sz) {
                case 2:
                    /* 32 bit access */
                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
                    tcg_gen_st32_tl(cpu_T0, cpu_env,
                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                    break;
                case 3:
                    /* 64 bit access */
                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
                    break;
                default:
                    /* 128 bit access */
                    gen_ldo_env_A0(s, op2_offset);
                    break;
                }
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
        } else {
            /* MMX: memory sources are always 64-bit.  */
            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
        }
4276        switch(b) {
4277        case 0x0f: /* 3DNow! data insns */
4278            val = cpu_ldub_code(env, s->pc++);
4279            sse_fn_epp = sse_op_table5[val];
4280            if (!sse_fn_epp) {
4281                goto unknown_op;
4282            }
4283            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4284                goto illegal_op;
4285            }
4286            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4287            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4288            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4289            break;
4290        case 0x70: /* pshufx insn */
4291        case 0xc6: /* pshufx insn */
4292            val = cpu_ldub_code(env, s->pc++);
4293            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4294            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4295            /* XXX: introduce a new table? */
4296            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4297            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4298            break;
4299        case 0xc2:
4300            /* compare insns */
4301            val = cpu_ldub_code(env, s->pc++);
4302            if (val >= 8)
4303                goto unknown_op;
4304            sse_fn_epp = sse_op_table4[val][b1];
4305
4306            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4307            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4308            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4309            break;
4310        case 0xf7:
4311            /* maskmov : we must prepare A0 */
4312            if (mod != 3)
4313                goto illegal_op;
4314            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4315            gen_extu(s->aflag, cpu_A0);
4316            gen_add_A0_ds_seg(s);
4317
4318            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4319            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4320            /* XXX: introduce a new table? */
4321            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4322            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4323            break;
4324        default:
4325            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4326            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4327            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4328            break;
4329        }
4330        if (b == 0x2e || b == 0x2f) {
4331            set_cc_op(s, CC_OP_EFLAGS);
4332        }
4333    }
4334}
4335
/* Convert one instruction. s->is_jmp is set if translation must be
   stopped. Returns the next pc value. */
4338static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4339                               target_ulong pc_start)
4340{
4341    int b, prefixes;
4342    int shift;
4343    TCGMemOp ot, aflag, dflag;
4344    int modrm, reg, rm, mod, op, opreg, val;
4345    target_ulong next_eip, tval;
4346    int rex_w, rex_r;
4347
4348    s->pc_start = s->pc = pc_start;
4349    prefixes = 0;
4350    s->override = -1;
4351    rex_w = -1;
4352    rex_r = 0;
4353#ifdef TARGET_X86_64
4354    s->rex_x = 0;
4355    s->rex_b = 0;
4356    x86_64_hregs = 0;
4357#endif
4358    s->rip_offset = 0; /* for relative ip address */
4359    s->vex_l = 0;
4360    s->vex_v = 0;
4361 next_byte:
4362    b = cpu_ldub_code(env, s->pc);
4363    s->pc++;
4364    /* Collect prefixes.  */
4365    switch (b) {
4366    case 0xf3:
4367        prefixes |= PREFIX_REPZ;
4368        goto next_byte;
4369    case 0xf2:
4370        prefixes |= PREFIX_REPNZ;
4371        goto next_byte;
4372    case 0xf0:
4373        prefixes |= PREFIX_LOCK;
4374        goto next_byte;
4375    case 0x2e:
4376        s->override = R_CS;
4377        goto next_byte;
4378    case 0x36:
4379        s->override = R_SS;
4380        goto next_byte;
4381    case 0x3e:
4382        s->override = R_DS;
4383        goto next_byte;
4384    case 0x26:
4385        s->override = R_ES;
4386        goto next_byte;
4387    case 0x64:
4388        s->override = R_FS;
4389        goto next_byte;
4390    case 0x65:
4391        s->override = R_GS;
4392        goto next_byte;
4393    case 0x66:
4394        prefixes |= PREFIX_DATA;
4395        goto next_byte;
4396    case 0x67:
4397        prefixes |= PREFIX_ADR;
4398        goto next_byte;
4399#ifdef TARGET_X86_64
4400    case 0x40 ... 0x4f:
4401        if (CODE64(s)) {
4402            /* REX prefix */
4403            rex_w = (b >> 3) & 1;
4404            rex_r = (b & 0x4) << 1;
4405            s->rex_x = (b & 0x2) << 2;
4406            REX_B(s) = (b & 0x1) << 3;
4407            x86_64_hregs = 1; /* select uniform byte register addressing */
4408            goto next_byte;
4409        }
4410        break;
4411#endif
4412    case 0xc5: /* 2-byte VEX */
4413    case 0xc4: /* 3-byte VEX */
4414        /* VEX prefixes cannot be used except in 32-bit mode.
4415           Otherwise the instruction is LES or LDS.  */
4416        if (s->code32 && !s->vm86) {
4417            static const int pp_prefix[4] = {
4418                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4419            };
4420            int vex3, vex2 = cpu_ldub_code(env, s->pc);
4421
4422            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4423                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4424                   otherwise the instruction is LES or LDS.  */
4425                break;
4426            }
4427            s->pc++;
4428
4429            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4430            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4431                            | PREFIX_LOCK | PREFIX_DATA)) {
4432                goto illegal_op;
4433            }
4434#ifdef TARGET_X86_64
4435            if (x86_64_hregs) {
4436                goto illegal_op;
4437            }
4438#endif
4439            rex_r = (~vex2 >> 4) & 8;
4440            if (b == 0xc5) {
4441                vex3 = vex2;
4442                b = cpu_ldub_code(env, s->pc++);
4443            } else {
4444#ifdef TARGET_X86_64
4445                s->rex_x = (~vex2 >> 3) & 8;
4446                s->rex_b = (~vex2 >> 2) & 8;
4447#endif
4448                vex3 = cpu_ldub_code(env, s->pc++);
4449                rex_w = (vex3 >> 7) & 1;
4450                switch (vex2 & 0x1f) {
4451                case 0x01: /* Implied 0f leading opcode bytes.  */
4452                    b = cpu_ldub_code(env, s->pc++) | 0x100;
4453                    break;
4454                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4455                    b = 0x138;
4456                    break;
4457                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4458                    b = 0x13a;
4459                    break;
4460                default:   /* Reserved for future use.  */
4461                    goto unknown_op;
4462                }
4463            }
4464            s->vex_v = (~vex3 >> 3) & 0xf;
4465            s->vex_l = (vex3 >> 2) & 1;
4466            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4467        }
4468        break;
4469    }
4470
4471    /* Post-process prefixes.  */
4472    if (CODE64(s)) {
4473        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4474           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4475           over 0x66 if both are present.  */
4476        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4477        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4478        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4479    } else {
4480        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4481        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4482            dflag = MO_32;
4483        } else {
4484            dflag = MO_16;
4485        }
4486        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4487        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4488            aflag = MO_32;
4489        }  else {
4490            aflag = MO_16;
4491        }
4492    }
4493
4494    s->prefix = prefixes;
4495    s->aflag = aflag;
4496    s->dflag = dflag;
4497
4498    /* lock generation */
4499    if (prefixes & PREFIX_LOCK)
4500        gen_helper_lock();
4501
4502    /* now check op code */
4503 reswitch:
4504    switch(b) {
4505    case 0x0f:
4506        /**************************/
4507        /* extended op code */
4508        b = cpu_ldub_code(env, s->pc++) | 0x100;
4509        goto reswitch;
4510
4511        /**************************/
4512        /* arith & logic */
4513    case 0x00 ... 0x05:
4514    case 0x08 ... 0x0d:
4515    case 0x10 ... 0x15:
4516    case 0x18 ... 0x1d:
4517    case 0x20 ... 0x25:
4518    case 0x28 ... 0x2d:
4519    case 0x30 ... 0x35:
4520    case 0x38 ... 0x3d:
4521        {
4522            int op, f, val;
4523            op = (b >> 3) & 7;
4524            f = (b >> 1) & 3;
4525
4526            ot = mo_b_d(b, dflag);
4527
4528            switch(f) {
4529            case 0: /* OP Ev, Gv */
4530                modrm = cpu_ldub_code(env, s->pc++);
4531                reg = ((modrm >> 3) & 7) | rex_r;
4532                mod = (modrm >> 6) & 3;
4533                rm = (modrm & 7) | REX_B(s);
4534                if (mod != 3) {
4535                    gen_lea_modrm(env, s, modrm);
4536                    opreg = OR_TMP0;
4537                } else if (op == OP_XORL && rm == reg) {
4538                xor_zero:
4539                    /* xor reg, reg optimisation */
4540                    set_cc_op(s, CC_OP_CLR);
4541                    tcg_gen_movi_tl(cpu_T0, 0);
4542                    gen_op_mov_reg_v(ot, reg, cpu_T0);
4543                    break;
4544                } else {
4545                    opreg = rm;
4546                }
4547                gen_op_mov_v_reg(ot, cpu_T1, reg);
4548                gen_op(s, op, ot, opreg);
4549                break;
4550            case 1: /* OP Gv, Ev */
4551                modrm = cpu_ldub_code(env, s->pc++);
4552                mod = (modrm >> 6) & 3;
4553                reg = ((modrm >> 3) & 7) | rex_r;
4554                rm = (modrm & 7) | REX_B(s);
4555                if (mod != 3) {
4556                    gen_lea_modrm(env, s, modrm);
4557                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4558                } else if (op == OP_XORL && rm == reg) {
4559                    goto xor_zero;
4560                } else {
4561                    gen_op_mov_v_reg(ot, cpu_T1, rm);
4562                }
4563                gen_op(s, op, ot, reg);
4564                break;
4565            case 2: /* OP A, Iv */
4566                val = insn_get(env, s, ot);
4567                tcg_gen_movi_tl(cpu_T1, val);
4568                gen_op(s, op, ot, OR_EAX);
4569                break;
4570            }
4571        }
4572        break;
4573
4574    case 0x82:
4575        if (CODE64(s))
4576            goto illegal_op;
4577    case 0x80: /* GRP1 */
4578    case 0x81:
4579    case 0x83:
4580        {
4581            int val;
4582
4583            ot = mo_b_d(b, dflag);
4584
4585            modrm = cpu_ldub_code(env, s->pc++);
4586            mod = (modrm >> 6) & 3;
4587            rm = (modrm & 7) | REX_B(s);
4588            op = (modrm >> 3) & 7;
4589
4590            if (mod != 3) {
4591                if (b == 0x83)
4592                    s->rip_offset = 1;
4593                else
4594                    s->rip_offset = insn_const_size(ot);
4595                gen_lea_modrm(env, s, modrm);
4596                opreg = OR_TMP0;
4597            } else {
4598                opreg = rm;
4599            }
4600
4601            switch(b) {
4602            default:
4603            case 0x80:
4604            case 0x81:
4605            case 0x82:
4606                val = insn_get(env, s, ot);
4607                break;
4608            case 0x83:
4609                val = (int8_t)insn_get(env, s, MO_8);
4610                break;
4611            }
4612            tcg_gen_movi_tl(cpu_T1, val);
4613            gen_op(s, op, ot, opreg);
4614        }
4615        break;
4616
4617        /**************************/
4618        /* inc, dec, and other misc arith */
4619    case 0x40 ... 0x47: /* inc Gv */
4620        ot = dflag;
4621        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4622        break;
4623    case 0x48 ... 0x4f: /* dec Gv */
4624        ot = dflag;
4625        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4626        break;
4627    case 0xf6: /* GRP3 */
4628    case 0xf7:
4629        ot = mo_b_d(b, dflag);
4630
4631        modrm = cpu_ldub_code(env, s->pc++);
4632        mod = (modrm >> 6) & 3;
4633        rm = (modrm & 7) | REX_B(s);
4634        op = (modrm >> 3) & 7;
4635        if (mod != 3) {
4636            if (op == 0)
4637                s->rip_offset = insn_const_size(ot);
4638            gen_lea_modrm(env, s, modrm);
4639            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4640        } else {
4641            gen_op_mov_v_reg(ot, cpu_T0, rm);
4642        }
4643
4644        switch(op) {
4645        case 0: /* test */
4646            val = insn_get(env, s, ot);
4647            tcg_gen_movi_tl(cpu_T1, val);
4648            gen_op_testl_T0_T1_cc();
4649            set_cc_op(s, CC_OP_LOGICB + ot);
4650            break;
4651        case 2: /* not */
4652            tcg_gen_not_tl(cpu_T0, cpu_T0);
4653            if (mod != 3) {
4654                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4655            } else {
4656                gen_op_mov_reg_v(ot, rm, cpu_T0);
4657            }
4658            break;
4659        case 3: /* neg */
4660            tcg_gen_neg_tl(cpu_T0, cpu_T0);
4661            if (mod != 3) {
4662                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4663            } else {
4664                gen_op_mov_reg_v(ot, rm, cpu_T0);
4665            }
4666            gen_op_update_neg_cc();
4667            set_cc_op(s, CC_OP_SUBB + ot);
4668            break;
4669        case 4: /* mul */
4670            switch(ot) {
4671            case MO_8:
4672                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4673                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
4674                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
4675                /* XXX: use 32 bit mul which could be faster */
4676                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4677                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4678                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4679                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
4680                set_cc_op(s, CC_OP_MULB);
4681                break;
4682            case MO_16:
4683                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4684                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4685                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
4686                /* XXX: use 32 bit mul which could be faster */
4687                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4688                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4689                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4690                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4691                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4692                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4693                set_cc_op(s, CC_OP_MULW);
4694                break;
4695            default:
4696            case MO_32:
4697                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4698                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4699                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4700                                  cpu_tmp2_i32, cpu_tmp3_i32);
4701                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4702                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4703                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4704                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4705                set_cc_op(s, CC_OP_MULL);
4706                break;
4707#ifdef TARGET_X86_64
4708            case MO_64:
4709                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4710                                  cpu_T0, cpu_regs[R_EAX]);
4711                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4712                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4713                set_cc_op(s, CC_OP_MULQ);
4714                break;
4715#endif
4716            }
4717            break;
4718        case 5: /* imul */
4719            switch(ot) {
4720            case MO_8:
4721                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4722                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4723                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
4724                /* XXX: use 32 bit mul which could be faster */
4725                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4726                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4727                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4728                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
4729                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4730                set_cc_op(s, CC_OP_MULB);
4731                break;
4732            case MO_16:
4733                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4734                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4735                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
4736                /* XXX: use 32 bit mul which could be faster */
4737                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4738                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4739                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4740                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
4741                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4742                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4743                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4744                set_cc_op(s, CC_OP_MULW);
4745                break;
4746            default:
4747            case MO_32:
4748                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4749                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4750                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4751                                  cpu_tmp2_i32, cpu_tmp3_i32);
4752                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4753                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4754                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4755                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4756                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4757                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4758                set_cc_op(s, CC_OP_MULL);
4759                break;
4760#ifdef TARGET_X86_64
4761            case MO_64:
4762                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4763                                  cpu_T0, cpu_regs[R_EAX]);
4764                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4765                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4766                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4767                set_cc_op(s, CC_OP_MULQ);
4768                break;
4769#endif
4770            }
4771            break;
4772        case 6: /* div */
4773            switch(ot) {
4774            case MO_8:
4775                gen_helper_divb_AL(cpu_env, cpu_T0);
4776                break;
4777            case MO_16:
4778                gen_helper_divw_AX(cpu_env, cpu_T0);
4779                break;
4780            default:
4781            case MO_32:
4782                gen_helper_divl_EAX(cpu_env, cpu_T0);
4783                break;
4784#ifdef TARGET_X86_64
4785            case MO_64:
4786                gen_helper_divq_EAX(cpu_env, cpu_T0);
4787                break;
4788#endif
4789            }
4790            break;
4791        case 7: /* idiv */
4792            switch(ot) {
4793            case MO_8:
4794                gen_helper_idivb_AL(cpu_env, cpu_T0);
4795                break;
4796            case MO_16:
4797                gen_helper_idivw_AX(cpu_env, cpu_T0);
4798                break;
4799            default:
4800            case MO_32:
4801                gen_helper_idivl_EAX(cpu_env, cpu_T0);
4802                break;
4803#ifdef TARGET_X86_64
4804            case MO_64:
4805                gen_helper_idivq_EAX(cpu_env, cpu_T0);
4806                break;
4807#endif
4808            }
4809            break;
4810        default:
4811            goto unknown_op;
4812        }
4813        break;
4814
4815    case 0xfe: /* GRP4 */
4816    case 0xff: /* GRP5 */
4817        ot = mo_b_d(b, dflag);
4818
4819        modrm = cpu_ldub_code(env, s->pc++);
4820        mod = (modrm >> 6) & 3;
4821        rm = (modrm & 7) | REX_B(s);
4822        op = (modrm >> 3) & 7;
4823        if (op >= 2 && b == 0xfe) {
4824            goto unknown_op;
4825        }
4826        if (CODE64(s)) {
4827            if (op == 2 || op == 4) {
4828                /* operand size for jumps is 64 bit */
4829                ot = MO_64;
4830            } else if (op == 3 || op == 5) {
4831                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4832            } else if (op == 6) {
4833                /* default push size is 64 bit */
4834                ot = mo_pushpop(s, dflag);
4835            }
4836        }
4837        if (mod != 3) {
4838            gen_lea_modrm(env, s, modrm);
4839            if (op >= 2 && op != 3 && op != 5)
4840                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4841        } else {
4842            gen_op_mov_v_reg(ot, cpu_T0, rm);
4843        }
4844
4845        switch(op) {
4846        case 0: /* inc Ev */
4847            if (mod != 3)
4848                opreg = OR_TMP0;
4849            else
4850                opreg = rm;
4851            gen_inc(s, ot, opreg, 1);
4852            break;
4853        case 1: /* dec Ev */
4854            if (mod != 3)
4855                opreg = OR_TMP0;
4856            else
4857                opreg = rm;
4858            gen_inc(s, ot, opreg, -1);
4859            break;
4860        case 2: /* call Ev */
4861            /* XXX: optimize if memory (no 'and' is necessary) */
4862            if (dflag == MO_16) {
4863                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4864            }
4865            next_eip = s->pc - s->cs_base;
4866            tcg_gen_movi_tl(cpu_T1, next_eip);
4867            gen_push_v(s, cpu_T1);
4868            gen_op_jmp_v(cpu_T0);
4869            gen_bnd_jmp(s);
4870            gen_eob(s);
4871            break;
4872        case 3: /* lcall Ev */
4873            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4874            gen_add_A0_im(s, 1 << ot);
4875            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4876        do_lcall:
4877            if (s->pe && !s->vm86) {
4878                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4879                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4880                                           tcg_const_i32(dflag - 1),
4881                                           tcg_const_tl(s->pc - s->cs_base));
4882            } else {
4883                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4884                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
4885                                      tcg_const_i32(dflag - 1),
4886                                      tcg_const_i32(s->pc - s->cs_base));
4887            }
4888            gen_eob(s);
4889            break;
4890        case 4: /* jmp Ev */
4891            if (dflag == MO_16) {
4892                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4893            }
4894            gen_op_jmp_v(cpu_T0);
4895            gen_bnd_jmp(s);
4896            gen_eob(s);
4897            break;
4898        case 5: /* ljmp Ev */
4899            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4900            gen_add_A0_im(s, 1 << ot);
4901            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4902        do_ljmp:
4903            if (s->pe && !s->vm86) {
4904                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4905                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4906                                          tcg_const_tl(s->pc - s->cs_base));
4907            } else {
4908                gen_op_movl_seg_T0_vm(R_CS);
4909                gen_op_jmp_v(cpu_T1);
4910            }
4911            gen_eob(s);
4912            break;
4913        case 6: /* push Ev */
4914            gen_push_v(s, cpu_T0);
4915            break;
4916        default:
4917            goto unknown_op;
4918        }
4919        break;
4920
4921    case 0x84: /* test Ev, Gv */
4922    case 0x85:
4923        ot = mo_b_d(b, dflag);
4924
4925        modrm = cpu_ldub_code(env, s->pc++);
4926        reg = ((modrm >> 3) & 7) | rex_r;
4927
4928        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4929        gen_op_mov_v_reg(ot, cpu_T1, reg);
4930        gen_op_testl_T0_T1_cc();
4931        set_cc_op(s, CC_OP_LOGICB + ot);
4932        break;
4933
4934    case 0xa8: /* test eAX, Iv */
4935    case 0xa9:
4936        ot = mo_b_d(b, dflag);
4937        val = insn_get(env, s, ot);
4938
4939        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
4940        tcg_gen_movi_tl(cpu_T1, val);
4941        gen_op_testl_T0_T1_cc();
4942        set_cc_op(s, CC_OP_LOGICB + ot);
4943        break;
4944
4945    case 0x98: /* CWDE/CBW */
4946        switch (dflag) {
4947#ifdef TARGET_X86_64
4948        case MO_64:
4949            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
4950            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
4951            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
4952            break;
4953#endif
4954        case MO_32:
4955            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
4956            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4957            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
4958            break;
4959        case MO_16:
4960            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
4961            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4962            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4963            break;
4964        default:
4965            tcg_abort();
4966        }
4967        break;
4968    case 0x99: /* CDQ/CWD */
4969        switch (dflag) {
4970#ifdef TARGET_X86_64
4971        case MO_64:
4972            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
4973            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
4974            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
4975            break;
4976#endif
4977        case MO_32:
4978            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
4979            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
4980            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
4981            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
4982            break;
4983        case MO_16:
4984            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
4985            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4986            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
4987            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4988            break;
4989        default:
4990            tcg_abort();
4991        }
4992        break;
4993    case 0x1af: /* imul Gv, Ev */
4994    case 0x69: /* imul Gv, Ev, I */
4995    case 0x6b:
4996        ot = dflag;
4997        modrm = cpu_ldub_code(env, s->pc++);
4998        reg = ((modrm >> 3) & 7) | rex_r;
4999        if (b == 0x69)
5000            s->rip_offset = insn_const_size(ot);
5001        else if (b == 0x6b)
5002            s->rip_offset = 1;
5003        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5004        if (b == 0x69) {
5005            val = insn_get(env, s, ot);
5006            tcg_gen_movi_tl(cpu_T1, val);
5007        } else if (b == 0x6b) {
5008            val = (int8_t)insn_get(env, s, MO_8);
5009            tcg_gen_movi_tl(cpu_T1, val);
5010        } else {
5011            gen_op_mov_v_reg(ot, cpu_T1, reg);
5012        }
5013        switch (ot) {
5014#ifdef TARGET_X86_64
5015        case MO_64:
5016            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
5017            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5018            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5019            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
5020            break;
5021#endif
5022        case MO_32:
5023            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5024            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
5025            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5026                              cpu_tmp2_i32, cpu_tmp3_i32);
5027            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5028            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5029            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5030            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5031            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5032            break;
5033        default:
5034            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5035            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
5036            /* XXX: use 32 bit mul which could be faster */
5037            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
5038            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
5039            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
5040            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
5041            gen_op_mov_reg_v(ot, reg, cpu_T0);
5042            break;
5043        }
5044        set_cc_op(s, CC_OP_MULB + ot);
5045        break;
5046    case 0x1c0:
5047    case 0x1c1: /* xadd Ev, Gv */
5048        ot = mo_b_d(b, dflag);
5049        modrm = cpu_ldub_code(env, s->pc++);
5050        reg = ((modrm >> 3) & 7) | rex_r;
5051        mod = (modrm >> 6) & 3;
5052        if (mod == 3) {
5053            rm = (modrm & 7) | REX_B(s);
5054            gen_op_mov_v_reg(ot, cpu_T0, reg);
5055            gen_op_mov_v_reg(ot, cpu_T1, rm);
5056            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5057            gen_op_mov_reg_v(ot, reg, cpu_T1);
5058            gen_op_mov_reg_v(ot, rm, cpu_T0);
5059        } else {
5060            gen_lea_modrm(env, s, modrm);
5061            gen_op_mov_v_reg(ot, cpu_T0, reg);
5062            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5063            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5064            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5065            gen_op_mov_reg_v(ot, reg, cpu_T1);
5066        }
5067        gen_op_update2_cc();
5068        set_cc_op(s, CC_OP_ADDB + ot);
5069        break;
    case 0x1b0:
    case 0x1b1: /* cmpxchg Ev, Gv */
        {
            TCGLabel *label1, *label2;
            TCGv t0, t1, t2, a0;

            ot = mo_b_d(b, dflag);
            modrm = cpu_ldub_code(env, s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            /* Local temps: they must survive across the brcond below,
               which plain temps do not.  */
            t0 = tcg_temp_local_new();
            t1 = tcg_temp_local_new();
            t2 = tcg_temp_local_new();
            a0 = tcg_temp_local_new();
            /* t1 = replacement value (the Gv source operand).  */
            gen_op_mov_v_reg(ot, t1, reg);
            if (mod == 3) {
                rm = (modrm & 7) | REX_B(s);
                /* t0 = current destination value (register form).  */
                gen_op_mov_v_reg(ot, t0, rm);
            } else {
                gen_lea_modrm(env, s, modrm);
                /* Save the address; cpu_A0 is not preserved across
                   the branch below.  */
                tcg_gen_mov_tl(a0, cpu_A0);
                gen_op_ld_v(s, ot, t0, a0);
                rm = 0; /* avoid warning */
            }
            label1 = gen_new_label();
            /* t2 = accumulator (AL/AX/EAX/RAX) to compare against.  */
            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
            gen_extu(ot, t0);
            gen_extu(ot, t2);
            /* Branch to label1 when accumulator == destination.  */
            tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
            label2 = gen_new_label();
            if (mod == 3) {
                /* Not equal: accumulator receives the old value.  */
                gen_op_mov_reg_v(ot, R_EAX, t0);
                tcg_gen_br(label2);
                gen_set_label(label1);
                /* Equal: destination receives the new value.  */
                gen_op_mov_reg_v(ot, rm, t1);
            } else {
                /* perform no-op store cycle like physical cpu; must be
                   before changing accumulator to ensure idempotency if
                   the store faults and the instruction is restarted */
                gen_op_st_v(s, ot, t0, a0);
                gen_op_mov_reg_v(ot, R_EAX, t0);
                tcg_gen_br(label2);
                gen_set_label(label1);
                gen_op_st_v(s, ot, t1, a0);
            }
            gen_set_label(label2);
            /* Flags are those of the comparison: accumulator - dest.  */
            tcg_gen_mov_tl(cpu_cc_src, t0);
            tcg_gen_mov_tl(cpu_cc_srcT, t2);
            tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
            set_cc_op(s, CC_OP_SUBB + ot);
            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
            tcg_temp_free(a0);
        }
        break;
    case 0x1c7: /* cmpxchg8b */
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        /* Only the memory form with modrm reg field /1 is valid.  */
        if ((mod == 3) || ((modrm & 0x38) != 0x8))
            goto illegal_op;
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            /* REX.W form is cmpxchg16b; gated on CPUID CX16.  */
            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                goto illegal_op;
            gen_lea_modrm(env, s, modrm);
            gen_helper_cmpxchg16b(cpu_env, cpu_A0);
        } else
#endif        
        {
            if (!(s->cpuid_features & CPUID_CX8))
                goto illegal_op;
            gen_lea_modrm(env, s, modrm);
            gen_helper_cmpxchg8b(cpu_env, cpu_A0);
        }
        /* The helpers update the flags in env directly.  */
        set_cc_op(s, CC_OP_EFLAGS);
        break;
5147
5148        /**************************/
5149        /* push/pop */
    case 0x50 ... 0x57: /* push */
        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
        gen_push_v(s, cpu_T0);
        break;
    case 0x58 ... 0x5f: /* pop */
        ot = gen_pop_T0(s);
        /* NOTE: order is important for pop %sp */
        gen_pop_update(s, ot);
        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
        break;
    case 0x60: /* pusha */
        if (CODE64(s))
            goto illegal_op;
        gen_pusha(s);
        break;
    case 0x61: /* popa */
        if (CODE64(s))
            goto illegal_op;
        gen_popa(s);
        break;
    case 0x68: /* push Iv */
    case 0x6a:
        ot = mo_pushpop(s, dflag);
        if (b == 0x68)
            val = insn_get(env, s, ot);
        else
            /* 0x6a pushes a sign-extended byte immediate.  */
            val = (int8_t)insn_get(env, s, MO_8);
        tcg_gen_movi_tl(cpu_T0, val);
        gen_push_v(s, cpu_T0);
        break;
    case 0x8f: /* pop Ev */
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        ot = gen_pop_T0(s);
        if (mod == 3) {
            /* NOTE: order is important for pop %sp */
            gen_pop_update(s, ot);
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_reg_v(ot, rm, cpu_T0);
        } else {
            /* NOTE: order is important too for MMU exceptions */
            s->popl_esp_hack = 1 << ot;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            s->popl_esp_hack = 0;
            gen_pop_update(s, ot);
        }
        break;
    case 0xc8: /* enter */
        {
            int level;
            /* Operands: 16-bit frame size, then 8-bit nesting level.  */
            val = cpu_lduw_code(env, s->pc);
            s->pc += 2;
            level = cpu_ldub_code(env, s->pc++);
            gen_enter(s, val, level);
        }
        break;
    case 0xc9: /* leave */
        gen_leave(s);
        break;
    case 0x06: /* push es */
    case 0x0e: /* push cs */
    case 0x16: /* push ss */
    case 0x1e: /* push ds */
        /* Legacy segment push/pop opcodes are invalid in 64-bit mode.  */
        if (CODE64(s))
            goto illegal_op;
        gen_op_movl_T0_seg(b >> 3);
        gen_push_v(s, cpu_T0);
        break;
    case 0x1a0: /* push fs */
    case 0x1a8: /* push gs */
        gen_op_movl_T0_seg((b >> 3) & 7);
        gen_push_v(s, cpu_T0);
        break;
    case 0x07: /* pop es */
    case 0x17: /* pop ss */
    case 0x1f: /* pop ds */
        if (CODE64(s))
            goto illegal_op;
        reg = b >> 3;
        ot = gen_pop_T0(s);
        gen_movl_seg_T0(s, reg);
        gen_pop_update(s, ot);
        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
        if (s->is_jmp) {
            gen_jmp_im(s->pc - s->cs_base);
            if (reg == R_SS) {
                /* pop %ss: clear single-step for this window and end the
                   TB with interrupts inhibited for one instruction.  */
                s->tf = 0;
                gen_eob_inhibit_irq(s, true);
            } else {
                gen_eob(s);
            }
        }
        break;
    case 0x1a1: /* pop fs */
    case 0x1a9: /* pop gs */
        ot = gen_pop_T0(s);
        gen_movl_seg_T0(s, (b >> 3) & 7);
        gen_pop_update(s, ot);
        if (s->is_jmp) {
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
5253
5254        /**************************/
5255        /* mov */
    case 0x88:
    case 0x89: /* mov Gv, Ev */
        ot = mo_b_d(b, dflag);
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;

        /* generate a generic store */
        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
        break;
    case 0xc6:
    case 0xc7: /* mov Ev, Iv */
        ot = mo_b_d(b, dflag);
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        if (mod != 3) {
            /* An immediate follows the modrm bytes; record its size so
               the address computation can account for it.  */
            s->rip_offset = insn_const_size(ot);
            gen_lea_modrm(env, s, modrm);
        }
        val = insn_get(env, s, ot);
        tcg_gen_movi_tl(cpu_T0, val);
        if (mod != 3) {
            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
        } else {
            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
        }
        break;
    case 0x8a:
    case 0x8b: /* mov Ev, Gv */
        ot = mo_b_d(b, dflag);
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;

        /* Generic load, then move into the destination register.  */
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        gen_op_mov_reg_v(ot, reg, cpu_T0);
        break;
    case 0x8e: /* mov seg, Gv */
        modrm = cpu_ldub_code(env, s->pc++);
        reg = (modrm >> 3) & 7;
        /* CS cannot be the destination of mov; reg 6/7 do not exist.  */
        if (reg >= 6 || reg == R_CS)
            goto illegal_op;
        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
        gen_movl_seg_T0(s, reg);
        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
        if (s->is_jmp) {
            gen_jmp_im(s->pc - s->cs_base);
            if (reg == R_SS) {
                /* mov %ss: one-instruction interrupt shadow, as for pop %ss.  */
                s->tf = 0;
                gen_eob_inhibit_irq(s, true);
            } else {
                gen_eob(s);
            }
        }
        break;
    case 0x8c: /* mov Gv, seg */
        modrm = cpu_ldub_code(env, s->pc++);
        reg = (modrm >> 3) & 7;
        mod = (modrm >> 6) & 3;
        if (reg >= 6)
            goto illegal_op;
        gen_op_movl_T0_seg(reg);
        /* Register destination uses the operand size; memory is 16-bit.  */
        ot = mod == 3 ? dflag : MO_16;
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
        break;
5319
    case 0x1b6: /* movzbS Gv, Eb */
    case 0x1b7: /* movzwS Gv, Ew */
    case 0x1be: /* movsbS Gv, Eb */
    case 0x1bf: /* movswS Gv, Ew */
        {
            TCGMemOp d_ot;
            TCGMemOp s_ot;

            /* d_ot is the size of destination */
            d_ot = dflag;
            /* ot is the size of source: opcode bit 0 selects MO_8/MO_16 */
            ot = (b & 1) + MO_8;
            /* s_ot is the sign+size of source: opcode bit 3 selects movsx */
            s_ot = b & 8 ? MO_SIGN | ot : ot;

            modrm = cpu_ldub_code(env, s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);

            if (mod == 3) {
                /* Register source: extend explicitly.  */
                gen_op_mov_v_reg(ot, cpu_T0, rm);
                switch (s_ot) {
                case MO_UB:
                    tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
                    break;
                case MO_SB:
                    tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
                    break;
                case MO_UW:
                    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
                    break;
                default:
                case MO_SW:
                    tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
                    break;
                }
                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
            } else {
                /* Memory source: the load itself performs the
                   extension, per s_ot.  */
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
            }
        }
        break;
5365
    case 0x8d: /* lea */
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        /* lea requires a memory operand.  */
        if (mod == 3)
            goto illegal_op;
        reg = ((modrm >> 3) & 7) | rex_r;
        {
            /* Compute the effective address only; no memory access
               is performed.  */
            AddressParts a = gen_lea_modrm_0(env, s, modrm);
            TCGv ea = gen_lea_modrm_1(a);
            gen_op_mov_reg_v(dflag, reg, ea);
        }
        break;
5378
    case 0xa0: /* mov EAX, Ov */
    case 0xa1:
    case 0xa2: /* mov Ov, EAX */
    case 0xa3:
        {
            target_ulong offset_addr;

            ot = mo_b_d(b, dflag);
            /* The width of the moffs operand follows the address size.  */
            switch (s->aflag) {
#ifdef TARGET_X86_64
            case MO_64:
                offset_addr = cpu_ldq_code(env, s->pc);
                s->pc += 8;
                break;
#endif
            default:
                offset_addr = insn_get(env, s, s->aflag);
                break;
            }
            tcg_gen_movi_tl(cpu_A0, offset_addr);
            gen_add_A0_ds_seg(s);
            if ((b & 2) == 0) {
                /* 0xa0/0xa1: load into the accumulator.  */
                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
            } else {
                /* 0xa2/0xa3: store the accumulator.  */
                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
            }
        }
        break;
    case 0xd7: /* xlat */
        /* AL = [seg: rBX + zero-extended AL]  */
        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0)
        gen_extu(s->aflag, cpu_A0);
        gen_add_A0_ds_seg(s);
        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
        break;
    case 0xb0 ... 0xb7: /* mov R, Ib */
        val = insn_get(env, s, MO_8);
        tcg_gen_movi_tl(cpu_T0, val);
        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
        break;
    case 0xb8 ... 0xbf: /* mov R, Iv */
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            uint64_t tmp;
            /* 64 bit case: a full 8-byte immediate follows */
            tmp = cpu_ldq_code(env, s->pc);
            s->pc += 8;
            reg = (b & 7) | REX_B(s);
            tcg_gen_movi_tl(cpu_T0, tmp);
            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
        } else
#endif
        {
            ot = dflag;
            val = insn_get(env, s, ot);
            reg = (b & 7) | REX_B(s);
            tcg_gen_movi_tl(cpu_T0, val);
            gen_op_mov_reg_v(ot, reg, cpu_T0);
        }
        break;
5443
    case 0x91 ... 0x97: /* xchg R, EAX */
    do_xchg_reg_eax:
        ot = dflag;
        reg = (b & 7) | REX_B(s);
        rm = R_EAX;
        goto do_xchg_reg;
    case 0x86:
    case 0x87: /* xchg Ev, Gv */
        ot = mo_b_d(b, dflag);
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
        do_xchg_reg:
            /* Register-register swap via two temporaries.  */
            gen_op_mov_v_reg(ot, cpu_T0, reg);
            gen_op_mov_v_reg(ot, cpu_T1, rm);
            gen_op_mov_reg_v(ot, rm, cpu_T0);
            gen_op_mov_reg_v(ot, reg, cpu_T1);
        } else {
            gen_lea_modrm(env, s, modrm);
            gen_op_mov_v_reg(ot, cpu_T0, reg);
            /* for xchg, lock is implicit */
            if (!(prefixes & PREFIX_LOCK))
                gen_helper_lock();
            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
            if (!(prefixes & PREFIX_LOCK))
                gen_helper_unlock();
            /* Register side is written after the store completes.  */
            gen_op_mov_reg_v(ot, reg, cpu_T1);
        }
        break;
    case 0xc4: /* les Gv */
        /* In CODE64 this is VEX3; see above.  */
        op = R_ES;
        goto do_lxx;
    case 0xc5: /* lds Gv */
        /* In CODE64 this is VEX2; see above.  */
        op = R_DS;
        goto do_lxx;
    case 0x1b2: /* lss Gv */
        op = R_SS;
        goto do_lxx;
    case 0x1b4: /* lfs Gv */
        op = R_FS;
        goto do_lxx;
    case 0x1b5: /* lgs Gv */
        op = R_GS;
    do_lxx:
        /* Load a far pointer: offset (16/32-bit) then 16-bit selector.  */
        ot = dflag != MO_16 ? MO_32 : MO_16;
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3)
            goto illegal_op;
        gen_lea_modrm(env, s, modrm);
        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
        gen_add_A0_im(s, 1 << ot);
        /* load the segment first to handle exceptions properly */
        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
        gen_movl_seg_T0(s, op);
        /* then put the data */
        gen_op_mov_reg_v(ot, reg, cpu_T1);
        if (s->is_jmp) {
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
5512
5513        /************************/
5514        /* shifts */
    case 0xc0:
    case 0xc1:
        /* shift Ev,Ib */
        shift = 2;
    grp2:
        /* shift encodes the count source: 0 = CL, 1 = constant 1,
           2 = immediate byte.  */
        {
            ot = mo_b_d(b, dflag);
            modrm = cpu_ldub_code(env, s->pc++);
            mod = (modrm >> 6) & 3;
            op = (modrm >> 3) & 7;

            if (mod != 3) {
                if (shift == 2) {
                    /* An immediate byte follows the memory operand.  */
                    s->rip_offset = 1;
                }
                gen_lea_modrm(env, s, modrm);
                opreg = OR_TMP0;
            } else {
                opreg = (modrm & 7) | REX_B(s);
            }

            /* simpler op */
            if (shift == 0) {
                gen_shift(s, op, ot, opreg, OR_ECX);
            } else {
                if (shift == 2) {
                    /* Re-use shift as the actual immediate count.  */
                    shift = cpu_ldub_code(env, s->pc++);
                }
                gen_shifti(s, op, ot, opreg, shift);
            }
        }
        break;
    case 0xd0:
    case 0xd1:
        /* shift Ev,1 */
        shift = 1;
        goto grp2;
    case 0xd2:
    case 0xd3:
        /* shift Ev,cl */
        shift = 0;
        goto grp2;
5557
    case 0x1a4: /* shld imm */
        op = 0;
        shift = 1;
        goto do_shiftd;
    case 0x1a5: /* shld cl */
        op = 0;
        shift = 0;
        goto do_shiftd;
    case 0x1ac: /* shrd imm */
        op = 1;
        shift = 1;
        goto do_shiftd;
    case 0x1ad: /* shrd cl */
        op = 1;
        shift = 0;
    do_shiftd:
        /* op: 0 = shld, 1 = shrd; shift: 1 = immediate count, 0 = CL.  */
        ot = dflag;
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        reg = ((modrm >> 3) & 7) | rex_r;
        if (mod != 3) {
            gen_lea_modrm(env, s, modrm);
            opreg = OR_TMP0;
        } else {
            opreg = rm;
        }
        /* T1 = the register supplying the bits shifted in.  */
        gen_op_mov_v_reg(ot, cpu_T1, reg);

        if (shift) {
            TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
            tcg_temp_free(imm);
        } else {
            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
        }
        break;
5595
5596        /************************/
5597        /* floats */
5598    case 0xd8 ... 0xdf:
5599        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5600            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5601            /* XXX: what to do if illegal op ? */
5602            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5603            break;
5604        }
5605        modrm = cpu_ldub_code(env, s->pc++);
5606        mod = (modrm >> 6) & 3;
5607        rm = modrm & 7;
5608        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5609        if (mod != 3) {
5610            /* memory op */
5611            gen_lea_modrm(env, s, modrm);
5612            switch(op) {
5613            case 0x00 ... 0x07: /* fxxxs */
5614            case 0x10 ... 0x17: /* fixxxl */
5615            case 0x20 ... 0x27: /* fxxxl */
5616            case 0x30 ... 0x37: /* fixxx */
5617                {
5618                    int op1;
5619                    op1 = op & 7;
5620
5621                    switch(op >> 4) {
5622                    case 0:
5623                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5624                                            s->mem_index, MO_LEUL);
5625                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5626                        break;
5627                    case 1:
5628                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5629                                            s->mem_index, MO_LEUL);
5630                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5631                        break;
5632                    case 2:
5633                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5634                                            s->mem_index, MO_LEQ);
5635                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5636                        break;
5637                    case 3:
5638                    default:
5639                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5640                                            s->mem_index, MO_LESW);
5641                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5642                        break;
5643                    }
5644
5645                    gen_helper_fp_arith_ST0_FT0(op1);
5646                    if (op1 == 3) {
5647                        /* fcomp needs pop */
5648                        gen_helper_fpop(cpu_env);
5649                    }
5650                }
5651                break;
5652            case 0x08: /* flds */
5653            case 0x0a: /* fsts */
5654            case 0x0b: /* fstps */
5655            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5656            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5657            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5658                switch(op & 7) {
5659                case 0:
5660                    switch(op >> 4) {
5661                    case 0:
5662                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5663                                            s->mem_index, MO_LEUL);
5664                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5665                        break;
5666                    case 1:
5667                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5668                                            s->mem_index, MO_LEUL);
5669                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5670                        break;
5671                    case 2:
5672                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5673                                            s->mem_index, MO_LEQ);
5674                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5675                        break;
5676                    case 3:
5677                    default:
5678                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5679                                            s->mem_index, MO_LESW);
5680                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5681                        break;
5682                    }
5683                    break;
5684                case 1:
5685                    /* XXX: the corresponding CPUID bit must be tested ! */
5686                    switch(op >> 4) {
5687                    case 1:
5688                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5689                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5690                                            s->mem_index, MO_LEUL);
5691                        break;
5692                    case 2:
5693                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5694                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5695                                            s->mem_index, MO_LEQ);
5696                        break;
5697                    case 3:
5698                    default:
5699                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5700                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5701                                            s->mem_index, MO_LEUW);
5702                        break;
5703                    }
5704                    gen_helper_fpop(cpu_env);
5705                    break;
5706                default:
5707                    switch(op >> 4) {
5708                    case 0:
5709                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5710                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5711                                            s->mem_index, MO_LEUL);
5712                        break;
5713                    case 1:
5714                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5715                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5716                                            s->mem_index, MO_LEUL);
5717                        break;
5718                    case 2:
5719                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5720                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5721                                            s->mem_index, MO_LEQ);
5722                        break;
5723                    case 3:
5724                    default:
5725                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5726                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5727                                            s->mem_index, MO_LEUW);
5728                        break;
5729                    }
5730                    if ((op & 7) == 3)
5731                        gen_helper_fpop(cpu_env);
5732                    break;
5733                }
5734                break;
5735            case 0x0c: /* fldenv mem */
5736                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5737                break;
5738            case 0x0d: /* fldcw mem */
5739                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5740                                    s->mem_index, MO_LEUW);
5741                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5742                break;
5743            case 0x0e: /* fnstenv mem */
5744                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5745                break;
5746            case 0x0f: /* fnstcw mem */
5747                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5748                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5749                                    s->mem_index, MO_LEUW);
5750                break;
5751            case 0x1d: /* fldt mem */
5752                gen_helper_fldt_ST0(cpu_env, cpu_A0);
5753                break;
5754            case 0x1f: /* fstpt mem */
5755                gen_helper_fstt_ST0(cpu_env, cpu_A0);
5756                gen_helper_fpop(cpu_env);
5757                break;
5758            case 0x2c: /* frstor mem */
5759                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5760                break;
5761            case 0x2e: /* fnsave mem */
5762                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5763                break;
5764            case 0x2f: /* fnstsw mem */
5765                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5766                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5767                                    s->mem_index, MO_LEUW);
5768                break;
5769            case 0x3c: /* fbld */
5770                gen_helper_fbld_ST0(cpu_env, cpu_A0);
5771                break;
5772            case 0x3e: /* fbstp */
5773                gen_helper_fbst_ST0(cpu_env, cpu_A0);
5774                gen_helper_fpop(cpu_env);
5775                break;
5776            case 0x3d: /* fildll */
5777                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5778                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5779                break;
5780            case 0x3f: /* fistpll */
5781                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5782                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5783                gen_helper_fpop(cpu_env);
5784                break;
5785            default:
5786                goto unknown_op;
5787            }
5788        } else {
5789            /* register float ops */
5790            opreg = rm;
5791
5792            switch(op) {
5793            case 0x08: /* fld sti */
5794                gen_helper_fpush(cpu_env);
5795                gen_helper_fmov_ST0_STN(cpu_env,
5796                                        tcg_const_i32((opreg + 1) & 7));
5797                break;
5798            case 0x09: /* fxchg sti */
5799            case 0x29: /* fxchg4 sti, undocumented op */
5800            case 0x39: /* fxchg7 sti, undocumented op */
5801                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5802                break;
5803            case 0x0a: /* grp d9/2 */
5804                switch(rm) {
5805                case 0: /* fnop */
5806                    /* check exceptions (FreeBSD FPU probe) */
5807                    gen_helper_fwait(cpu_env);
5808                    break;
5809                default:
5810                    goto unknown_op;
5811                }
5812                break;
5813            case 0x0c: /* grp d9/4 */
5814                switch(rm) {
5815                case 0: /* fchs */
5816                    gen_helper_fchs_ST0(cpu_env);
5817                    break;
5818                case 1: /* fabs */
5819                    gen_helper_fabs_ST0(cpu_env);
5820                    break;
5821                case 4: /* ftst */
5822                    gen_helper_fldz_FT0(cpu_env);
5823                    gen_helper_fcom_ST0_FT0(cpu_env);
5824                    break;
5825                case 5: /* fxam */
5826                    gen_helper_fxam_ST0(cpu_env);
5827                    break;
5828                default:
5829                    goto unknown_op;
5830                }
5831                break;
5832            case 0x0d: /* grp d9/5 */
5833                {
5834                    switch(rm) {
5835                    case 0:
5836                        gen_helper_fpush(cpu_env);
5837                        gen_helper_fld1_ST0(cpu_env);
5838                        break;
5839                    case 1:
5840                        gen_helper_fpush(cpu_env);
5841                        gen_helper_fldl2t_ST0(cpu_env);
5842                        break;
5843                    case 2:
5844                        gen_helper_fpush(cpu_env);
5845                        gen_helper_fldl2e_ST0(cpu_env);
5846                        break;
5847                    case 3:
5848                        gen_helper_fpush(cpu_env);
5849                        gen_helper_fldpi_ST0(cpu_env);
5850                        break;
5851                    case 4:
5852                        gen_helper_fpush(cpu_env);
5853                        gen_helper_fldlg2_ST0(cpu_env);
5854                        break;
5855                    case 5:
5856                        gen_helper_fpush(cpu_env);
5857                        gen_helper_fldln2_ST0(cpu_env);
5858                        break;
5859                    case 6:
5860                        gen_helper_fpush(cpu_env);
5861                        gen_helper_fldz_ST0(cpu_env);
5862                        break;
5863                    default:
5864                        goto unknown_op;
5865                    }
5866                }
5867                break;
5868            case 0x0e: /* grp d9/6 */
5869                switch(rm) {
5870                case 0: /* f2xm1 */
5871                    gen_helper_f2xm1(cpu_env);
5872                    break;
5873                case 1: /* fyl2x */
5874                    gen_helper_fyl2x(cpu_env);
5875                    break;
5876                case 2: /* fptan */
5877                    gen_helper_fptan(cpu_env);
5878                    break;
5879                case 3: /* fpatan */
5880                    gen_helper_fpatan(cpu_env);
5881                    break;
5882                case 4: /* fxtract */
5883                    gen_helper_fxtract(cpu_env);
5884                    break;
5885                case 5: /* fprem1 */
5886                    gen_helper_fprem1(cpu_env);
5887                    break;
5888                case 6: /* fdecstp */
5889                    gen_helper_fdecstp(cpu_env);
5890                    break;
5891                default:
5892                case 7: /* fincstp */
5893                    gen_helper_fincstp(cpu_env);
5894                    break;
5895                }
5896                break;
5897            case 0x0f: /* grp d9/7 */
5898                switch(rm) {
5899                case 0: /* fprem */
5900                    gen_helper_fprem(cpu_env);
5901                    break;
5902                case 1: /* fyl2xp1 */
5903                    gen_helper_fyl2xp1(cpu_env);
5904                    break;
5905                case 2: /* fsqrt */
5906                    gen_helper_fsqrt(cpu_env);
5907                    break;
5908                case 3: /* fsincos */
5909                    gen_helper_fsincos(cpu_env);
5910                    break;
5911                case 5: /* fscale */
5912                    gen_helper_fscale(cpu_env);
5913                    break;
5914                case 4: /* frndint */
5915                    gen_helper_frndint(cpu_env);
5916                    break;
5917                case 6: /* fsin */
5918                    gen_helper_fsin(cpu_env);
5919                    break;
5920                default:
5921                case 7: /* fcos */
5922                    gen_helper_fcos(cpu_env);
5923                    break;
5924                }
5925                break;
5926            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
5927            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
5928            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
5929                {
5930                    int op1;
5931
5932                    op1 = op & 7;
5933                    if (op >= 0x20) {
5934                        gen_helper_fp_arith_STN_ST0(op1, opreg);
5935                        if (op >= 0x30)
5936                            gen_helper_fpop(cpu_env);
5937                    } else {
5938                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
5939                        gen_helper_fp_arith_ST0_FT0(op1);
5940                    }
5941                }
5942                break;
5943            case 0x02: /* fcom */
5944            case 0x22: /* fcom2, undocumented op */
5945                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
5946                gen_helper_fcom_ST0_FT0(cpu_env);
5947                break;
5948            case 0x03: /* fcomp */
5949            case 0x23: /* fcomp3, undocumented op */
5950            case 0x32: /* fcomp5, undocumented op */
5951                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
5952                gen_helper_fcom_ST0_FT0(cpu_env);
5953                gen_helper_fpop(cpu_env);
5954                break;
5955            case 0x15: /* da/5 */
5956                switch(rm) {
5957                case 1: /* fucompp */
5958                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
5959                    gen_helper_fucom_ST0_FT0(cpu_env);
5960                    gen_helper_fpop(cpu_env);
5961                    gen_helper_fpop(cpu_env);
5962                    break;
5963                default:
5964                    goto unknown_op;
5965                }
5966                break;
5967            case 0x1c:
5968                switch(rm) {
5969                case 0: /* feni (287 only, just do nop here) */
5970                    break;
5971                case 1: /* fdisi (287 only, just do nop here) */
5972                    break;
5973                case 2: /* fclex */
5974                    gen_helper_fclex(cpu_env);
5975                    break;
5976                case 3: /* fninit */
5977                    gen_helper_fninit(cpu_env);
5978                    break;
5979                case 4: /* fsetpm (287 only, just do nop here) */
5980                    break;
5981                default:
5982                    goto unknown_op;
5983                }
5984                break;
5985            case 0x1d: /* fucomi */
5986                if (!(s->cpuid_features & CPUID_CMOV)) {
5987                    goto illegal_op;
5988                }
5989                gen_update_cc_op(s);
5990                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
5991                gen_helper_fucomi_ST0_FT0(cpu_env);
5992                set_cc_op(s, CC_OP_EFLAGS);
5993                break;
5994            case 0x1e: /* fcomi */
5995                if (!(s->cpuid_features & CPUID_CMOV)) {
5996                    goto illegal_op;
5997                }
5998                gen_update_cc_op(s);
5999                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6000                gen_helper_fcomi_ST0_FT0(cpu_env);
6001                set_cc_op(s, CC_OP_EFLAGS);
6002                break;
6003            case 0x28: /* ffree sti */
6004                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6005                break;
6006            case 0x2a: /* fst sti */
6007                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6008                break;
6009            case 0x2b: /* fstp sti */
6010            case 0x0b: /* fstp1 sti, undocumented op */
6011            case 0x3a: /* fstp8 sti, undocumented op */
6012            case 0x3b: /* fstp9 sti, undocumented op */
6013                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6014                gen_helper_fpop(cpu_env);
6015                break;
6016            case 0x2c: /* fucom st(i) */
6017                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6018                gen_helper_fucom_ST0_FT0(cpu_env);
6019                break;
6020            case 0x2d: /* fucomp st(i) */
6021                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6022                gen_helper_fucom_ST0_FT0(cpu_env);
6023                gen_helper_fpop(cpu_env);
6024                break;
6025            case 0x33: /* de/3 */
6026                switch(rm) {
6027                case 1: /* fcompp */
6028                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6029                    gen_helper_fcom_ST0_FT0(cpu_env);
6030                    gen_helper_fpop(cpu_env);
6031                    gen_helper_fpop(cpu_env);
6032                    break;
6033                default:
6034                    goto unknown_op;
6035                }
6036                break;
6037            case 0x38: /* ffreep sti, undocumented op */
6038                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6039                gen_helper_fpop(cpu_env);
6040                break;
6041            case 0x3c: /* df/4 */
6042                switch(rm) {
6043                case 0:
6044                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
6045                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
6046                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
6047                    break;
6048                default:
6049                    goto unknown_op;
6050                }
6051                break;
6052            case 0x3d: /* fucomip */
6053                if (!(s->cpuid_features & CPUID_CMOV)) {
6054                    goto illegal_op;
6055                }
6056                gen_update_cc_op(s);
6057                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6058                gen_helper_fucomi_ST0_FT0(cpu_env);
6059                gen_helper_fpop(cpu_env);
6060                set_cc_op(s, CC_OP_EFLAGS);
6061                break;
6062            case 0x3e: /* fcomip */
6063                if (!(s->cpuid_features & CPUID_CMOV)) {
6064                    goto illegal_op;
6065                }
6066                gen_update_cc_op(s);
6067                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6068                gen_helper_fcomi_ST0_FT0(cpu_env);
6069                gen_helper_fpop(cpu_env);
6070                set_cc_op(s, CC_OP_EFLAGS);
6071                break;
6072            case 0x10 ... 0x13: /* fcmovxx */
6073            case 0x18 ... 0x1b:
6074                {
6075                    int op1;
6076                    TCGLabel *l1;
6077                    static const uint8_t fcmov_cc[8] = {
6078                        (JCC_B << 1),
6079                        (JCC_Z << 1),
6080                        (JCC_BE << 1),
6081                        (JCC_P << 1),
6082                    };
6083
6084                    if (!(s->cpuid_features & CPUID_CMOV)) {
6085                        goto illegal_op;
6086                    }
6087                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6088                    l1 = gen_new_label();
6089                    gen_jcc1_noeob(s, op1, l1);
6090                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6091                    gen_set_label(l1);
6092                }
6093                break;
6094            default:
6095                goto unknown_op;
6096            }
6097        }
6098        break;
6099        /************************/
6100        /* string ops */
6101
6102    case 0xa4: /* movsS */
6103    case 0xa5:
6104        ot = mo_b_d(b, dflag);
6105        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6106            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6107        } else {
6108            gen_movs(s, ot);
6109        }
6110        break;
6111
6112    case 0xaa: /* stosS */
6113    case 0xab:
6114        ot = mo_b_d(b, dflag);
6115        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6116            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6117        } else {
6118            gen_stos(s, ot);
6119        }
6120        break;
6121    case 0xac: /* lodsS */
6122    case 0xad:
6123        ot = mo_b_d(b, dflag);
6124        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6125            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6126        } else {
6127            gen_lods(s, ot);
6128        }
6129        break;
6130    case 0xae: /* scasS */
6131    case 0xaf:
6132        ot = mo_b_d(b, dflag);
6133        if (prefixes & PREFIX_REPNZ) {
6134            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6135        } else if (prefixes & PREFIX_REPZ) {
6136            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6137        } else {
6138            gen_scas(s, ot);
6139        }
6140        break;
6141
6142    case 0xa6: /* cmpsS */
6143    case 0xa7:
6144        ot = mo_b_d(b, dflag);
6145        if (prefixes & PREFIX_REPNZ) {
6146            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6147        } else if (prefixes & PREFIX_REPZ) {
6148            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6149        } else {
6150            gen_cmps(s, ot);
6151        }
6152        break;
6153    case 0x6c: /* insS */
6154    case 0x6d:
6155        ot = mo_b_d32(b, dflag);
6156        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6157        gen_check_io(s, ot, pc_start - s->cs_base, 
6158                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6159        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6160            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6161        } else {
6162            gen_ins(s, ot);
6163            if (s->tb->cflags & CF_USE_ICOUNT) {
6164                gen_jmp(s, s->pc - s->cs_base);
6165            }
6166        }
6167        break;
6168    case 0x6e: /* outsS */
6169    case 0x6f:
6170        ot = mo_b_d32(b, dflag);
6171        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6172        gen_check_io(s, ot, pc_start - s->cs_base,
6173                     svm_is_rep(prefixes) | 4);
6174        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6175            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6176        } else {
6177            gen_outs(s, ot);
6178            if (s->tb->cflags & CF_USE_ICOUNT) {
6179                gen_jmp(s, s->pc - s->cs_base);
6180            }
6181        }
6182        break;
6183
6184        /************************/
6185        /* port I/O */
6186
6187    case 0xe4:
6188    case 0xe5:
6189        ot = mo_b_d32(b, dflag);
6190        val = cpu_ldub_code(env, s->pc++);
6191        tcg_gen_movi_tl(cpu_T0, val);
6192        gen_check_io(s, ot, pc_start - s->cs_base,
6193                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6194        if (s->tb->cflags & CF_USE_ICOUNT) {
6195            gen_io_start();
6196        }
6197        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6198        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6199        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6200        gen_bpt_io(s, cpu_tmp2_i32, ot);
6201        if (s->tb->cflags & CF_USE_ICOUNT) {
6202            gen_io_end();
6203            gen_jmp(s, s->pc - s->cs_base);
6204        }
6205        break;
6206    case 0xe6:
6207    case 0xe7:
6208        ot = mo_b_d32(b, dflag);
6209        val = cpu_ldub_code(env, s->pc++);
6210        tcg_gen_movi_tl(cpu_T0, val);
6211        gen_check_io(s, ot, pc_start - s->cs_base,
6212                     svm_is_rep(prefixes));
6213        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6214
6215        if (s->tb->cflags & CF_USE_ICOUNT) {
6216            gen_io_start();
6217        }
6218        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6219        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6220        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6221        gen_bpt_io(s, cpu_tmp2_i32, ot);
6222        if (s->tb->cflags & CF_USE_ICOUNT) {
6223            gen_io_end();
6224            gen_jmp(s, s->pc - s->cs_base);
6225        }
6226        break;
6227    case 0xec:
6228    case 0xed:
6229        ot = mo_b_d32(b, dflag);
6230        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6231        gen_check_io(s, ot, pc_start - s->cs_base,
6232                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6233        if (s->tb->cflags & CF_USE_ICOUNT) {
6234            gen_io_start();
6235        }
6236        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6237        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6238        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6239        gen_bpt_io(s, cpu_tmp2_i32, ot);
6240        if (s->tb->cflags & CF_USE_ICOUNT) {
6241            gen_io_end();
6242            gen_jmp(s, s->pc - s->cs_base);
6243        }
6244        break;
6245    case 0xee:
6246    case 0xef:
6247        ot = mo_b_d32(b, dflag);
6248        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6249        gen_check_io(s, ot, pc_start - s->cs_base,
6250                     svm_is_rep(prefixes));
6251        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6252
6253        if (s->tb->cflags & CF_USE_ICOUNT) {
6254            gen_io_start();
6255        }
6256        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6257        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6258        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6259        gen_bpt_io(s, cpu_tmp2_i32, ot);
6260        if (s->tb->cflags & CF_USE_ICOUNT) {
6261            gen_io_end();
6262            gen_jmp(s, s->pc - s->cs_base);
6263        }
6264        break;
6265
6266        /************************/
6267        /* control */
6268    case 0xc2: /* ret im */
6269        val = cpu_ldsw_code(env, s->pc);
6270        s->pc += 2;
6271        ot = gen_pop_T0(s);
6272        gen_stack_update(s, val + (1 << ot));
6273        /* Note that gen_pop_T0 uses a zero-extending load.  */
6274        gen_op_jmp_v(cpu_T0);
6275        gen_bnd_jmp(s);
6276        gen_eob(s);
6277        break;
6278    case 0xc3: /* ret */
6279        ot = gen_pop_T0(s);
6280        gen_pop_update(s, ot);
6281        /* Note that gen_pop_T0 uses a zero-extending load.  */
6282        gen_op_jmp_v(cpu_T0);
6283        gen_bnd_jmp(s);
6284        gen_eob(s);
6285        break;
6286    case 0xca: /* lret im */
6287        val = cpu_ldsw_code(env, s->pc);
6288        s->pc += 2;
6289    do_lret:
6290        if (s->pe && !s->vm86) {
6291            gen_update_cc_op(s);
6292            gen_jmp_im(pc_start - s->cs_base);
6293            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6294                                      tcg_const_i32(val));
6295        } else {
6296            gen_stack_A0(s);
6297            /* pop offset */
6298            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6299            /* NOTE: keeping EIP updated is not a problem in case of
6300               exception */
6301            gen_op_jmp_v(cpu_T0);
6302            /* pop selector */
6303            gen_add_A0_im(s, 1 << dflag);
6304            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6305            gen_op_movl_seg_T0_vm(R_CS);
6306            /* add stack offset */
6307            gen_stack_update(s, val + (2 << dflag));
6308        }
6309        gen_eob(s);
6310        break;
6311    case 0xcb: /* lret */
6312        val = 0;
6313        goto do_lret;
6314    case 0xcf: /* iret */
6315        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6316        if (!s->pe) {
6317            /* real mode */
6318            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6319            set_cc_op(s, CC_OP_EFLAGS);
6320        } else if (s->vm86) {
6321            if (s->iopl != 3) {
6322                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6323            } else {
6324                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6325                set_cc_op(s, CC_OP_EFLAGS);
6326            }
6327        } else {
6328            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6329                                      tcg_const_i32(s->pc - s->cs_base));
6330            set_cc_op(s, CC_OP_EFLAGS);
6331        }
6332        gen_eob(s);
6333        break;
6334    case 0xe8: /* call im */
6335        {
6336            if (dflag != MO_16) {
6337                tval = (int32_t)insn_get(env, s, MO_32);
6338            } else {
6339                tval = (int16_t)insn_get(env, s, MO_16);
6340            }
6341            next_eip = s->pc - s->cs_base;
6342            tval += next_eip;
6343            if (dflag == MO_16) {
6344                tval &= 0xffff;
6345            } else if (!CODE64(s)) {
6346                tval &= 0xffffffff;
6347            }
6348            tcg_gen_movi_tl(cpu_T0, next_eip);
6349            gen_push_v(s, cpu_T0);
6350            gen_bnd_jmp(s);
6351            gen_jmp(s, tval);
6352        }
6353        break;
6354    case 0x9a: /* lcall im */
6355        {
6356            unsigned int selector, offset;
6357
6358            if (CODE64(s))
6359                goto illegal_op;
6360            ot = dflag;
6361            offset = insn_get(env, s, ot);
6362            selector = insn_get(env, s, MO_16);
6363
6364            tcg_gen_movi_tl(cpu_T0, selector);
6365            tcg_gen_movi_tl(cpu_T1, offset);
6366        }
6367        goto do_lcall;
6368    case 0xe9: /* jmp im */
6369        if (dflag != MO_16) {
6370            tval = (int32_t)insn_get(env, s, MO_32);
6371        } else {
6372            tval = (int16_t)insn_get(env, s, MO_16);
6373        }
6374        tval += s->pc - s->cs_base;
6375        if (dflag == MO_16) {
6376            tval &= 0xffff;
6377        } else if (!CODE64(s)) {
6378            tval &= 0xffffffff;
6379        }
6380        gen_bnd_jmp(s);
6381        gen_jmp(s, tval);
6382        break;
6383    case 0xea: /* ljmp im */
6384        {
6385            unsigned int selector, offset;
6386
6387            if (CODE64(s))
6388                goto illegal_op;
6389            ot = dflag;
6390            offset = insn_get(env, s, ot);
6391            selector = insn_get(env, s, MO_16);
6392
6393            tcg_gen_movi_tl(cpu_T0, selector);
6394            tcg_gen_movi_tl(cpu_T1, offset);
6395        }
6396        goto do_ljmp;
6397    case 0xeb: /* jmp Jb */
6398        tval = (int8_t)insn_get(env, s, MO_8);
6399        tval += s->pc - s->cs_base;
6400        if (dflag == MO_16) {
6401            tval &= 0xffff;
6402        }
6403        gen_jmp(s, tval);
6404        break;
6405    case 0x70 ... 0x7f: /* jcc Jb */
6406        tval = (int8_t)insn_get(env, s, MO_8);
6407        goto do_jcc;
6408    case 0x180 ... 0x18f: /* jcc Jv */
6409        if (dflag != MO_16) {
6410            tval = (int32_t)insn_get(env, s, MO_32);
6411        } else {
6412            tval = (int16_t)insn_get(env, s, MO_16);
6413        }
6414    do_jcc:
6415        next_eip = s->pc - s->cs_base;
6416        tval += next_eip;
6417        if (dflag == MO_16) {
6418            tval &= 0xffff;
6419        }
6420        gen_bnd_jmp(s);
6421        gen_jcc(s, b, tval, next_eip);
6422        break;
6423
6424    case 0x190 ... 0x19f: /* setcc Gv */
6425        modrm = cpu_ldub_code(env, s->pc++);
6426        gen_setcc1(s, b, cpu_T0);
6427        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6428        break;
6429    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6430        if (!(s->cpuid_features & CPUID_CMOV)) {
6431            goto illegal_op;
6432        }
6433        ot = dflag;
6434        modrm = cpu_ldub_code(env, s->pc++);
6435        reg = ((modrm >> 3) & 7) | rex_r;
6436        gen_cmovcc1(env, s, ot, b, modrm, reg);
6437        break;
6438
6439        /************************/
6440        /* flags */
6441    case 0x9c: /* pushf */
6442        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6443        if (s->vm86 && s->iopl != 3) {
6444            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6445        } else {
6446            gen_update_cc_op(s);
6447            gen_helper_read_eflags(cpu_T0, cpu_env);
6448            gen_push_v(s, cpu_T0);
6449        }
6450        break;
6451    case 0x9d: /* popf */
6452        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6453        if (s->vm86 && s->iopl != 3) {
6454            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6455        } else {
6456            ot = gen_pop_T0(s);
6457            if (s->cpl == 0) {
6458                if (dflag != MO_16) {
6459                    gen_helper_write_eflags(cpu_env, cpu_T0,
6460                                            tcg_const_i32((TF_MASK | AC_MASK |
6461                                                           ID_MASK | NT_MASK |
6462                                                           IF_MASK |
6463                                                           IOPL_MASK)));
6464                } else {
6465                    gen_helper_write_eflags(cpu_env, cpu_T0,
6466                                            tcg_const_i32((TF_MASK | AC_MASK |
6467                                                           ID_MASK | NT_MASK |
6468                                                           IF_MASK | IOPL_MASK)
6469                                                          & 0xffff));
6470                }
6471            } else {
6472                if (s->cpl <= s->iopl) {
6473                    if (dflag != MO_16) {
6474                        gen_helper_write_eflags(cpu_env, cpu_T0,
6475                                                tcg_const_i32((TF_MASK |
6476                                                               AC_MASK |
6477                                                               ID_MASK |
6478                                                               NT_MASK |
6479                                                               IF_MASK)));
6480                    } else {
6481                        gen_helper_write_eflags(cpu_env, cpu_T0,
6482                                                tcg_const_i32((TF_MASK |
6483                                                               AC_MASK |
6484                                                               ID_MASK |
6485                                                               NT_MASK |
6486                                                               IF_MASK)
6487                                                              & 0xffff));
6488                    }
6489                } else {
6490                    if (dflag != MO_16) {
6491                        gen_helper_write_eflags(cpu_env, cpu_T0,
6492                                           tcg_const_i32((TF_MASK | AC_MASK |
6493                                                          ID_MASK | NT_MASK)));
6494                    } else {
6495                        gen_helper_write_eflags(cpu_env, cpu_T0,
6496                                           tcg_const_i32((TF_MASK | AC_MASK |
6497                                                          ID_MASK | NT_MASK)
6498                                                         & 0xffff));
6499                    }
6500                }
6501            }
6502            gen_pop_update(s, ot);
6503            set_cc_op(s, CC_OP_EFLAGS);
6504            /* abort translation because TF/AC flag may change */
6505            gen_jmp_im(s->pc - s->cs_base);
6506            gen_eob(s);
6507        }
6508        break;
6509    case 0x9e: /* sahf */
6510        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6511            goto illegal_op;
6512        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
6513        gen_compute_eflags(s);
6514        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6515        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6516        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
6517        break;
6518    case 0x9f: /* lahf */
6519        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6520            goto illegal_op;
6521        gen_compute_eflags(s);
6522        /* Note: gen_compute_eflags() only gives the condition codes */
6523        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
6524        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
6525        break;
6526    case 0xf5: /* cmc */
6527        gen_compute_eflags(s);
6528        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6529        break;
6530    case 0xf8: /* clc */
6531        gen_compute_eflags(s);
6532        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6533        break;
6534    case 0xf9: /* stc */
6535        gen_compute_eflags(s);
6536        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6537        break;
6538    case 0xfc: /* cld */
6539        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
6540        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6541        break;
6542    case 0xfd: /* std */
6543        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
6544        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6545        break;
6546
6547        /************************/
6548        /* bit operations */
6549    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6550        ot = dflag;
6551        modrm = cpu_ldub_code(env, s->pc++);
6552        op = (modrm >> 3) & 7;
6553        mod = (modrm >> 6) & 3;
6554        rm = (modrm & 7) | REX_B(s);
6555        if (mod != 3) {
6556            s->rip_offset = 1;
6557            gen_lea_modrm(env, s, modrm);
6558            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6559        } else {
6560            gen_op_mov_v_reg(ot, cpu_T0, rm);
6561        }
6562        /* load shift */
6563        val = cpu_ldub_code(env, s->pc++);
6564        tcg_gen_movi_tl(cpu_T1, val);
6565        if (op < 4)
6566            goto unknown_op;
6567        op -= 4;
6568        goto bt_op;
6569    case 0x1a3: /* bt Gv, Ev */
6570        op = 0;
6571        goto do_btx;
6572    case 0x1ab: /* bts */
6573        op = 1;
6574        goto do_btx;
6575    case 0x1b3: /* btr */
6576        op = 2;
6577        goto do_btx;
6578    case 0x1bb: /* btc */
6579        op = 3;
6580    do_btx:
6581        ot = dflag;
6582        modrm = cpu_ldub_code(env, s->pc++);
6583        reg = ((modrm >> 3) & 7) | rex_r;
6584        mod = (modrm >> 6) & 3;
6585        rm = (modrm & 7) | REX_B(s);
6586        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
6587        if (mod != 3) {
6588            gen_lea_modrm(env, s, modrm);
6589            /* specific case: we need to add a displacement */
6590            gen_exts(ot, cpu_T1);
6591            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
6592            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
6593            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
6594            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6595        } else {
6596            gen_op_mov_v_reg(ot, cpu_T0, rm);
6597        }
6598    bt_op:
6599        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
6600        tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6601        switch(op) {
6602        case 0:
6603            break;
6604        case 1:
6605            tcg_gen_movi_tl(cpu_tmp0, 1);
6606            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
6607            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
6608            break;
6609        case 2:
6610            tcg_gen_movi_tl(cpu_tmp0, 1);
6611            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
6612            tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
6613            break;
6614        default:
6615        case 3:
6616            tcg_gen_movi_tl(cpu_tmp0, 1);
6617            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
6618            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
6619            break;
6620        }
6621        if (op != 0) {
6622            if (mod != 3) {
6623                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
6624            } else {
6625                gen_op_mov_reg_v(ot, rm, cpu_T0);
6626            }
6627        }
6628
6629        /* Delay all CC updates until after the store above.  Note that
6630           C is the result of the test, Z is unchanged, and the others
6631           are all undefined.  */
6632        switch (s->cc_op) {
6633        case CC_OP_MULB ... CC_OP_MULQ:
6634        case CC_OP_ADDB ... CC_OP_ADDQ:
6635        case CC_OP_ADCB ... CC_OP_ADCQ:
6636        case CC_OP_SUBB ... CC_OP_SUBQ:
6637        case CC_OP_SBBB ... CC_OP_SBBQ:
6638        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6639        case CC_OP_INCB ... CC_OP_INCQ:
6640        case CC_OP_DECB ... CC_OP_DECQ:
6641        case CC_OP_SHLB ... CC_OP_SHLQ:
6642        case CC_OP_SARB ... CC_OP_SARQ:
6643        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6644            /* Z was going to be computed from the non-zero status of CC_DST.
6645               We can get that same Z value (and the new C value) by leaving
6646               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6647               same width.  */
6648            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
6649            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6650            break;
6651        default:
6652            /* Otherwise, generate EFLAGS and replace the C bit.  */
6653            gen_compute_eflags(s);
6654            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
6655                               ctz32(CC_C), 1);
6656            break;
6657        }
6658        break;
6659    case 0x1bc: /* bsf / tzcnt */
6660    case 0x1bd: /* bsr / lzcnt */
6661        ot = dflag;
6662        modrm = cpu_ldub_code(env, s->pc++);
6663        reg = ((modrm >> 3) & 7) | rex_r;
6664        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6665        gen_extu(ot, cpu_T0);
6666
6667        /* Note that lzcnt and tzcnt are in different extensions.  */
6668        if ((prefixes & PREFIX_REPZ)
6669            && (b & 1
6670                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6671                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6672            int size = 8 << ot;
6673            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
6674            if (b & 1) {
6675                /* For lzcnt, reduce the target_ulong result by the
6676                   number of zeros that we expect to find at the top.  */
6677                gen_helper_clz(cpu_T0, cpu_T0);
6678                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
6679            } else {
6680                /* For tzcnt, a zero input must return the operand size:
6681                   force all bits outside the operand size to 1.  */
6682                target_ulong mask = (target_ulong)-2 << (size - 1);
6683                tcg_gen_ori_tl(cpu_T0, cpu_T0, mask);
6684                gen_helper_ctz(cpu_T0, cpu_T0);
6685            }
6686            /* For lzcnt/tzcnt, C and Z bits are defined and are
6687               related to the result.  */
6688            gen_op_update1_cc();
6689            set_cc_op(s, CC_OP_BMILGB + ot);
6690        } else {
6691            /* For bsr/bsf, only the Z bit is defined and it is related
6692               to the input and not the result.  */
6693            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
6694            set_cc_op(s, CC_OP_LOGICB + ot);
6695            if (b & 1) {
6696                /* For bsr, return the bit index of the first 1 bit,
6697                   not the count of leading zeros.  */
6698                gen_helper_clz(cpu_T0, cpu_T0);
6699                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
6700            } else {
6701                gen_helper_ctz(cpu_T0, cpu_T0);
6702            }
6703            /* ??? The manual says that the output is undefined when the
6704               input is zero, but real hardware leaves it unchanged, and
6705               real programs appear to depend on that.  */
6706            tcg_gen_movi_tl(cpu_tmp0, 0);
6707            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T0, cpu_cc_dst, cpu_tmp0,
6708                               cpu_regs[reg], cpu_T0);
6709        }
6710        gen_op_mov_reg_v(ot, reg, cpu_T0);
6711        break;
6712        /************************/
6713        /* bcd */
6714    case 0x27: /* daa */
6715        if (CODE64(s))
6716            goto illegal_op;
6717        gen_update_cc_op(s);
6718        gen_helper_daa(cpu_env);
6719        set_cc_op(s, CC_OP_EFLAGS);
6720        break;
6721    case 0x2f: /* das */
6722        if (CODE64(s))
6723            goto illegal_op;
6724        gen_update_cc_op(s);
6725        gen_helper_das(cpu_env);
6726        set_cc_op(s, CC_OP_EFLAGS);
6727        break;
6728    case 0x37: /* aaa */
6729        if (CODE64(s))
6730            goto illegal_op;
6731        gen_update_cc_op(s);
6732        gen_helper_aaa(cpu_env);
6733        set_cc_op(s, CC_OP_EFLAGS);
6734        break;
6735    case 0x3f: /* aas */
6736        if (CODE64(s))
6737            goto illegal_op;
6738        gen_update_cc_op(s);
6739        gen_helper_aas(cpu_env);
6740        set_cc_op(s, CC_OP_EFLAGS);
6741        break;
6742    case 0xd4: /* aam */
6743        if (CODE64(s))
6744            goto illegal_op;
6745        val = cpu_ldub_code(env, s->pc++);
6746        if (val == 0) {
6747            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6748        } else {
6749            gen_helper_aam(cpu_env, tcg_const_i32(val));
6750            set_cc_op(s, CC_OP_LOGICB);
6751        }
6752        break;
6753    case 0xd5: /* aad */
6754        if (CODE64(s))
6755            goto illegal_op;
6756        val = cpu_ldub_code(env, s->pc++);
6757        gen_helper_aad(cpu_env, tcg_const_i32(val));
6758        set_cc_op(s, CC_OP_LOGICB);
6759        break;
6760        /************************/
6761        /* misc */
6762    case 0x90: /* nop */
6763        /* XXX: correct lock test for all insn */
6764        if (prefixes & PREFIX_LOCK) {
6765            goto illegal_op;
6766        }
6767        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
6768        if (REX_B(s)) {
6769            goto do_xchg_reg_eax;
6770        }
6771        if (prefixes & PREFIX_REPZ) {
6772            gen_update_cc_op(s);
6773            gen_jmp_im(pc_start - s->cs_base);
6774            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
6775            s->is_jmp = DISAS_TB_JUMP;
6776        }
6777        break;
6778    case 0x9b: /* fwait */
6779        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
6780            (HF_MP_MASK | HF_TS_MASK)) {
6781            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6782        } else {
6783            gen_helper_fwait(cpu_env);
6784        }
6785        break;
6786    case 0xcc: /* int3 */
6787        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
6788        break;
6789    case 0xcd: /* int N */
6790        val = cpu_ldub_code(env, s->pc++);
6791        if (s->vm86 && s->iopl != 3) {
6792            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6793        } else {
6794            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
6795        }
6796        break;
6797    case 0xce: /* into */
6798        if (CODE64(s))
6799            goto illegal_op;
6800        gen_update_cc_op(s);
6801        gen_jmp_im(pc_start - s->cs_base);
6802        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
6803        break;
6804#ifdef WANT_ICEBP
6805    case 0xf1: /* icebp (undocumented, exits to external debugger) */
6806        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
6807#if 1
6808        gen_debug(s, pc_start - s->cs_base);
6809#else
6810        /* start debug */
6811        tb_flush(CPU(x86_env_get_cpu(env)));
6812        qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
6813#endif
6814        break;
6815#endif
6816    case 0xfa: /* cli */
6817        if (!s->vm86) {
6818            if (s->cpl <= s->iopl) {
6819                gen_helper_cli(cpu_env);
6820            } else {
6821                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6822            }
6823        } else {
6824            if (s->iopl == 3) {
6825                gen_helper_cli(cpu_env);
6826            } else {
6827                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6828            }
6829        }
6830        break;
6831    case 0xfb: /* sti */
6832        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
6833            gen_helper_sti(cpu_env);
            /* interrupts are recognized only starting with the first insn after sti */
6835            gen_jmp_im(s->pc - s->cs_base);
6836            gen_eob_inhibit_irq(s, true);
6837        } else {
6838            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6839        }
6840        break;
6841    case 0x62: /* bound */
6842        if (CODE64(s))
6843            goto illegal_op;
6844        ot = dflag;
6845        modrm = cpu_ldub_code(env, s->pc++);
6846        reg = (modrm >> 3) & 7;
6847        mod = (modrm >> 6) & 3;
6848        if (mod == 3)
6849            goto illegal_op;
6850        gen_op_mov_v_reg(ot, cpu_T0, reg);
6851        gen_lea_modrm(env, s, modrm);
6852        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6853        if (ot == MO_16) {
6854            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
6855        } else {
6856            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
6857        }
6858        break;
6859    case 0x1c8 ... 0x1cf: /* bswap reg */
6860        reg = (b & 7) | REX_B(s);
6861#ifdef TARGET_X86_64
6862        if (dflag == MO_64) {
6863            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
6864            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
6865            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
6866        } else
6867#endif
6868        {
6869            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
6870            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
6871            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
6872            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
6873        }
6874        break;
6875    case 0xd6: /* salc */
6876        if (CODE64(s))
6877            goto illegal_op;
6878        gen_compute_eflags_c(s, cpu_T0);
6879        tcg_gen_neg_tl(cpu_T0, cpu_T0);
6880        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
6881        break;
6882    case 0xe0: /* loopnz */
6883    case 0xe1: /* loopz */
6884    case 0xe2: /* loop */
6885    case 0xe3: /* jecxz */
6886        {
6887            TCGLabel *l1, *l2, *l3;
6888
6889            tval = (int8_t)insn_get(env, s, MO_8);
6890            next_eip = s->pc - s->cs_base;
6891            tval += next_eip;
6892            if (dflag == MO_16) {
6893                tval &= 0xffff;
6894            }
6895
6896            l1 = gen_new_label();
6897            l2 = gen_new_label();
6898            l3 = gen_new_label();
6899            b &= 3;
6900            switch(b) {
6901            case 0: /* loopnz */
6902            case 1: /* loopz */
6903                gen_op_add_reg_im(s->aflag, R_ECX, -1);
6904                gen_op_jz_ecx(s->aflag, l3);
6905                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
6906                break;
6907            case 2: /* loop */
6908                gen_op_add_reg_im(s->aflag, R_ECX, -1);
6909                gen_op_jnz_ecx(s->aflag, l1);
6910                break;
6911            default:
6912            case 3: /* jcxz */
6913                gen_op_jz_ecx(s->aflag, l1);
6914                break;
6915            }
6916
6917            gen_set_label(l3);
6918            gen_jmp_im(next_eip);
6919            tcg_gen_br(l2);
6920
6921            gen_set_label(l1);
6922            gen_jmp_im(tval);
6923            gen_set_label(l2);
6924            gen_eob(s);
6925        }
6926        break;
6927    case 0x130: /* wrmsr */
6928    case 0x132: /* rdmsr */
6929        if (s->cpl != 0) {
6930            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6931        } else {
6932            gen_update_cc_op(s);
6933            gen_jmp_im(pc_start - s->cs_base);
6934            if (b & 2) {
6935                gen_helper_rdmsr(cpu_env);
6936            } else {
6937                gen_helper_wrmsr(cpu_env);
6938            }
6939        }
6940        break;
6941    case 0x131: /* rdtsc */
6942        gen_update_cc_op(s);
6943        gen_jmp_im(pc_start - s->cs_base);
6944        if (s->tb->cflags & CF_USE_ICOUNT) {
6945            gen_io_start();
6946        }
6947        gen_helper_rdtsc(cpu_env);
6948        if (s->tb->cflags & CF_USE_ICOUNT) {
6949            gen_io_end();
6950            gen_jmp(s, s->pc - s->cs_base);
6951        }
6952        break;
6953    case 0x133: /* rdpmc */
6954        gen_update_cc_op(s);
6955        gen_jmp_im(pc_start - s->cs_base);
6956        gen_helper_rdpmc(cpu_env);
6957        break;
6958    case 0x134: /* sysenter */
        /* On Intel CPUs, SYSENTER remains valid in 64-bit mode */
6960        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
6961            goto illegal_op;
6962        if (!s->pe) {
6963            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6964        } else {
6965            gen_helper_sysenter(cpu_env);
6966            gen_eob(s);
6967        }
6968        break;
6969    case 0x135: /* sysexit */
        /* On Intel CPUs, SYSEXIT remains valid in 64-bit mode */
6971        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
6972            goto illegal_op;
6973        if (!s->pe) {
6974            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6975        } else {
6976            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
6977            gen_eob(s);
6978        }
6979        break;
6980#ifdef TARGET_X86_64
6981    case 0x105: /* syscall */
6982        /* XXX: is it usable in real mode ? */
6983        gen_update_cc_op(s);
6984        gen_jmp_im(pc_start - s->cs_base);
6985        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
6986        gen_eob(s);
6987        break;
6988    case 0x107: /* sysret */
6989        if (!s->pe) {
6990            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6991        } else {
6992            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
6993            /* condition codes are modified only in long mode */
6994            if (s->lma) {
6995                set_cc_op(s, CC_OP_EFLAGS);
6996            }
6997            gen_eob(s);
6998        }
6999        break;
7000#endif
7001    case 0x1a2: /* cpuid */
7002        gen_update_cc_op(s);
7003        gen_jmp_im(pc_start - s->cs_base);
7004        gen_helper_cpuid(cpu_env);
7005        break;
7006    case 0xf4: /* hlt */
7007        if (s->cpl != 0) {
7008            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7009        } else {
7010            gen_update_cc_op(s);
7011            gen_jmp_im(pc_start - s->cs_base);
7012            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7013            s->is_jmp = DISAS_TB_JUMP;
7014        }
7015        break;
7016    case 0x100:
7017        modrm = cpu_ldub_code(env, s->pc++);
7018        mod = (modrm >> 6) & 3;
7019        op = (modrm >> 3) & 7;
7020        switch(op) {
7021        case 0: /* sldt */
7022            if (!s->pe || s->vm86)
7023                goto illegal_op;
7024            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7025            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7026                             offsetof(CPUX86State, ldt.selector));
7027            ot = mod == 3 ? dflag : MO_16;
7028            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7029            break;
7030        case 2: /* lldt */
7031            if (!s->pe || s->vm86)
7032                goto illegal_op;
7033            if (s->cpl != 0) {
7034                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7035            } else {
7036                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7037                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7038                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7039                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
7040            }
7041            break;
7042        case 1: /* str */
7043            if (!s->pe || s->vm86)
7044                goto illegal_op;
7045            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7046            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7047                             offsetof(CPUX86State, tr.selector));
7048            ot = mod == 3 ? dflag : MO_16;
7049            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7050            break;
7051        case 3: /* ltr */
7052            if (!s->pe || s->vm86)
7053                goto illegal_op;
7054            if (s->cpl != 0) {
7055                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7056            } else {
7057                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7058                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7059                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7060                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
7061            }
7062            break;
7063        case 4: /* verr */
7064        case 5: /* verw */
7065            if (!s->pe || s->vm86)
7066                goto illegal_op;
7067            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7068            gen_update_cc_op(s);
7069            if (op == 4) {
7070                gen_helper_verr(cpu_env, cpu_T0);
7071            } else {
7072                gen_helper_verw(cpu_env, cpu_T0);
7073            }
7074            set_cc_op(s, CC_OP_EFLAGS);
7075            break;
7076        default:
7077            goto unknown_op;
7078        }
7079        break;
7080
7081    case 0x101:
7082        modrm = cpu_ldub_code(env, s->pc++);
7083        switch (modrm) {
7084        CASE_MODRM_MEM_OP(0): /* sgdt */
7085            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7086            gen_lea_modrm(env, s, modrm);
7087            tcg_gen_ld32u_tl(cpu_T0,
7088                             cpu_env, offsetof(CPUX86State, gdt.limit));
7089            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7090            gen_add_A0_im(s, 2);
7091            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7092            if (dflag == MO_16) {
7093                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7094            }
7095            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7096            break;
7097
7098        case 0xc8: /* monitor */
7099            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7100                goto illegal_op;
7101            }
7102            gen_update_cc_op(s);
7103            gen_jmp_im(pc_start - s->cs_base);
7104            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
7105            gen_extu(s->aflag, cpu_A0);
7106            gen_add_A0_ds_seg(s);
7107            gen_helper_monitor(cpu_env, cpu_A0);
7108            break;
7109
7110        case 0xc9: /* mwait */
7111            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7112                goto illegal_op;
7113            }
7114            gen_update_cc_op(s);
7115            gen_jmp_im(pc_start - s->cs_base);
7116            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7117            gen_eob(s);
7118            break;
7119
7120        case 0xca: /* clac */
7121            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7122                || s->cpl != 0) {
7123                goto illegal_op;
7124            }
7125            gen_helper_clac(cpu_env);
7126            gen_jmp_im(s->pc - s->cs_base);
7127            gen_eob(s);
7128            break;
7129
7130        case 0xcb: /* stac */
7131            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7132                || s->cpl != 0) {
7133                goto illegal_op;
7134            }
7135            gen_helper_stac(cpu_env);
7136            gen_jmp_im(s->pc - s->cs_base);
7137            gen_eob(s);
7138            break;
7139
7140        CASE_MODRM_MEM_OP(1): /* sidt */
7141            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7142            gen_lea_modrm(env, s, modrm);
7143            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
7144            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7145            gen_add_A0_im(s, 2);
7146            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7147            if (dflag == MO_16) {
7148                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7149            }
7150            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7151            break;
7152
7153        case 0xd0: /* xgetbv */
7154            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7155                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7156                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7157                goto illegal_op;
7158            }
7159            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7160            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7161            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7162            break;
7163
7164        case 0xd1: /* xsetbv */
7165            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7166                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7167                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
7168                goto illegal_op;
7169            }
7170            if (s->cpl != 0) {
7171                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7172                break;
7173            }
7174            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7175                                  cpu_regs[R_EDX]);
7176            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7177            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7178            /* End TB because translation flags may change.  */
7179            gen_jmp_im(s->pc - s->cs_base);
7180            gen_eob(s);
7181            break;
7182
7183        case 0xd8: /* VMRUN */
7184            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7185                goto illegal_op;
7186            }
7187            if (s->cpl != 0) {
7188                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7189                break;
7190            }
7191            gen_update_cc_op(s);
7192            gen_jmp_im(pc_start - s->cs_base);
7193            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7194                             tcg_const_i32(s->pc - pc_start));
7195            tcg_gen_exit_tb(0);
7196            s->is_jmp = DISAS_TB_JUMP;
7197            break;
7198
7199        case 0xd9: /* VMMCALL */
7200            if (!(s->flags & HF_SVME_MASK)) {
7201                goto illegal_op;
7202            }
7203            gen_update_cc_op(s);
7204            gen_jmp_im(pc_start - s->cs_base);
7205            gen_helper_vmmcall(cpu_env);
7206            break;
7207
7208        case 0xda: /* VMLOAD */
7209            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7210                goto illegal_op;
7211            }
7212            if (s->cpl != 0) {
7213                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7214                break;
7215            }
7216            gen_update_cc_op(s);
7217            gen_jmp_im(pc_start - s->cs_base);
7218            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7219            break;
7220
7221        case 0xdb: /* VMSAVE */
7222            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7223                goto illegal_op;
7224            }
7225            if (s->cpl != 0) {
7226                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7227                break;
7228            }
7229            gen_update_cc_op(s);
7230            gen_jmp_im(pc_start - s->cs_base);
7231            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7232            break;
7233
7234        case 0xdc: /* STGI */
7235            if ((!(s->flags & HF_SVME_MASK)
7236                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7237                || !s->pe) {
7238                goto illegal_op;
7239            }
7240            if (s->cpl != 0) {
7241                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7242                break;
7243            }
7244            gen_update_cc_op(s);
7245            gen_jmp_im(pc_start - s->cs_base);
7246            gen_helper_stgi(cpu_env);
7247            break;
7248
7249        case 0xdd: /* CLGI */
7250            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7251                goto illegal_op;
7252            }
7253            if (s->cpl != 0) {
7254                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7255                break;
7256            }
7257            gen_update_cc_op(s);
7258            gen_jmp_im(pc_start - s->cs_base);
7259            gen_helper_clgi(cpu_env);
7260            break;
7261
7262        case 0xde: /* SKINIT */
7263            if ((!(s->flags & HF_SVME_MASK)
7264                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7265                || !s->pe) {
7266                goto illegal_op;
7267            }
7268            gen_update_cc_op(s);
7269            gen_jmp_im(pc_start - s->cs_base);
7270            gen_helper_skinit(cpu_env);
7271            break;
7272
7273        case 0xdf: /* INVLPGA */
7274            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7275                goto illegal_op;
7276            }
7277            if (s->cpl != 0) {
7278                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7279                break;
7280            }
7281            gen_update_cc_op(s);
7282            gen_jmp_im(pc_start - s->cs_base);
7283            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7284            break;
7285
7286        CASE_MODRM_MEM_OP(2): /* lgdt */
7287            if (s->cpl != 0) {
7288                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7289                break;
7290            }
7291            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7292            gen_lea_modrm(env, s, modrm);
7293            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7294            gen_add_A0_im(s, 2);
7295            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7296            if (dflag == MO_16) {
7297                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7298            }
7299            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7300            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7301            break;
7302
7303        CASE_MODRM_MEM_OP(3): /* lidt */
7304            if (s->cpl != 0) {
7305                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7306                break;
7307            }
7308            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7309            gen_lea_modrm(env, s, modrm);
7310            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7311            gen_add_A0_im(s, 2);
7312            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7313            if (dflag == MO_16) {
7314                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7315            }
7316            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7317            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
7318            break;
7319
7320        CASE_MODRM_OP(4): /* smsw */
7321            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7322            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
7323            if (CODE64(s)) {
7324                mod = (modrm >> 6) & 3;
7325                ot = (mod != 3 ? MO_16 : s->dflag);
7326            } else {
7327                ot = MO_16;
7328            }
7329            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7330            break;
7331        case 0xee: /* rdpkru */
7332            if (prefixes & PREFIX_LOCK) {
7333                goto illegal_op;
7334            }
7335            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7336            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7337            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7338            break;
7339        case 0xef: /* wrpkru */
7340            if (prefixes & PREFIX_LOCK) {
7341                goto illegal_op;
7342            }
7343            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7344                                  cpu_regs[R_EDX]);
7345            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7346            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7347            break;
7348        CASE_MODRM_OP(6): /* lmsw */
7349            if (s->cpl != 0) {
7350                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7351                break;
7352            }
7353            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7354            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7355            gen_helper_lmsw(cpu_env, cpu_T0);
7356            gen_jmp_im(s->pc - s->cs_base);
7357            gen_eob(s);
7358            break;
7359
7360        CASE_MODRM_MEM_OP(7): /* invlpg */
7361            if (s->cpl != 0) {
7362                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7363                break;
7364            }
7365            gen_update_cc_op(s);
7366            gen_jmp_im(pc_start - s->cs_base);
7367            gen_lea_modrm(env, s, modrm);
7368            gen_helper_invlpg(cpu_env, cpu_A0);
7369            gen_jmp_im(s->pc - s->cs_base);
7370            gen_eob(s);
7371            break;
7372
7373        case 0xf8: /* swapgs */
7374#ifdef TARGET_X86_64
7375            if (CODE64(s)) {
7376                if (s->cpl != 0) {
7377                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7378                } else {
7379                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
7380                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7381                                  offsetof(CPUX86State, kernelgsbase));
7382                    tcg_gen_st_tl(cpu_T0, cpu_env,
7383                                  offsetof(CPUX86State, kernelgsbase));
7384                }
7385                break;
7386            }
7387#endif
7388            goto illegal_op;
7389
7390        case 0xf9: /* rdtscp */
7391            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7392                goto illegal_op;
7393            }
7394            gen_update_cc_op(s);
7395            gen_jmp_im(pc_start - s->cs_base);
7396            if (s->tb->cflags & CF_USE_ICOUNT) {
7397                gen_io_start();
7398            }
7399            gen_helper_rdtscp(cpu_env);
7400            if (s->tb->cflags & CF_USE_ICOUNT) {
7401                gen_io_end();
7402                gen_jmp(s, s->pc - s->cs_base);
7403            }
7404            break;
7405
7406        default:
7407            goto unknown_op;
7408        }
7409        break;
7410
7411    case 0x108: /* invd */
7412    case 0x109: /* wbinvd */
7413        if (s->cpl != 0) {
7414            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7415        } else {
7416            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7417            /* nothing to do */
7418        }
7419        break;
    case 0x63: /* arpl or movslq (x86_64) */
7421#ifdef TARGET_X86_64
7422        if (CODE64(s)) {
7423            int d_ot;
7424            /* d_ot is the size of destination */
7425            d_ot = dflag;
7426
7427            modrm = cpu_ldub_code(env, s->pc++);
7428            reg = ((modrm >> 3) & 7) | rex_r;
7429            mod = (modrm >> 6) & 3;
7430            rm = (modrm & 7) | REX_B(s);
7431
7432            if (mod == 3) {
7433                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
7434                /* sign extend */
7435                if (d_ot == MO_64) {
7436                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
7437                }
7438                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7439            } else {
7440                gen_lea_modrm(env, s, modrm);
7441                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
7442                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7443            }
7444        } else
7445#endif
7446        {
7447            TCGLabel *label1;
7448            TCGv t0, t1, t2, a0;
7449
7450            if (!s->pe || s->vm86)
7451                goto illegal_op;
7452            t0 = tcg_temp_local_new();
7453            t1 = tcg_temp_local_new();
7454            t2 = tcg_temp_local_new();
7455            ot = MO_16;
7456            modrm = cpu_ldub_code(env, s->pc++);
7457            reg = (modrm >> 3) & 7;
7458            mod = (modrm >> 6) & 3;
7459            rm = modrm & 7;
7460            if (mod != 3) {
7461                gen_lea_modrm(env, s, modrm);
7462                gen_op_ld_v(s, ot, t0, cpu_A0);
7463                a0 = tcg_temp_local_new();
7464                tcg_gen_mov_tl(a0, cpu_A0);
7465            } else {
7466                gen_op_mov_v_reg(ot, t0, rm);
7467                TCGV_UNUSED(a0);
7468            }
7469            gen_op_mov_v_reg(ot, t1, reg);
7470            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
7471            tcg_gen_andi_tl(t1, t1, 3);
7472            tcg_gen_movi_tl(t2, 0);
7473            label1 = gen_new_label();
7474            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
7475            tcg_gen_andi_tl(t0, t0, ~3);
7476            tcg_gen_or_tl(t0, t0, t1);
7477            tcg_gen_movi_tl(t2, CC_Z);
7478            gen_set_label(label1);
7479            if (mod != 3) {
7480                gen_op_st_v(s, ot, t0, a0);
7481                tcg_temp_free(a0);
7482           } else {
7483                gen_op_mov_reg_v(ot, rm, t0);
7484            }
7485            gen_compute_eflags(s);
7486            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7487            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7488            tcg_temp_free(t0);
7489            tcg_temp_free(t1);
7490            tcg_temp_free(t2);
7491        }
7492        break;
7493    case 0x102: /* lar */
7494    case 0x103: /* lsl */
7495        {
7496            TCGLabel *label1;
7497            TCGv t0;
7498            if (!s->pe || s->vm86)
7499                goto illegal_op;
7500            ot = dflag != MO_16 ? MO_32 : MO_16;
7501            modrm = cpu_ldub_code(env, s->pc++);
7502            reg = ((modrm >> 3) & 7) | rex_r;
7503            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7504            t0 = tcg_temp_local_new();
7505            gen_update_cc_op(s);
7506            if (b == 0x102) {
7507                gen_helper_lar(t0, cpu_env, cpu_T0);
7508            } else {
7509                gen_helper_lsl(t0, cpu_env, cpu_T0);
7510            }
7511            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
7512            label1 = gen_new_label();
7513            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
7514            gen_op_mov_reg_v(ot, reg, t0);
7515            gen_set_label(label1);
7516            set_cc_op(s, CC_OP_EFLAGS);
7517            tcg_temp_free(t0);
7518        }
7519        break;
7520    case 0x118:
7521        modrm = cpu_ldub_code(env, s->pc++);
7522        mod = (modrm >> 6) & 3;
7523        op = (modrm >> 3) & 7;
7524        switch(op) {
        case 0: /* prefetchnta */
        case 1: /* prefetcht0 */
        case 2: /* prefetcht1 */
        case 3: /* prefetcht2 */
7529            if (mod == 3)
7530                goto illegal_op;
7531            gen_nop_modrm(env, s, modrm);
7532            /* nothing more to do */
7533            break;
7534        default: /* nop (multi byte) */
7535            gen_nop_modrm(env, s, modrm);
7536            break;
7537        }
7538        break;
7539    case 0x11a:
7540        modrm = cpu_ldub_code(env, s->pc++);
7541        if (s->flags & HF_MPX_EN_MASK) {
7542            mod = (modrm >> 6) & 3;
7543            reg = ((modrm >> 3) & 7) | rex_r;
7544            if (prefixes & PREFIX_REPZ) {
7545                /* bndcl */
7546                if (reg >= 4
7547                    || (prefixes & PREFIX_LOCK)
7548                    || s->aflag == MO_16) {
7549                    goto illegal_op;
7550                }
7551                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7552            } else if (prefixes & PREFIX_REPNZ) {
7553                /* bndcu */
7554                if (reg >= 4
7555                    || (prefixes & PREFIX_LOCK)
7556                    || s->aflag == MO_16) {
7557                    goto illegal_op;
7558                }
7559                TCGv_i64 notu = tcg_temp_new_i64();
7560                tcg_gen_not_i64(notu, cpu_bndu[reg]);
7561                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7562                tcg_temp_free_i64(notu);
7563            } else if (prefixes & PREFIX_DATA) {
7564                /* bndmov -- from reg/mem */
7565                if (reg >= 4 || s->aflag == MO_16) {
7566                    goto illegal_op;
7567                }
7568                if (mod == 3) {
7569                    int reg2 = (modrm & 7) | REX_B(s);
7570                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7571                        goto illegal_op;
7572                    }
7573                    if (s->flags & HF_MPX_IU_MASK) {
7574                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7575                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7576                    }
7577                } else {
7578                    gen_lea_modrm(env, s, modrm);
7579                    if (CODE64(s)) {
7580                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7581                                            s->mem_index, MO_LEQ);
7582                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7583                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7584                                            s->mem_index, MO_LEQ);
7585                    } else {
7586                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7587                                            s->mem_index, MO_LEUL);
7588                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7589                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7590                                            s->mem_index, MO_LEUL);
7591                    }
7592                    /* bnd registers are now in-use */
7593                    gen_set_hflag(s, HF_MPX_IU_MASK);
7594                }
7595            } else if (mod != 3) {
7596                /* bndldx */
7597                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7598                if (reg >= 4
7599                    || (prefixes & PREFIX_LOCK)
7600                    || s->aflag == MO_16
7601                    || a.base < -1) {
7602                    goto illegal_op;
7603                }
7604                if (a.base >= 0) {
7605                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7606                } else {
7607                    tcg_gen_movi_tl(cpu_A0, 0);
7608                }
7609                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7610                if (a.index >= 0) {
7611                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7612                } else {
7613                    tcg_gen_movi_tl(cpu_T0, 0);
7614                }
7615                if (CODE64(s)) {
7616                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
7617                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7618                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7619                } else {
7620                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
7621                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7622                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7623                }
7624                gen_set_hflag(s, HF_MPX_IU_MASK);
7625            }
7626        }
7627        gen_nop_modrm(env, s, modrm);
7628        break;
7629    case 0x11b:
7630        modrm = cpu_ldub_code(env, s->pc++);
7631        if (s->flags & HF_MPX_EN_MASK) {
7632            mod = (modrm >> 6) & 3;
7633            reg = ((modrm >> 3) & 7) | rex_r;
7634            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7635                /* bndmk */
7636                if (reg >= 4
7637                    || (prefixes & PREFIX_LOCK)
7638                    || s->aflag == MO_16) {
7639                    goto illegal_op;
7640                }
7641                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7642                if (a.base >= 0) {
7643                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7644                    if (!CODE64(s)) {
7645                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7646                    }
7647                } else if (a.base == -1) {
7648                    /* no base register has lower bound of 0 */
7649                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
7650                } else {
7651                    /* rip-relative generates #ud */
7652                    goto illegal_op;
7653                }
7654                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
7655                if (!CODE64(s)) {
7656                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
7657                }
7658                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
7659                /* bnd registers are now in-use */
7660                gen_set_hflag(s, HF_MPX_IU_MASK);
7661                break;
7662            } else if (prefixes & PREFIX_REPNZ) {
7663                /* bndcn */
7664                if (reg >= 4
7665                    || (prefixes & PREFIX_LOCK)
7666                    || s->aflag == MO_16) {
7667                    goto illegal_op;
7668                }
7669                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7670            } else if (prefixes & PREFIX_DATA) {
7671                /* bndmov -- to reg/mem */
7672                if (reg >= 4 || s->aflag == MO_16) {
7673                    goto illegal_op;
7674                }
7675                if (mod == 3) {
7676                    int reg2 = (modrm & 7) | REX_B(s);
7677                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7678                        goto illegal_op;
7679                    }
7680                    if (s->flags & HF_MPX_IU_MASK) {
7681                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7682                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7683                    }
7684                } else {
7685                    gen_lea_modrm(env, s, modrm);
7686                    if (CODE64(s)) {
7687                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7688                                            s->mem_index, MO_LEQ);
7689                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7690                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7691                                            s->mem_index, MO_LEQ);
7692                    } else {
7693                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7694                                            s->mem_index, MO_LEUL);
7695                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7696                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7697                                            s->mem_index, MO_LEUL);
7698                    }
7699                }
7700            } else if (mod != 3) {
7701                /* bndstx */
7702                AddressParts a = gen_lea_modrm_0(env, s, modrm);
7703                if (reg >= 4
7704                    || (prefixes & PREFIX_LOCK)
7705                    || s->aflag == MO_16
7706                    || a.base < -1) {
7707                    goto illegal_op;
7708                }
7709                if (a.base >= 0) {
7710                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7711                } else {
7712                    tcg_gen_movi_tl(cpu_A0, 0);
7713                }
7714                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7715                if (a.index >= 0) {
7716                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7717                } else {
7718                    tcg_gen_movi_tl(cpu_T0, 0);
7719                }
7720                if (CODE64(s)) {
7721                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
7722                                        cpu_bndl[reg], cpu_bndu[reg]);
7723                } else {
7724                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
7725                                        cpu_bndl[reg], cpu_bndu[reg]);
7726                }
7727            }
7728        }
7729        gen_nop_modrm(env, s, modrm);
7730        break;
7731    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7732        modrm = cpu_ldub_code(env, s->pc++);
7733        gen_nop_modrm(env, s, modrm);
7734        break;
7735    case 0x120: /* mov reg, crN */
7736    case 0x122: /* mov crN, reg */
7737        if (s->cpl != 0) {
7738            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7739        } else {
7740            modrm = cpu_ldub_code(env, s->pc++);
7741            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7742             * AMD documentation (24594.pdf) and testing of
7743             * intel 386 and 486 processors all show that the mod bits
7744             * are assumed to be 1's, regardless of actual values.
7745             */
7746            rm = (modrm & 7) | REX_B(s);
7747            reg = ((modrm >> 3) & 7) | rex_r;
7748            if (CODE64(s))
7749                ot = MO_64;
7750            else
7751                ot = MO_32;
7752            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7753                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7754                reg = 8;
7755            }
7756            switch(reg) {
7757            case 0:
7758            case 2:
7759            case 3:
7760            case 4:
7761            case 8:
7762                gen_update_cc_op(s);
7763                gen_jmp_im(pc_start - s->cs_base);
7764                if (b & 2) {
7765                    gen_op_mov_v_reg(ot, cpu_T0, rm);
7766                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
7767                                         cpu_T0);
7768                    gen_jmp_im(s->pc - s->cs_base);
7769                    gen_eob(s);
7770                } else {
7771                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
7772                    gen_op_mov_reg_v(ot, rm, cpu_T0);
7773                }
7774                break;
7775            default:
7776                goto unknown_op;
7777            }
7778        }
7779        break;
7780    case 0x121: /* mov reg, drN */
7781    case 0x123: /* mov drN, reg */
7782        if (s->cpl != 0) {
7783            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7784        } else {
7785            modrm = cpu_ldub_code(env, s->pc++);
7786            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7787             * AMD documentation (24594.pdf) and testing of
7788             * intel 386 and 486 processors all show that the mod bits
7789             * are assumed to be 1's, regardless of actual values.
7790             */
7791            rm = (modrm & 7) | REX_B(s);
7792            reg = ((modrm >> 3) & 7) | rex_r;
7793            if (CODE64(s))
7794                ot = MO_64;
7795            else
7796                ot = MO_32;
7797            if (reg >= 8) {
7798                goto illegal_op;
7799            }
7800            if (b & 2) {
7801                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
7802                gen_op_mov_v_reg(ot, cpu_T0, rm);
7803                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7804                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
7805                gen_jmp_im(s->pc - s->cs_base);
7806                gen_eob(s);
7807            } else {
7808                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
7809                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7810                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
7811                gen_op_mov_reg_v(ot, rm, cpu_T0);
7812            }
7813        }
7814        break;
7815    case 0x106: /* clts */
7816        if (s->cpl != 0) {
7817            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7818        } else {
7819            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7820            gen_helper_clts(cpu_env);
7821            /* abort block because static cpu state changed */
7822            gen_jmp_im(s->pc - s->cs_base);
7823            gen_eob(s);
7824        }
7825        break;
7826    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
7827    case 0x1c3: /* MOVNTI reg, mem */
7828        if (!(s->cpuid_features & CPUID_SSE2))
7829            goto illegal_op;
7830        ot = mo_64_32(dflag);
7831        modrm = cpu_ldub_code(env, s->pc++);
7832        mod = (modrm >> 6) & 3;
7833        if (mod == 3)
7834            goto illegal_op;
7835        reg = ((modrm >> 3) & 7) | rex_r;
7836        /* generate a generic store */
7837        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
7838        break;
7839    case 0x1ae:
7840        modrm = cpu_ldub_code(env, s->pc++);
7841        switch (modrm) {
7842        CASE_MODRM_MEM_OP(0): /* fxsave */
7843            if (!(s->cpuid_features & CPUID_FXSR)
7844                || (prefixes & PREFIX_LOCK)) {
7845                goto illegal_op;
7846            }
7847            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7848                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7849                break;
7850            }
7851            gen_lea_modrm(env, s, modrm);
7852            gen_helper_fxsave(cpu_env, cpu_A0);
7853            break;
7854
7855        CASE_MODRM_MEM_OP(1): /* fxrstor */
7856            if (!(s->cpuid_features & CPUID_FXSR)
7857                || (prefixes & PREFIX_LOCK)) {
7858                goto illegal_op;
7859            }
7860            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7861                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7862                break;
7863            }
7864            gen_lea_modrm(env, s, modrm);
7865            gen_helper_fxrstor(cpu_env, cpu_A0);
7866            break;
7867
7868        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
7869            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
7870                goto illegal_op;
7871            }
7872            if (s->flags & HF_TS_MASK) {
7873                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7874                break;
7875            }
7876            gen_lea_modrm(env, s, modrm);
7877            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
7878            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
7879            break;
7880
7881        CASE_MODRM_MEM_OP(3): /* stmxcsr */
7882            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
7883                goto illegal_op;
7884            }
7885            if (s->flags & HF_TS_MASK) {
7886                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7887                break;
7888            }
7889            gen_lea_modrm(env, s, modrm);
7890            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
7891            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
7892            break;
7893
7894        CASE_MODRM_MEM_OP(4): /* xsave */
7895            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7896                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
7897                                | PREFIX_REPZ | PREFIX_REPNZ))) {
7898                goto illegal_op;
7899            }
7900            gen_lea_modrm(env, s, modrm);
7901            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7902                                  cpu_regs[R_EDX]);
7903            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
7904            break;
7905
7906        CASE_MODRM_MEM_OP(5): /* xrstor */
7907            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7908                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
7909                                | PREFIX_REPZ | PREFIX_REPNZ))) {
7910                goto illegal_op;
7911            }
7912            gen_lea_modrm(env, s, modrm);
7913            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7914                                  cpu_regs[R_EDX]);
7915            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
7916            /* XRSTOR is how MPX is enabled, which changes how
7917               we translate.  Thus we need to end the TB.  */
7918            gen_update_cc_op(s);
7919            gen_jmp_im(s->pc - s->cs_base);
7920            gen_eob(s);
7921            break;
7922
7923        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
7924            if (prefixes & PREFIX_LOCK) {
7925                goto illegal_op;
7926            }
7927            if (prefixes & PREFIX_DATA) {
7928                /* clwb */
7929                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
7930                    goto illegal_op;
7931                }
7932                gen_nop_modrm(env, s, modrm);
7933            } else {
7934                /* xsaveopt */
7935                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7936                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
7937                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
7938                    goto illegal_op;
7939                }
7940                gen_lea_modrm(env, s, modrm);
7941                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7942                                      cpu_regs[R_EDX]);
7943                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
7944            }
7945            break;
7946
7947        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
7948            if (prefixes & PREFIX_LOCK) {
7949                goto illegal_op;
7950            }
7951            if (prefixes & PREFIX_DATA) {
7952                /* clflushopt */
7953                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
7954                    goto illegal_op;
7955                }
7956            } else {
7957                /* clflush */
7958                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
7959                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
7960                    goto illegal_op;
7961                }
7962            }
7963            gen_nop_modrm(env, s, modrm);
7964            break;
7965
7966        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
7967        case 0xc8 ... 0xc8: /* rdgsbase (f3 0f ae /1) */
7968        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
7969        case 0xd8 ... 0xd8: /* wrgsbase (f3 0f ae /3) */
7970            if (CODE64(s)
7971                && (prefixes & PREFIX_REPZ)
7972                && !(prefixes & PREFIX_LOCK)
7973                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
7974                TCGv base, treg, src, dst;
7975
7976                /* Preserve hflags bits by testing CR4 at runtime.  */
7977                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
7978                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
7979
7980                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
7981                treg = cpu_regs[(modrm & 7) | REX_B(s)];
7982
7983                if (modrm & 0x10) {
7984                    /* wr*base */
7985                    dst = base, src = treg;
7986                } else {
7987                    /* rd*base */
7988                    dst = treg, src = base;
7989                }
7990
7991                if (s->dflag == MO_32) {
7992                    tcg_gen_ext32u_tl(dst, src);
7993                } else {
7994                    tcg_gen_mov_tl(dst, src);
7995                }
7996                break;
7997            }
7998            goto unknown_op;
7999
8000        case 0xf8: /* sfence / pcommit */
8001            if (prefixes & PREFIX_DATA) {
8002                /* pcommit */
8003                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8004                    || (prefixes & PREFIX_LOCK)) {
8005                    goto illegal_op;
8006                }
8007                break;
8008            }
8009            /* fallthru */
8010        case 0xf9 ... 0xff: /* sfence */
8011            if (!(s->cpuid_features & CPUID_SSE)
8012                || (prefixes & PREFIX_LOCK)) {
8013                goto illegal_op;
8014            }
8015            break;
8016        case 0xe8 ... 0xef: /* lfence */
8017        case 0xf0 ... 0xf7: /* mfence */
8018            if (!(s->cpuid_features & CPUID_SSE2)
8019                || (prefixes & PREFIX_LOCK)) {
8020                goto illegal_op;
8021            }
8022            break;
8023
8024        default:
8025            goto unknown_op;
8026        }
8027        break;
8028
8029    case 0x10d: /* 3DNow! prefetch(w) */
8030        modrm = cpu_ldub_code(env, s->pc++);
8031        mod = (modrm >> 6) & 3;
8032        if (mod == 3)
8033            goto illegal_op;
8034        gen_nop_modrm(env, s, modrm);
8035        break;
8036    case 0x1aa: /* rsm */
8037        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8038        if (!(s->flags & HF_SMM_MASK))
8039            goto illegal_op;
8040        gen_update_cc_op(s);
8041        gen_jmp_im(s->pc - s->cs_base);
8042        gen_helper_rsm(cpu_env);
8043        gen_eob(s);
8044        break;
8045    case 0x1b8: /* SSE4.2 popcnt */
8046        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8047             PREFIX_REPZ)
8048            goto illegal_op;
8049        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8050            goto illegal_op;
8051
8052        modrm = cpu_ldub_code(env, s->pc++);
8053        reg = ((modrm >> 3) & 7) | rex_r;
8054
8055        if (s->prefix & PREFIX_DATA) {
8056            ot = MO_16;
8057        } else {
8058            ot = mo_64_32(dflag);
8059        }
8060
8061        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8062        gen_helper_popcnt(cpu_T0, cpu_env, cpu_T0, tcg_const_i32(ot));
8063        gen_op_mov_reg_v(ot, reg, cpu_T0);
8064
8065        set_cc_op(s, CC_OP_EFLAGS);
8066        break;
8067    case 0x10e ... 0x10f:
8068        /* 3DNow! instructions, ignore prefixes */
8069        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8070    case 0x110 ... 0x117:
8071    case 0x128 ... 0x12f:
8072    case 0x138 ... 0x13a:
8073    case 0x150 ... 0x179:
8074    case 0x17c ... 0x17f:
8075    case 0x1c2:
8076    case 0x1c4 ... 0x1c6:
8077    case 0x1d0 ... 0x1fe:
8078        gen_sse(env, s, b, pc_start, rex_r);
8079        break;
8080    default:
8081        goto unknown_op;
8082    }
8083    /* lock generation */
8084    if (s->prefix & PREFIX_LOCK)
8085        gen_helper_unlock();
8086    return s->pc;
8087 illegal_op:
8088    if (s->prefix & PREFIX_LOCK)
8089        gen_helper_unlock();
8090    /* XXX: ensure that no lock was generated */
8091    gen_illegal_opcode(s);
8092    return s->pc;
8093 unknown_op:
8094    if (s->prefix & PREFIX_LOCK)
8095        gen_helper_unlock();
8096    /* XXX: ensure that no lock was generated */
8097    gen_unknown_opcode(env, s);
8098    return s->pc;
8099}
8100
8101void tcg_x86_init(void)
8102{
8103    static const char reg_names[CPU_NB_REGS][4] = {
8104#ifdef TARGET_X86_64
8105        [R_EAX] = "rax",
8106        [R_EBX] = "rbx",
8107        [R_ECX] = "rcx",
8108        [R_EDX] = "rdx",
8109        [R_ESI] = "rsi",
8110        [R_EDI] = "rdi",
8111        [R_EBP] = "rbp",
8112        [R_ESP] = "rsp",
8113        [8]  = "r8",
8114        [9]  = "r9",
8115        [10] = "r10",
8116        [11] = "r11",
8117        [12] = "r12",
8118        [13] = "r13",
8119        [14] = "r14",
8120        [15] = "r15",
8121#else
8122        [R_EAX] = "eax",
8123        [R_EBX] = "ebx",
8124        [R_ECX] = "ecx",
8125        [R_EDX] = "edx",
8126        [R_ESI] = "esi",
8127        [R_EDI] = "edi",
8128        [R_EBP] = "ebp",
8129        [R_ESP] = "esp",
8130#endif
8131    };
8132    static const char seg_base_names[6][8] = {
8133        [R_CS] = "cs_base",
8134        [R_DS] = "ds_base",
8135        [R_ES] = "es_base",
8136        [R_FS] = "fs_base",
8137        [R_GS] = "gs_base",
8138        [R_SS] = "ss_base",
8139    };
8140    static const char bnd_regl_names[4][8] = {
8141        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8142    };
8143    static const char bnd_regu_names[4][8] = {
8144        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8145    };
8146    int i;
8147    static bool initialized;
8148
8149    if (initialized) {
8150        return;
8151    }
8152    initialized = true;
8153
8154    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
8155    tcg_ctx.tcg_env = cpu_env;
8156    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8157                                       offsetof(CPUX86State, cc_op), "cc_op");
8158    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8159                                    "cc_dst");
8160    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8161                                    "cc_src");
8162    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8163                                     "cc_src2");
8164
8165    for (i = 0; i < CPU_NB_REGS; ++i) {
8166        cpu_regs[i] = tcg_global_mem_new(cpu_env,
8167                                         offsetof(CPUX86State, regs[i]),
8168                                         reg_names[i]);
8169    }
8170
8171    for (i = 0; i < 6; ++i) {
8172        cpu_seg_base[i]
8173            = tcg_global_mem_new(cpu_env,
8174                                 offsetof(CPUX86State, segs[i].base),
8175                                 seg_base_names[i]);
8176    }
8177
8178    for (i = 0; i < 4; ++i) {
8179        cpu_bndl[i]
8180            = tcg_global_mem_new_i64(cpu_env,
8181                                     offsetof(CPUX86State, bnd_regs[i].lb),
8182                                     bnd_regl_names[i]);
8183        cpu_bndu[i]
8184            = tcg_global_mem_new_i64(cpu_env,
8185                                     offsetof(CPUX86State, bnd_regs[i].ub),
8186                                     bnd_regu_names[i]);
8187    }
8188
8189    helper_lock_init();
8190}
8191
8192/* generate intermediate code for basic block 'tb'.  */
8193void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
8194{
8195    X86CPU *cpu = x86_env_get_cpu(env);
8196    CPUState *cs = CPU(cpu);
8197    DisasContext dc1, *dc = &dc1;
8198    target_ulong pc_ptr;
8199    uint32_t flags;
8200    target_ulong pc_start;
8201    target_ulong cs_base;
8202    int num_insns;
8203    int max_insns;
8204
8205    /* generate intermediate code */
8206    pc_start = tb->pc;
8207    cs_base = tb->cs_base;
8208    flags = tb->flags;
8209
8210    dc->pe = (flags >> HF_PE_SHIFT) & 1;
8211    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8212    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8213    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8214    dc->f_st = 0;
8215    dc->vm86 = (flags >> VM_SHIFT) & 1;
8216    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8217    dc->iopl = (flags >> IOPL_SHIFT) & 3;
8218    dc->tf = (flags >> TF_SHIFT) & 1;
8219    dc->singlestep_enabled = cs->singlestep_enabled;
8220    dc->cc_op = CC_OP_DYNAMIC;
8221    dc->cc_op_dirty = false;
8222    dc->cs_base = cs_base;
8223    dc->tb = tb;
8224    dc->popl_esp_hack = 0;
8225    /* select memory access functions */
8226    dc->mem_index = 0;
8227#ifdef CONFIG_SOFTMMU
8228    dc->mem_index = cpu_mmu_index(env, false);
8229#endif
8230    dc->cpuid_features = env->features[FEAT_1_EDX];
8231    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8232    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8233    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8234    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8235    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8236#ifdef TARGET_X86_64
8237    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8238    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8239#endif
8240    dc->flags = flags;
8241    dc->jmp_opt = !(dc->tf || cs->singlestep_enabled ||
8242                    (flags & HF_INHIBIT_IRQ_MASK));
8243    /* Do not optimize repz jumps at all in icount mode, because
       rep movsS instructions are executed with different paths
8245       in !repz_opt and repz_opt modes. The first one was used
8246       always except single step mode. And this setting
8247       disables jumps optimization and control paths become
8248       equivalent in run and single step modes.
8249       Now there will be no jump optimization for repz in
8250       record/replay modes and there will always be an
8251       additional step for ecx=0 when icount is enabled.
8252     */
8253    dc->repz_opt = !dc->jmp_opt && !(tb->cflags & CF_USE_ICOUNT);
8254#if 0
8255    /* check addseg logic */
8256    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8257        printf("ERROR addseg\n");
8258#endif
8259
8260    cpu_T0 = tcg_temp_new();
8261    cpu_T1 = tcg_temp_new();
8262    cpu_A0 = tcg_temp_new();
8263
8264    cpu_tmp0 = tcg_temp_new();
8265    cpu_tmp1_i64 = tcg_temp_new_i64();
8266    cpu_tmp2_i32 = tcg_temp_new_i32();
8267    cpu_tmp3_i32 = tcg_temp_new_i32();
8268    cpu_tmp4 = tcg_temp_new();
8269    cpu_ptr0 = tcg_temp_new_ptr();
8270    cpu_ptr1 = tcg_temp_new_ptr();
8271    cpu_cc_srcT = tcg_temp_local_new();
8272
8273    dc->is_jmp = DISAS_NEXT;
8274    pc_ptr = pc_start;
8275    num_insns = 0;
8276    max_insns = tb->cflags & CF_COUNT_MASK;
8277    if (max_insns == 0) {
8278        max_insns = CF_COUNT_MASK;
8279    }
8280    if (max_insns > TCG_MAX_INSNS) {
8281        max_insns = TCG_MAX_INSNS;
8282    }
8283
8284    gen_tb_start(tb);
8285    for(;;) {
8286        tcg_gen_insn_start(pc_ptr, dc->cc_op);
8287        num_insns++;
8288
8289        /* If RF is set, suppress an internally generated breakpoint.  */
8290        if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
8291                                         tb->flags & HF_RF_MASK
8292                                         ? BP_GDB : BP_ANY))) {
8293            gen_debug(dc, pc_ptr - dc->cs_base);
8294            /* The address covered by the breakpoint must be included in
               [tb->pc, tb->pc + tb->size) in order for it to be
8296               properly cleared -- thus we increment the PC here so that
8297               the logic setting tb->size below does the right thing.  */
8298            pc_ptr += 1;
8299            goto done_generating;
8300        }
8301        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
8302            gen_io_start();
8303        }
8304
8305        pc_ptr = disas_insn(env, dc, pc_ptr);
8306        /* stop translation if indicated */
8307        if (dc->is_jmp)
8308            break;
8309        /* if single step mode, we generate only one instruction and
8310           generate an exception */
8311        /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8312           the flag and abort the translation to give the irqs a
           chance to happen */
8314        if (dc->tf || dc->singlestep_enabled ||
8315            (flags & HF_INHIBIT_IRQ_MASK)) {
8316            gen_jmp_im(pc_ptr - dc->cs_base);
8317            gen_eob(dc);
8318            break;
8319        }
8320        /* Do not cross the boundary of the pages in icount mode,
8321           it can cause an exception. Do it only when boundary is
8322           crossed by the first instruction in the block.
8323           If current instruction already crossed the bound - it's ok,
8324           because an exception hasn't stopped this code.
8325         */
8326        if ((tb->cflags & CF_USE_ICOUNT)
8327            && ((pc_ptr & TARGET_PAGE_MASK)
8328                != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK)
8329                || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) {
8330            gen_jmp_im(pc_ptr - dc->cs_base);
8331            gen_eob(dc);
8332            break;
8333        }
8334        /* if too long translation, stop generation too */
8335        if (tcg_op_buf_full() ||
8336            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
8337            num_insns >= max_insns) {
8338            gen_jmp_im(pc_ptr - dc->cs_base);
8339            gen_eob(dc);
8340            break;
8341        }
8342        if (singlestep) {
8343            gen_jmp_im(pc_ptr - dc->cs_base);
8344            gen_eob(dc);
8345            break;
8346        }
8347    }
8348    if (tb->cflags & CF_LAST_IO)
8349        gen_io_end();
8350done_generating:
8351    gen_tb_end(tb, num_insns);
8352
8353#ifdef DEBUG_DISAS
8354    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
8355        && qemu_log_in_addr_range(pc_start)) {
8356        int disas_flags;
8357        qemu_log("----------------\n");
8358        qemu_log("IN: %s\n", lookup_symbol(pc_start));
8359#ifdef TARGET_X86_64
8360        if (dc->code64)
8361            disas_flags = 2;
8362        else
8363#endif
8364            disas_flags = !dc->code32;
8365        log_target_disas(cs, pc_start, pc_ptr - pc_start, disas_flags);
8366        qemu_log("\n");
8367    }
8368#endif
8369
8370    tb->size = pc_ptr - pc_start;
8371    tb->icount = num_insns;
8372}
8373
8374void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8375                          target_ulong *data)
8376{
8377    int cc_op = data[1];
8378    env->eip = data[0] - tb->cs_base;
8379    if (cc_op != CC_OP_DYNAMIC) {
8380        env->cc_op = cc_op;
8381    }
8382}
8383